[llvm] ae0d73a - [CostModel][X86] Adjust fptosi/fptoui SSE/AVX legalized costs based on llvm-mca reports.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 12 12:42:19 PDT 2021
Author: Simon Pilgrim
Date: 2021-07-12T20:38:25+01:00
New Revision: ae0d73ac3bb842a9a043986f2925efb08f8c569a
URL: https://github.com/llvm/llvm-project/commit/ae0d73ac3bb842a9a043986f2925efb08f8c569a
DIFF: https://github.com/llvm/llvm-project/commit/ae0d73ac3bb842a9a043986f2925efb08f8c569a.diff
LOG: [CostModel][X86] Adjust fptosi/fptoui SSE/AVX legalized costs based on llvm-mca reports.
Update (mainly) vXf32/vXf64 -> vXi8/vXi16 fptosi/fptoui costs based on the worst case costs from the script in D103695.
Move to using legalized types wherever possible, which allows us to prune the cost tables.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/fptosi.ll
llvm/test/Analysis/CostModel/X86/fptoui.ll
llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 30156f45c4f72..42406d15226a2 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1696,11 +1696,16 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f64, 7 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f64,15 },
+ { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f32,11 },
+ { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f64,31 },
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f64, 7 },
+ { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f32, 5 },
+ { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f64,15 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 1 },
- { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 3 },
- { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 3 },
{ ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f64, 3 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
@@ -1757,12 +1762,12 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
@@ -1852,16 +1857,17 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 3 },
- { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f32, 5 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
};
static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
@@ -1903,16 +1909,17 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 1 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 3 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 4 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 7 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 7 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 7 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 4 },
- { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 15 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 7 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 },
@@ -1999,24 +2006,30 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 },
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v32i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i16, MVT::v4f64, 2 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 },
- { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 },
{ ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 5 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 9 },
- { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 5 },
- { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v32i8, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v32i8, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 9 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 9 },
- { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
- { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 },
- { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 9 },
{ ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 },
{ ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 },
@@ -2076,14 +2089,23 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 1 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 1 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 3 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 1 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 1 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 5 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 3 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 3 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 5 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 6 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 3 },
};
static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -2121,25 +2143,23 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 4 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 4 },
- { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 2 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
- { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
- { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 6 },
+ { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 6 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 5 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 5 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 4 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 4 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 4 },
- { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 4 },
- { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
- { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 2 },
- { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 6 },
+ { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 6 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 5 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 5 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 8 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 8 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v16i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v16i8, 4 },
diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll
index a5aa5bd74977a..535e960be715d 100644
--- a/llvm/test/Analysis/CostModel/X86/fptosi.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll
@@ -63,16 +63,16 @@ define i32 @fptosi_double_i64(i32 %arg) {
define i32 @fptosi_double_i32(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi double undef to i32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptosi_double_i32'
@@ -99,8 +99,8 @@ define i32 @fptosi_double_i32(i32 %arg) {
; SLM-LABEL: 'fptosi_double_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptosi double undef to i32
@@ -113,44 +113,37 @@ define i32 @fptosi_double_i32(i32 %arg) {
define i32 @fptosi_double_i16(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi double undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptosi_double_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptosi_double_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptosi_double_i16'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_double_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptosi double undef to i16
@@ -163,44 +156,37 @@ define i32 @fptosi_double_i16(i32 %arg) {
define i32 @fptosi_double_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi double undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptosi_double_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptosi_double_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptosi_double_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_double_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_double_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptosi double undef to i8
@@ -270,10 +256,10 @@ define i32 @fptosi_float_i64(i32 %arg) {
define i32 @fptosi_float_i32(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptosi float undef to i32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i32'
@@ -327,50 +313,50 @@ define i32 @fptosi_float_i32(i32 %arg) {
define i32 @fptosi_float_i16(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptosi float undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptosi_float_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fptosi_float_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_float_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptosi float undef to i16
@@ -384,50 +370,42 @@ define i32 @fptosi_float_i16(i32 %arg) {
define i32 @fptosi_float_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptosi float undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptosi_float_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptosi_float_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptosi_float_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptosi_float_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptosi_float_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptosi float undef to i8
diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll
index ed1c420735108..eac56601c171e 100644
--- a/llvm/test/Analysis/CostModel/X86/fptoui.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll
@@ -19,17 +19,17 @@ define i32 @fptoui_double_i64(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui double undef to i64
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'fptoui_double_i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
-; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui double undef to i64
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_double_i64'
@@ -47,10 +47,10 @@ define i32 @fptoui_double_i64(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i64'
-; SLM-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %I64 = fptoui double undef to i64
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui double undef to i64
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = fptoui double undef to i64
@@ -63,30 +63,30 @@ define i32 @fptoui_double_i64(i32 %arg) {
define i32 @fptoui_double_i32(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = fptoui double undef to i32
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptoui_double_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fptoui_double_i32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_double_i32'
@@ -98,9 +98,9 @@ define i32 @fptoui_double_i32(i32 %arg) {
;
; SLM-LABEL: 'fptoui_double_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
-; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptoui double undef to i32
@@ -113,44 +113,37 @@ define i32 @fptoui_double_i32(i32 %arg) {
define i32 @fptoui_double_i16(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptoui double undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptoui_double_i16'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptoui_double_i16'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptoui_double_i16'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_double_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptoui double undef to i16
@@ -163,44 +156,37 @@ define i32 @fptoui_double_i16(i32 %arg) {
define i32 @fptoui_double_i8(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptoui double undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptoui_double_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptoui_double_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptoui_double_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_double_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_double_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptoui double undef to i8
@@ -220,19 +206,19 @@ define i32 @fptoui_float_i64(i32 %arg) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i64'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui float undef to i64
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'fptoui_float_i64'
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; AVX-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui float undef to i64
+; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_float_i64'
@@ -252,11 +238,11 @@ define i32 @fptoui_float_i64(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i64'
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
-; SLM-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
-; SLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I64 = fptoui float undef to i64
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
+; SLM-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I64 = fptoui float undef to i64
@@ -278,16 +264,16 @@ define i32 @fptoui_float_i32(i32 %arg) {
;
; SSE42-LABEL: 'fptoui_float_i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptoui_float_i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -310,10 +296,10 @@ define i32 @fptoui_float_i32(i32 %arg) {
;
; SLM-LABEL: 'fptoui_float_i32'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
-; SLM-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
+; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I32 = fptoui float undef to i32
@@ -327,50 +313,50 @@ define i32 @fptoui_float_i32(i32 %arg) {
define i32 @fptoui_float_i16(i32 %arg) {
; SSE2-LABEL: 'fptoui_float_i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = fptoui float undef to i16
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i16'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fptoui_float_i16'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fptoui_float_i16'
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_float_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i16'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I16 = fptoui float undef to i16
@@ -384,50 +370,42 @@ define i32 @fptoui_float_i16(i32 %arg) {
define i32 @fptoui_float_i8(i32 %arg) {
; SSE2-LABEL: 'fptoui_float_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = fptoui float undef to i8
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fptoui_float_i8'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fptoui_float_i8'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; AVX-LABEL: 'fptoui_float_i8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fptoui_float_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fptoui_float_i8'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%I8 = fptoui float undef to i8
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
index b5fed033e327e..e8157d2816dd2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
-; CHECK: cost of 4 for VF 8 For instruction: %conv = fptosi float %tmp to i8
+; CHECK: cost of 1 for VF 1 For instruction: %conv = fptosi float %tmp to i8
define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind {
entry:
br label %for.body
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
index 5b5f5b051bae4..7612b1f746469 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
@@ -118,30 +118,12 @@ define void @fptoui_8f64_8i64() #0 {
define void @fptoui_8f64_8i32() #0 {
; SSE-LABEL: @fptoui_8f64_8i32(
-; SSE-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
-; SSE-NEXT: [[CVT0:%.*]] = fptoui double [[A0]] to i32
-; SSE-NEXT: [[CVT1:%.*]] = fptoui double [[A1]] to i32
-; SSE-NEXT: [[CVT2:%.*]] = fptoui double [[A2]] to i32
-; SSE-NEXT: [[CVT3:%.*]] = fptoui double [[A3]] to i32
-; SSE-NEXT: [[CVT4:%.*]] = fptoui double [[A4]] to i32
-; SSE-NEXT: [[CVT5:%.*]] = fptoui double [[A5]] to i32
-; SSE-NEXT: [[CVT6:%.*]] = fptoui double [[A6]] to i32
-; SSE-NEXT: [[CVT7:%.*]] = fptoui double [[A7]] to i32
-; SSE-NEXT: store i32 [[CVT0]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 0), align 4
-; SSE-NEXT: store i32 [[CVT1]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 1), align 4
-; SSE-NEXT: store i32 [[CVT2]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 2), align 4
-; SSE-NEXT: store i32 [[CVT3]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 3), align 4
-; SSE-NEXT: store i32 [[CVT4]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4), align 4
-; SSE-NEXT: store i32 [[CVT5]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 5), align 4
-; SSE-NEXT: store i32 [[CVT6]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 6), align 4
-; SSE-NEXT: store i32 [[CVT7]], i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 7), align 4
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; SSE-NEXT: [[TMP3:%.*]] = fptoui <4 x double> [[TMP1]] to <4 x i32>
+; SSE-NEXT: [[TMP4:%.*]] = fptoui <4 x double> [[TMP2]] to <4 x i32>
+; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([16 x i32]* @dst32 to <4 x i32>*), align 4
+; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32], [16 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @fptoui_8f64_8i32(
More information about the llvm-commits
mailing list