[llvm] a7da029 - [CostModel][X86] Adjust sitofp/uitofp SSE/AVX legalized costs based on llvm-mca reports.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 7 04:24:16 PDT 2021
Author: Simon Pilgrim
Date: 2021-07-07T12:03:45+01:00
New Revision: a7da0296a663094e661c54a5ba2c4ce0239c312b
URL: https://github.com/llvm/llvm-project/commit/a7da0296a663094e661c54a5ba2c4ce0239c312b
DIFF: https://github.com/llvm/llvm-project/commit/a7da0296a663094e661c54a5ba2c4ce0239c312b.diff
LOG: [CostModel][X86] Adjust sitofp/uitofp SSE/AVX legalized costs based on llvm-mca reports.
Update (mainly) the vXi8/vXi16 -> vXf32/vXf64 sitofp/uitofp costs based on the worst-case costs reported by the script in D103695.
Move to using legalized types wherever possible, which allows us to prune the cost tables.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/cast.ll
llvm/test/Analysis/CostModel/X86/sitofp.ll
llvm/test/Analysis/CostModel/X86/uitofp.ll
llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 87b96b699928..561442d34b02 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1673,24 +1673,24 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
- { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, // FIXME: May not be right
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
- { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
- { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
- { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
- { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
- { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
- { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
@@ -1820,16 +1820,18 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, // vpternlogq
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, // vpternlogq+psrlq
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 },
+
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
@@ -1891,10 +1893,18 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 4 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 15 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 2 },
@@ -1923,77 +1933,73 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 },
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 },
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 },
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 },
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 },
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 },
-
- { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // and+extract+packuswb
- { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, // and+packusdw+packuswb
- { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
- { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
- { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
- { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 3 }, // and+extract+2*packusdw
- { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
- { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 },
- { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 },
- { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
- { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 },
-
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
- { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
- { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 },
- { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
- { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
- { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
- { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 },
-
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 },
- { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 },
- { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
- { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 6 },
-
- { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
- { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
- { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 },
- { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
- { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 },
- { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 },
-
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 5 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 9 },
- { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 5 },
- { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
- { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 9 },
- { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
- { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
- { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 },
- { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 },
+
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // and+extract+packuswb
+ { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, // and+packusdw+packuswb
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 4 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 3 }, // and+extract+2*packusdw
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 11 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 9 },
+ { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 11 },
+
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 },
+
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 },
+
+ { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f64, 3 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 3 },
+ { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 },
+
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 5 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 9 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 5 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f64, 3 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 9 },
+ { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 3 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 9 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 19 },
{ ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 },
{ ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 },
@@ -2057,6 +2063,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SINT_TO_FP, MVT::f64, MVT::i32, 1 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i64, 1 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 },
@@ -2065,6 +2075,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::f64, MVT::i32, 1 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 2 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 12 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 22 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 4 },
@@ -2102,6 +2119,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 4 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 4 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 7 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 7 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 },
diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll
index 7d0a3fd8fba1..444398e25329 100644
--- a/llvm/test/Analysis/CostModel/X86/cast.ll
+++ b/llvm/test/Analysis/CostModel/X86/cast.ll
@@ -386,10 +386,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; SSE41-LABEL: 'sitofp4'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -397,10 +397,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; AVX1-LABEL: 'sitofp4'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -408,10 +408,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; AVX2-LABEL: 'sitofp4'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -419,10 +419,10 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; AVX512-LABEL: 'sitofp4'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
@@ -447,30 +447,30 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'sitofp8'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'sitofp8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'sitofp8'
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'sitofp8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
@@ -482,23 +482,34 @@ define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
}
define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
-; SSE-LABEL: 'uitofp4'
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
-; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
-; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-LABEL: 'uitofp4'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE41-LABEL: 'uitofp4'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'uitofp4'
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
@@ -507,9 +518,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; AVX2-LABEL: 'uitofp4'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
@@ -518,9 +529,9 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
; AVX512-LABEL: 'uitofp4'
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double>
@@ -538,31 +549,38 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
}
define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
-; SSE-LABEL: 'uitofp8'
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; SSE2-LABEL: 'uitofp8'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; SSE41-LABEL: 'uitofp8'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
+; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX1-LABEL: 'uitofp8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX2-LABEL: 'uitofp8'
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512-LABEL: 'uitofp8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll
index 69af3a273a33..b6d17253128c 100644
--- a/llvm/test/Analysis/CostModel/X86/sitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll
@@ -20,29 +20,29 @@ define i32 @sitofp_i8_double() {
;
; SSE42-LABEL: 'sitofp_i8_double'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'sitofp_i8_double'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'sitofp_i8_double'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'sitofp_i8_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = sitofp i8 undef to double
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
@@ -63,29 +63,29 @@ define i32 @sitofp_i16_double() {
;
; SSE42-LABEL: 'sitofp_i16_double'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'sitofp_i16_double'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'sitofp_i16_double'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'sitofp_i16_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = sitofp i16 undef to double
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
@@ -193,34 +193,34 @@ define i32 @sitofp_i8_float() {
;
; SSE42-LABEL: 'sitofp_i8_float'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'sitofp_i8_float'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'sitofp_i8_float'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'sitofp_i8_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = sitofp i8 undef to float
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%cvt_i8_f32 = sitofp i8 undef to float
@@ -242,34 +242,34 @@ define i32 @sitofp_i16_float() {
;
; SSE42-LABEL: 'sitofp_i16_float'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'sitofp_i16_float'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'sitofp_i16_float'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'sitofp_i16_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = sitofp i16 undef to float
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%cvt_i16_f32 = sitofp i16 undef to float
diff --git a/llvm/test/Analysis/CostModel/X86/uitofp.ll b/llvm/test/Analysis/CostModel/X86/uitofp.ll
index e7dedf74ff0f..5f83033a1eea 100644
--- a/llvm/test/Analysis/CostModel/X86/uitofp.ll
+++ b/llvm/test/Analysis/CostModel/X86/uitofp.ll
@@ -20,28 +20,28 @@ define i32 @uitofp_i8_double() {
;
; SSE42-LABEL: 'uitofp_i8_double'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'uitofp_i8_double'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'uitofp_i8_double'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'uitofp_i8_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f64 = uitofp i8 undef to double
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -63,28 +63,28 @@ define i32 @uitofp_i16_double() {
;
; SSE42-LABEL: 'uitofp_i16_double'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'uitofp_i16_double'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'uitofp_i16_double'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'uitofp_i16_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f64 = uitofp i16 undef to double
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -106,9 +106,9 @@ define i32 @uitofp_i32_double() {
;
; SSE42-LABEL: 'uitofp_i32_double'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f64 = uitofp i32 undef to double
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'uitofp_i32_double'
@@ -193,34 +193,34 @@ define i32 @uitofp_i8_float() {
;
; SSE42-LABEL: 'uitofp_i8_float'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'uitofp_i8_float'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'uitofp_i8_float'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'uitofp_i8_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i8_f32 = uitofp i8 undef to float
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%cvt_i8_f32 = uitofp i8 undef to float
@@ -242,34 +242,34 @@ define i32 @uitofp_i16_float() {
;
; SSE42-LABEL: 'uitofp_i16_float'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'uitofp_i16_float'
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float
-; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'uitofp_i16_float'
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'uitofp_i16_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_i16_f32 = uitofp i16 undef to float
-; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
%cvt_i16_f32 = uitofp i16 undef to float
@@ -283,7 +283,7 @@ define i32 @uitofp_i16_float() {
define i32 @uitofp_i32_float() {
; SSE2-LABEL: 'uitofp_i32_float'
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
-; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
@@ -291,15 +291,15 @@ define i32 @uitofp_i32_float() {
;
; SSE42-LABEL: 'uitofp_i32_float'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'uitofp_i32_float'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i32_f32 = uitofp i32 undef to float
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %cvt_v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
index c0b1bae204ad..3418b6e01696 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-inseltpoison.ll
@@ -321,20 +321,11 @@ define void @sitofp_8i32_8f64() #0 {
}
define void @sitofp_2i16_2f64() #0 {
-; SSE-LABEL: @sitofp_2i16_2f64(
-; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
-; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT: ret void
-;
-; AVX-LABEL: @sitofp_2i16_2f64(
-; AVX-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-; AVX-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-; AVX-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double
-; AVX-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double
-; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX-NEXT: ret void
+; CHECK-LABEL: @sitofp_2i16_2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT: ret void
;
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
@@ -435,20 +426,11 @@ define void @sitofp_8i16_8f64() #0 {
}
define void @sitofp_2i8_2f64() #0 {
-; SSE-LABEL: @sitofp_2i8_2f64(
-; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
-; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT: ret void
-;
-; AVX-LABEL: @sitofp_2i8_2f64(
-; AVX-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-; AVX-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-; AVX-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double
-; AVX-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double
-; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX-NEXT: ret void
+; CHECK-LABEL: @sitofp_2i8_2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT: ret void
;
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
index f0cb3e542875..5cec75d85fea 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp.ll
@@ -321,20 +321,11 @@ define void @sitofp_8i32_8f64() #0 {
}
define void @sitofp_2i16_2f64() #0 {
-; SSE-LABEL: @sitofp_2i16_2f64(
-; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
-; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT: ret void
-;
-; AVX-LABEL: @sitofp_2i16_2f64(
-; AVX-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-; AVX-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-; AVX-NEXT: [[CVT0:%.*]] = sitofp i16 [[LD0]] to double
-; AVX-NEXT: [[CVT1:%.*]] = sitofp i16 [[LD1]] to double
-; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX-NEXT: ret void
+; CHECK-LABEL: @sitofp_2i16_2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i16> [[TMP1]] to <2 x double>
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT: ret void
;
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
@@ -435,20 +426,11 @@ define void @sitofp_8i16_8f64() #0 {
}
define void @sitofp_2i8_2f64() #0 {
-; SSE-LABEL: @sitofp_2i8_2f64(
-; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
-; SSE-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT: ret void
-;
-; AVX-LABEL: @sitofp_2i8_2f64(
-; AVX-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-; AVX-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-; AVX-NEXT: [[CVT0:%.*]] = sitofp i8 [[LD0]] to double
-; AVX-NEXT: [[CVT1:%.*]] = sitofp i8 [[LD1]] to double
-; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX-NEXT: ret void
+; CHECK-LABEL: @sitofp_2i8_2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <2 x i8> [[TMP1]] to <2 x double>
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT: ret void
;
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
index 67feab3d4875..b38bd8d6f88e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/uitofp.ll
@@ -258,20 +258,11 @@ define void @uitofp_8i32_8f64() #0 {
}
define void @uitofp_2i16_2f64() #0 {
-; SSE-LABEL: @uitofp_2i16_2f64(
-; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
-; SSE-NEXT: [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
-; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT: ret void
-;
-; AVX-LABEL: @uitofp_2i16_2f64(
-; AVX-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
-; AVX-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
-; AVX-NEXT: [[CVT0:%.*]] = uitofp i16 [[LD0]] to double
-; AVX-NEXT: [[CVT1:%.*]] = uitofp i16 [[LD1]] to double
-; AVX-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX-NEXT: ret void
+; CHECK-LABEL: @uitofp_2i16_2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* bitcast ([32 x i16]* @src16 to <2 x i16>*), align 64
+; CHECK-NEXT: [[TMP2:%.*]] = uitofp <2 x i16> [[TMP1]] to <2 x double>
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT: ret void
;
%ld0 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 0), align 64
%ld1 = load i16, i16* getelementptr inbounds ([32 x i16], [32 x i16]* @src16, i32 0, i64 1), align 2
@@ -372,41 +363,11 @@ define void @uitofp_8i16_8f64() #0 {
}
define void @uitofp_2i8_2f64() #0 {
-; SSE-LABEL: @uitofp_2i8_2f64(
-; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
-; SSE-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
-; SSE-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; SSE-NEXT: ret void
-;
-; AVX1-LABEL: @uitofp_2i8_2f64(
-; AVX1-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-; AVX1-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-; AVX1-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
-; AVX1-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
-; AVX1-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX1-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX1-NEXT: ret void
-;
-; AVX2-LABEL: @uitofp_2i8_2f64(
-; AVX2-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
-; AVX2-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
-; AVX2-NEXT: [[CVT0:%.*]] = uitofp i8 [[LD0]] to double
-; AVX2-NEXT: [[CVT1:%.*]] = uitofp i8 [[LD1]] to double
-; AVX2-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
-; AVX2-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
-; AVX2-NEXT: ret void
-;
-; AVX512-LABEL: @uitofp_2i8_2f64(
-; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
-; AVX512-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
-; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; AVX512-NEXT: ret void
-;
-; AVX256DQ-LABEL: @uitofp_2i8_2f64(
-; AVX256DQ-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
-; AVX256DQ-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
-; AVX256DQ-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
-; AVX256DQ-NEXT: ret void
+; CHECK-LABEL: @uitofp_2i8_2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* bitcast ([64 x i8]* @src8 to <2 x i8>*), align 64
+; CHECK-NEXT: [[TMP2:%.*]] = uitofp <2 x i8> [[TMP1]] to <2 x double>
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
+; CHECK-NEXT: ret void
;
%ld0 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 0), align 64
%ld1 = load i8, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @src8, i32 0, i64 1), align 1
More information about the llvm-commits mailing list