[llvm] 626a84d - [CostModel][X86] getTypeBasedIntrinsicInstrCost - convert to CostKindTblEntry

Sun Sep 4 09:59:22 PDT 2022

Author: Simon Pilgrim
Date: 2022-09-04T17:59:08+01:00
New Revision: 626a84db477cf5e2d076b29c9491620bcc810308

URL: https://github.com/llvm/llvm-project/commit/626a84db477cf5e2d076b29c9491620bcc810308
DIFF: https://github.com/llvm/llvm-project/commit/626a84db477cf5e2d076b29c9491620bcc810308.diff

LOG: [CostModel][X86] getTypeBasedIntrinsicInstrCost - convert to CostKindTblEntry

Begin the refactoring to use CostKindTblEntry and return real latency/codesize/sizelatency costs instead of reusing the throughput numbers

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
    llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
    llvm/test/Analysis/CostModel/X86/costmodel.ll
    llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
    llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
    llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
    llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 641e4611e113..54bbd337c1cc 100644

--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3099,457 +3099,457 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 
   // TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
   //       specialized in these tables yet.
-  static const CostTblEntry AVX512BITALGCostTbl[] = {
-    { ISD::CTPOP,      MVT::v32i16,  1 },
-    { ISD::CTPOP,      MVT::v64i8,   1 },
-    { ISD::CTPOP,      MVT::v16i16,  1 },
-    { ISD::CTPOP,      MVT::v32i8,   1 },
-    { ISD::CTPOP,      MVT::v8i16,   1 },
-    { ISD::CTPOP,      MVT::v16i8,   1 },
+  static const CostKindTblEntry AVX512BITALGCostTbl[] = {
+    { ISD::CTPOP,      MVT::v32i16,  {  1 } },
+    { ISD::CTPOP,      MVT::v64i8,   {  1 } },
+    { ISD::CTPOP,      MVT::v16i16,  {  1 } },
+    { ISD::CTPOP,      MVT::v32i8,   {  1 } },
+    { ISD::CTPOP,      MVT::v8i16,   {  1 } },
+    { ISD::CTPOP,      MVT::v16i8,   {  1 } },
   };
-  static const CostTblEntry AVX512VPOPCNTDQCostTbl[] = {
-    { ISD::CTPOP,      MVT::v8i64,   1 },
-    { ISD::CTPOP,      MVT::v16i32,  1 },
-    { ISD::CTPOP,      MVT::v4i64,   1 },
-    { ISD::CTPOP,      MVT::v8i32,   1 },
-    { ISD::CTPOP,      MVT::v2i64,   1 },
-    { ISD::CTPOP,      MVT::v4i32,   1 },
+  static const CostKindTblEntry AVX512VPOPCNTDQCostTbl[] = {
+    { ISD::CTPOP,      MVT::v8i64,   {  1 } },
+    { ISD::CTPOP,      MVT::v16i32,  {  1 } },
+    { ISD::CTPOP,      MVT::v4i64,   {  1 } },
+    { ISD::CTPOP,      MVT::v8i32,   {  1 } },
+    { ISD::CTPOP,      MVT::v2i64,   {  1 } },
+    { ISD::CTPOP,      MVT::v4i32,   {  1 } },
   };
-  static const CostTblEntry AVX512CDCostTbl[] = {
-    { ISD::CTLZ,       MVT::v8i64,   1 },
-    { ISD::CTLZ,       MVT::v16i32,  1 },
-    { ISD::CTLZ,       MVT::v32i16,  8 },
-    { ISD::CTLZ,       MVT::v64i8,  20 },
-    { ISD::CTLZ,       MVT::v4i64,   1 },
-    { ISD::CTLZ,       MVT::v8i32,   1 },
-    { ISD::CTLZ,       MVT::v16i16,  4 },
-    { ISD::CTLZ,       MVT::v32i8,  10 },
-    { ISD::CTLZ,       MVT::v2i64,   1 },
-    { ISD::CTLZ,       MVT::v4i32,   1 },
-    { ISD::CTLZ,       MVT::v8i16,   4 },
-    { ISD::CTLZ,       MVT::v16i8,   4 },
+  static const CostKindTblEntry AVX512CDCostTbl[] = {
+    { ISD::CTLZ,       MVT::v8i64,   {  1 } },
+    { ISD::CTLZ,       MVT::v16i32,  {  1 } },
+    { ISD::CTLZ,       MVT::v32i16,  {  8 } },
+    { ISD::CTLZ,       MVT::v64i8,   { 20 } },
+    { ISD::CTLZ,       MVT::v4i64,   {  1 } },
+    { ISD::CTLZ,       MVT::v8i32,   {  1 } },
+    { ISD::CTLZ,       MVT::v16i16,  {  4 } },
+    { ISD::CTLZ,       MVT::v32i8,   { 10 } },
+    { ISD::CTLZ,       MVT::v2i64,   {  1 } },
+    { ISD::CTLZ,       MVT::v4i32,   {  1 } },
+    { ISD::CTLZ,       MVT::v8i16,   {  4 } },
+    { ISD::CTLZ,       MVT::v16i8,   {  4 } },
   };
-  static const CostTblEntry AVX512BWCostTbl[] = {
-    { ISD::ABS,        MVT::v32i16,  1 },
-    { ISD::ABS,        MVT::v64i8,   1 },
-    { ISD::BITREVERSE, MVT::v8i64,   3 },
-    { ISD::BITREVERSE, MVT::v16i32,  3 },
-    { ISD::BITREVERSE, MVT::v32i16,  3 },
-    { ISD::BITREVERSE, MVT::v64i8,   2 },
-    { ISD::BSWAP,      MVT::v8i64,   1 },
-    { ISD::BSWAP,      MVT::v16i32,  1 },
-    { ISD::BSWAP,      MVT::v32i16,  1 },
-    { ISD::CTLZ,       MVT::v8i64,  23 },
-    { ISD::CTLZ,       MVT::v16i32, 22 },
-    { ISD::CTLZ,       MVT::v32i16, 18 },
-    { ISD::CTLZ,       MVT::v64i8,  17 },
-    { ISD::CTPOP,      MVT::v8i64,   7 },
-    { ISD::CTPOP,      MVT::v16i32, 11 },
-    { ISD::CTPOP,      MVT::v32i16,  9 },
-    { ISD::CTPOP,      MVT::v64i8,   6 },
-    { ISD::CTTZ,       MVT::v8i64,  10 },
-    { ISD::CTTZ,       MVT::v16i32, 14 },
-    { ISD::CTTZ,       MVT::v32i16, 12 },
-    { ISD::CTTZ,       MVT::v64i8,   9 },
-    { ISD::SADDSAT,    MVT::v32i16,  1 },
-    { ISD::SADDSAT,    MVT::v64i8,   1 },
-    { ISD::SMAX,       MVT::v32i16,  1 },
-    { ISD::SMAX,       MVT::v64i8,   1 },
-    { ISD::SMIN,       MVT::v32i16,  1 },
-    { ISD::SMIN,       MVT::v64i8,   1 },
-    { ISD::SSUBSAT,    MVT::v32i16,  1 },
-    { ISD::SSUBSAT,    MVT::v64i8,   1 },
-    { ISD::UADDSAT,    MVT::v32i16,  1 },
-    { ISD::UADDSAT,    MVT::v64i8,   1 },
-    { ISD::UMAX,       MVT::v32i16,  1 },
-    { ISD::UMAX,       MVT::v64i8,   1 },
-    { ISD::UMIN,       MVT::v32i16,  1 },
-    { ISD::UMIN,       MVT::v64i8,   1 },
-    { ISD::USUBSAT,    MVT::v32i16,  1 },
-    { ISD::USUBSAT,    MVT::v64i8,   1 },
+  static const CostKindTblEntry AVX512BWCostTbl[] = {
+    { ISD::ABS,        MVT::v32i16,  {  1 } },
+    { ISD::ABS,        MVT::v64i8,   {  1 } },
+    { ISD::BITREVERSE, MVT::v8i64,   {  3 } },
+    { ISD::BITREVERSE, MVT::v16i32,  {  3 } },
+    { ISD::BITREVERSE, MVT::v32i16,  {  3 } },
+    { ISD::BITREVERSE, MVT::v64i8,   {  2 } },
+    { ISD::BSWAP,      MVT::v8i64,   {  1 } },
+    { ISD::BSWAP,      MVT::v16i32,  {  1 } },
+    { ISD::BSWAP,      MVT::v32i16,  {  1 } },
+    { ISD::CTLZ,       MVT::v8i64,   { 23 } },
+    { ISD::CTLZ,       MVT::v16i32,  { 22 } },
+    { ISD::CTLZ,       MVT::v32i16,  { 18 } },
+    { ISD::CTLZ,       MVT::v64i8,   { 17 } },
+    { ISD::CTPOP,      MVT::v8i64,   {  7 } },
+    { ISD::CTPOP,      MVT::v16i32,  { 11 } },
+    { ISD::CTPOP,      MVT::v32i16,  {  9 } },
+    { ISD::CTPOP,      MVT::v64i8,   {  6 } },
+    { ISD::CTTZ,       MVT::v8i64,   { 10 } },
+    { ISD::CTTZ,       MVT::v16i32,  { 14 } },
+    { ISD::CTTZ,       MVT::v32i16,  { 12 } },
+    { ISD::CTTZ,       MVT::v64i8,   {  9 } },
+    { ISD::SADDSAT,    MVT::v32i16,  {  1 } },
+    { ISD::SADDSAT,    MVT::v64i8,   {  1 } },
+    { ISD::SMAX,       MVT::v32i16,  {  1 } },
+    { ISD::SMAX,       MVT::v64i8,   {  1 } },
+    { ISD::SMIN,       MVT::v32i16,  {  1 } },
+    { ISD::SMIN,       MVT::v64i8,   {  1 } },
+    { ISD::SSUBSAT,    MVT::v32i16,  {  1 } },
+    { ISD::SSUBSAT,    MVT::v64i8,   {  1 } },
+    { ISD::UADDSAT,    MVT::v32i16,  {  1 } },
+    { ISD::UADDSAT,    MVT::v64i8,   {  1 } },
+    { ISD::UMAX,       MVT::v32i16,  {  1 } },
+    { ISD::UMAX,       MVT::v64i8,   {  1 } },
+    { ISD::UMIN,       MVT::v32i16,  {  1 } },
+    { ISD::UMIN,       MVT::v64i8,   {  1 } },
+    { ISD::USUBSAT,    MVT::v32i16,  {  1 } },
+    { ISD::USUBSAT,    MVT::v64i8,   {  1 } },
   };
-  static const CostTblEntry AVX512CostTbl[] = {
-    { ISD::ABS,        MVT::v8i64,   1 },
-    { ISD::ABS,        MVT::v16i32,  1 },
-    { ISD::ABS,        MVT::v32i16,  2 },
-    { ISD::ABS,        MVT::v64i8,   2 },
-    { ISD::ABS,        MVT::v4i64,   1 },
-    { ISD::ABS,        MVT::v2i64,   1 },
-    { ISD::BITREVERSE, MVT::v8i64,  36 },
-    { ISD::BITREVERSE, MVT::v16i32, 24 },
-    { ISD::BITREVERSE, MVT::v32i16, 10 },
-    { ISD::BITREVERSE, MVT::v64i8,  10 },
-    { ISD::BSWAP,      MVT::v8i64,   4 },
-    { ISD::BSWAP,      MVT::v16i32,  4 },
-    { ISD::BSWAP,      MVT::v32i16,  4 },
-    { ISD::CTLZ,       MVT::v8i64,  29 },
-    { ISD::CTLZ,       MVT::v16i32, 35 },
-    { ISD::CTLZ,       MVT::v32i16, 28 },
-    { ISD::CTLZ,       MVT::v64i8,  18 },
-    { ISD::CTPOP,      MVT::v8i64,  16 },
-    { ISD::CTPOP,      MVT::v16i32, 24 },
-    { ISD::CTPOP,      MVT::v32i16, 18 },
-    { ISD::CTPOP,      MVT::v64i8,  12 },
-    { ISD::CTTZ,       MVT::v8i64,  20 },
-    { ISD::CTTZ,       MVT::v16i32, 28 },
-    { ISD::CTTZ,       MVT::v32i16, 24 },
-    { ISD::CTTZ,       MVT::v64i8,  18 },
-    { ISD::SMAX,       MVT::v8i64,   1 },
-    { ISD::SMAX,       MVT::v16i32,  1 },
-    { ISD::SMAX,       MVT::v32i16,  2 },
-    { ISD::SMAX,       MVT::v64i8,   2 },
-    { ISD::SMAX,       MVT::v4i64,   1 },
-    { ISD::SMAX,       MVT::v2i64,   1 },
-    { ISD::SMIN,       MVT::v8i64,   1 },
-    { ISD::SMIN,       MVT::v16i32,  1 },
-    { ISD::SMIN,       MVT::v32i16,  2 },
-    { ISD::SMIN,       MVT::v64i8,   2 },
-    { ISD::SMIN,       MVT::v4i64,   1 },
-    { ISD::SMIN,       MVT::v2i64,   1 },
-    { ISD::UMAX,       MVT::v8i64,   1 },
-    { ISD::UMAX,       MVT::v16i32,  1 },
-    { ISD::UMAX,       MVT::v32i16,  2 },
-    { ISD::UMAX,       MVT::v64i8,   2 },
-    { ISD::UMAX,       MVT::v4i64,   1 },
-    { ISD::UMAX,       MVT::v2i64,   1 },
-    { ISD::UMIN,       MVT::v8i64,   1 },
-    { ISD::UMIN,       MVT::v16i32,  1 },
-    { ISD::UMIN,       MVT::v32i16,  2 },
-    { ISD::UMIN,       MVT::v64i8,   2 },
-    { ISD::UMIN,       MVT::v4i64,   1 },
-    { ISD::UMIN,       MVT::v2i64,   1 },
-    { ISD::USUBSAT,    MVT::v16i32,  2 }, // pmaxud + psubd
-    { ISD::USUBSAT,    MVT::v2i64,   2 }, // pmaxuq + psubq
-    { ISD::USUBSAT,    MVT::v4i64,   2 }, // pmaxuq + psubq
-    { ISD::USUBSAT,    MVT::v8i64,   2 }, // pmaxuq + psubq
-    { ISD::UADDSAT,    MVT::v16i32,  3 }, // not + pminud + paddd
-    { ISD::UADDSAT,    MVT::v2i64,   3 }, // not + pminuq + paddq
-    { ISD::UADDSAT,    MVT::v4i64,   3 }, // not + pminuq + paddq
-    { ISD::UADDSAT,    MVT::v8i64,   3 }, // not + pminuq + paddq
-    { ISD::SADDSAT,    MVT::v32i16,  2 },
-    { ISD::SADDSAT,    MVT::v64i8,   2 },
-    { ISD::SSUBSAT,    MVT::v32i16,  2 },
-    { ISD::SSUBSAT,    MVT::v64i8,   2 },
-    { ISD::UADDSAT,    MVT::v32i16,  2 },
-    { ISD::UADDSAT,    MVT::v64i8,   2 },
-    { ISD::USUBSAT,    MVT::v32i16,  2 },
-    { ISD::USUBSAT,    MVT::v64i8,   2 },
-    { ISD::FMAXNUM,    MVT::f32,     2 },
-    { ISD::FMAXNUM,    MVT::v4f32,   2 },
-    { ISD::FMAXNUM,    MVT::v8f32,   2 },
-    { ISD::FMAXNUM,    MVT::v16f32,  2 },
-    { ISD::FMAXNUM,    MVT::f64,     2 },
-    { ISD::FMAXNUM,    MVT::v2f64,   2 },
-    { ISD::FMAXNUM,    MVT::v4f64,   2 },
-    { ISD::FMAXNUM,    MVT::v8f64,   2 },
+  static const CostKindTblEntry AVX512CostTbl[] = {
+    { ISD::ABS,        MVT::v8i64,   {  1 } },
+    { ISD::ABS,        MVT::v16i32,  {  1 } },
+    { ISD::ABS,        MVT::v32i16,  {  2 } },
+    { ISD::ABS,        MVT::v64i8,   {  2 } },
+    { ISD::ABS,        MVT::v4i64,   {  1 } },
+    { ISD::ABS,        MVT::v2i64,   {  1 } },
+    { ISD::BITREVERSE, MVT::v8i64,   { 36 } },
+    { ISD::BITREVERSE, MVT::v16i32,  { 24 } },
+    { ISD::BITREVERSE, MVT::v32i16,  { 10 } },
+    { ISD::BITREVERSE, MVT::v64i8,   { 10 } },
+    { ISD::BSWAP,      MVT::v8i64,   {  4 } },
+    { ISD::BSWAP,      MVT::v16i32,  {  4 } },
+    { ISD::BSWAP,      MVT::v32i16,  {  4 } },
+    { ISD::CTLZ,       MVT::v8i64,   { 29 } },
+    { ISD::CTLZ,       MVT::v16i32,  { 35 } },
+    { ISD::CTLZ,       MVT::v32i16,  { 28 } },
+    { ISD::CTLZ,       MVT::v64i8,   { 18 } },
+    { ISD::CTPOP,      MVT::v8i64,   { 16 } },
+    { ISD::CTPOP,      MVT::v16i32,  { 24 } },
+    { ISD::CTPOP,      MVT::v32i16,  { 18 } },
+    { ISD::CTPOP,      MVT::v64i8,   { 12 } },
+    { ISD::CTTZ,       MVT::v8i64,   { 20 } },
+    { ISD::CTTZ,       MVT::v16i32,  { 28 } },
+    { ISD::CTTZ,       MVT::v32i16,  { 24 } },
+    { ISD::CTTZ,       MVT::v64i8,   { 18 } },
+    { ISD::SMAX,       MVT::v8i64,   {  1 } },
+    { ISD::SMAX,       MVT::v16i32,  {  1 } },
+    { ISD::SMAX,       MVT::v32i16,  {  2 } },
+    { ISD::SMAX,       MVT::v64i8,   {  2 } },
+    { ISD::SMAX,       MVT::v4i64,   {  1 } },
+    { ISD::SMAX,       MVT::v2i64,   {  1 } },
+    { ISD::SMIN,       MVT::v8i64,   {  1 } },
+    { ISD::SMIN,       MVT::v16i32,  {  1 } },
+    { ISD::SMIN,       MVT::v32i16,  {  2 } },
+    { ISD::SMIN,       MVT::v64i8,   {  2 } },
+    { ISD::SMIN,       MVT::v4i64,   {  1 } },
+    { ISD::SMIN,       MVT::v2i64,   {  1 } },
+    { ISD::UMAX,       MVT::v8i64,   {  1 } },
+    { ISD::UMAX,       MVT::v16i32,  {  1 } },
+    { ISD::UMAX,       MVT::v32i16,  {  2 } },
+    { ISD::UMAX,       MVT::v64i8,   {  2 } },
+    { ISD::UMAX,       MVT::v4i64,   {  1 } },
+    { ISD::UMAX,       MVT::v2i64,   {  1 } },
+    { ISD::UMIN,       MVT::v8i64,   {  1 } },
+    { ISD::UMIN,       MVT::v16i32,  {  1 } },
+    { ISD::UMIN,       MVT::v32i16,  {  2 } },
+    { ISD::UMIN,       MVT::v64i8,   {  2 } },
+    { ISD::UMIN,       MVT::v4i64,   {  1 } },
+    { ISD::UMIN,       MVT::v2i64,   {  1 } },
+    { ISD::USUBSAT,    MVT::v16i32,  {  2 } }, // pmaxud + psubd
+    { ISD::USUBSAT,    MVT::v2i64,   {  2 } }, // pmaxuq + psubq
+    { ISD::USUBSAT,    MVT::v4i64,   {  2 } }, // pmaxuq + psubq
+    { ISD::USUBSAT,    MVT::v8i64,   {  2 } }, // pmaxuq + psubq
+    { ISD::UADDSAT,    MVT::v16i32,  {  3 } }, // not + pminud + paddd
+    { ISD::UADDSAT,    MVT::v2i64,   {  3 } }, // not + pminuq + paddq
+    { ISD::UADDSAT,    MVT::v4i64,   {  3 } }, // not + pminuq + paddq
+    { ISD::UADDSAT,    MVT::v8i64,   {  3 } }, // not + pminuq + paddq
+    { ISD::SADDSAT,    MVT::v32i16,  {  2 } },
+    { ISD::SADDSAT,    MVT::v64i8,   {  2 } },
+    { ISD::SSUBSAT,    MVT::v32i16,  {  2 } },
+    { ISD::SSUBSAT,    MVT::v64i8,   {  2 } },
+    { ISD::UADDSAT,    MVT::v32i16,  {  2 } },
+    { ISD::UADDSAT,    MVT::v64i8,   {  2 } },
+    { ISD::USUBSAT,    MVT::v32i16,  {  2 } },
+    { ISD::USUBSAT,    MVT::v64i8,   {  2 } },
+    { ISD::FMAXNUM,    MVT::f32,     {  2 } },
+    { ISD::FMAXNUM,    MVT::v4f32,   {  2 } },
+    { ISD::FMAXNUM,    MVT::v8f32,   {  2 } },
+    { ISD::FMAXNUM,    MVT::v16f32,  {  2 } },
+    { ISD::FMAXNUM,    MVT::f64,     {  2 } },
+    { ISD::FMAXNUM,    MVT::v2f64,   {  2 } },
+    { ISD::FMAXNUM,    MVT::v4f64,   {  2 } },
+    { ISD::FMAXNUM,    MVT::v8f64,   {  2 } },
   };
-  static const CostTblEntry XOPCostTbl[] = {
-    { ISD::BITREVERSE, MVT::v4i64,   4 },
-    { ISD::BITREVERSE, MVT::v8i32,   4 },
-    { ISD::BITREVERSE, MVT::v16i16,  4 },
-    { ISD::BITREVERSE, MVT::v32i8,   4 },
-    { ISD::BITREVERSE, MVT::v2i64,   1 },
-    { ISD::BITREVERSE, MVT::v4i32,   1 },
-    { ISD::BITREVERSE, MVT::v8i16,   1 },
-    { ISD::BITREVERSE, MVT::v16i8,   1 },
-    { ISD::BITREVERSE, MVT::i64,     3 },
-    { ISD::BITREVERSE, MVT::i32,     3 },
-    { ISD::BITREVERSE, MVT::i16,     3 },
-    { ISD::BITREVERSE, MVT::i8,      3 }
+  static const CostKindTblEntry XOPCostTbl[] = {
+    { ISD::BITREVERSE, MVT::v4i64,   {  4 } },
+    { ISD::BITREVERSE, MVT::v8i32,   {  4 } },
+    { ISD::BITREVERSE, MVT::v16i16,  {  4 } },
+    { ISD::BITREVERSE, MVT::v32i8,   {  4 } },
+    { ISD::BITREVERSE, MVT::v2i64,   {  1 } },
+    { ISD::BITREVERSE, MVT::v4i32,   {  1 } },
+    { ISD::BITREVERSE, MVT::v8i16,   {  1 } },
+    { ISD::BITREVERSE, MVT::v16i8,   {  1 } },
+    { ISD::BITREVERSE, MVT::i64,     {  3 } },
+    { ISD::BITREVERSE, MVT::i32,     {  3 } },
+    { ISD::BITREVERSE, MVT::i16,     {  3 } },
+    { ISD::BITREVERSE, MVT::i8,      {  3 } }
   };
-  static const CostTblEntry AVX2CostTbl[] = {
-    { ISD::ABS,        MVT::v4i64,   2 }, // VBLENDVPD(X,VPSUBQ(0,X),X)
-    { ISD::ABS,        MVT::v8i32,   1 },
-    { ISD::ABS,        MVT::v16i16,  1 },
-    { ISD::ABS,        MVT::v32i8,   1 },
-    { ISD::BITREVERSE, MVT::v2i64,   3 },
-    { ISD::BITREVERSE, MVT::v4i64,   3 },
-    { ISD::BITREVERSE, MVT::v4i32,   3 },
-    { ISD::BITREVERSE, MVT::v8i32,   3 },
-    { ISD::BITREVERSE, MVT::v8i16,   3 },
-    { ISD::BITREVERSE, MVT::v16i16,  3 },
-    { ISD::BITREVERSE, MVT::v16i8,   3 },
-    { ISD::BITREVERSE, MVT::v32i8,   3 },
-    { ISD::BSWAP,      MVT::v4i64,   1 },
-    { ISD::BSWAP,      MVT::v8i32,   1 },
-    { ISD::BSWAP,      MVT::v16i16,  1 },
-    { ISD::CTLZ,       MVT::v2i64,   7 },
-    { ISD::CTLZ,       MVT::v4i64,   7 },
-    { ISD::CTLZ,       MVT::v4i32,   5 },
-    { ISD::CTLZ,       MVT::v8i32,   5 },
-    { ISD::CTLZ,       MVT::v8i16,   4 },
-    { ISD::CTLZ,       MVT::v16i16,  4 },
-    { ISD::CTLZ,       MVT::v16i8,   3 },
-    { ISD::CTLZ,       MVT::v32i8,   3 },
-    { ISD::CTPOP,      MVT::v2i64,   3 },
-    { ISD::CTPOP,      MVT::v4i64,   3 },
-    { ISD::CTPOP,      MVT::v4i32,   7 },
-    { ISD::CTPOP,      MVT::v8i32,   7 },
-    { ISD::CTPOP,      MVT::v8i16,   3 },
-    { ISD::CTPOP,      MVT::v16i16,  3 },
-    { ISD::CTPOP,      MVT::v16i8,   2 },
-    { ISD::CTPOP,      MVT::v32i8,   2 },
-    { ISD::CTTZ,       MVT::v2i64,   4 },
-    { ISD::CTTZ,       MVT::v4i64,   4 },
-    { ISD::CTTZ,       MVT::v4i32,   7 },
-    { ISD::CTTZ,       MVT::v8i32,   7 },
-    { ISD::CTTZ,       MVT::v8i16,   4 },
-    { ISD::CTTZ,       MVT::v16i16,  4 },
-    { ISD::CTTZ,       MVT::v16i8,   3 },
-    { ISD::CTTZ,       MVT::v32i8,   3 },
-    { ISD::SADDSAT,    MVT::v16i16,  1 },
-    { ISD::SADDSAT,    MVT::v32i8,   1 },
-    { ISD::SMAX,       MVT::v8i32,   1 },
-    { ISD::SMAX,       MVT::v16i16,  1 },
-    { ISD::SMAX,       MVT::v32i8,   1 },
-    { ISD::SMIN,       MVT::v8i32,   1 },
-    { ISD::SMIN,       MVT::v16i16,  1 },
-    { ISD::SMIN,       MVT::v32i8,   1 },
-    { ISD::SSUBSAT,    MVT::v16i16,  1 },
-    { ISD::SSUBSAT,    MVT::v32i8,   1 },
-    { ISD::UADDSAT,    MVT::v16i16,  1 },
-    { ISD::UADDSAT,    MVT::v32i8,   1 },
-    { ISD::UADDSAT,    MVT::v8i32,   3 }, // not + pminud + paddd
-    { ISD::UMAX,       MVT::v8i32,   1 },
-    { ISD::UMAX,       MVT::v16i16,  1 },
-    { ISD::UMAX,       MVT::v32i8,   1 },
-    { ISD::UMIN,       MVT::v8i32,   1 },
-    { ISD::UMIN,       MVT::v16i16,  1 },
-    { ISD::UMIN,       MVT::v32i8,   1 },
-    { ISD::USUBSAT,    MVT::v16i16,  1 },
-    { ISD::USUBSAT,    MVT::v32i8,   1 },
-    { ISD::USUBSAT,    MVT::v8i32,   2 }, // pmaxud + psubd
-    { ISD::FMAXNUM,    MVT::v8f32,   3 }, // MAXPS + CMPUNORDPS + BLENDVPS
-    { ISD::FMAXNUM,    MVT::v4f64,   3 }, // MAXPD + CMPUNORDPD + BLENDVPD
-    { ISD::FSQRT,      MVT::f32,     7 }, // Haswell from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v4f32,   7 }, // Haswell from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v8f32,  14 }, // Haswell from http://www.agner.org/
-    { ISD::FSQRT,      MVT::f64,    14 }, // Haswell from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v2f64,  14 }, // Haswell from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v4f64,  28 }, // Haswell from http://www.agner.org/
+  static const CostKindTblEntry AVX2CostTbl[] = {
+    { ISD::ABS,        MVT::v4i64,   {  2 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
+    { ISD::ABS,        MVT::v8i32,   {  1 } },
+    { ISD::ABS,        MVT::v16i16,  {  1 } },
+    { ISD::ABS,        MVT::v32i8,   {  1 } },
+    { ISD::BITREVERSE, MVT::v2i64,   {  3 } },
+    { ISD::BITREVERSE, MVT::v4i64,   {  3 } },
+    { ISD::BITREVERSE, MVT::v4i32,   {  3 } },
+    { ISD::BITREVERSE, MVT::v8i32,   {  3 } },
+    { ISD::BITREVERSE, MVT::v8i16,   {  3 } },
+    { ISD::BITREVERSE, MVT::v16i16,  {  3 } },
+    { ISD::BITREVERSE, MVT::v16i8,   {  3 } },
+    { ISD::BITREVERSE, MVT::v32i8,   {  3 } },
+    { ISD::BSWAP,      MVT::v4i64,   {  1 } },
+    { ISD::BSWAP,      MVT::v8i32,   {  1 } },
+    { ISD::BSWAP,      MVT::v16i16,  {  1 } },
+    { ISD::CTLZ,       MVT::v2i64,   {  7 } },
+    { ISD::CTLZ,       MVT::v4i64,   {  7 } },
+    { ISD::CTLZ,       MVT::v4i32,   {  5 } },
+    { ISD::CTLZ,       MVT::v8i32,   {  5 } },
+    { ISD::CTLZ,       MVT::v8i16,   {  4 } },
+    { ISD::CTLZ,       MVT::v16i16,  {  4 } },
+    { ISD::CTLZ,       MVT::v16i8,   {  3 } },
+    { ISD::CTLZ,       MVT::v32i8,   {  3 } },
+    { ISD::CTPOP,      MVT::v2i64,   {  3 } },
+    { ISD::CTPOP,      MVT::v4i64,   {  3 } },
+    { ISD::CTPOP,      MVT::v4i32,   {  7 } },
+    { ISD::CTPOP,      MVT::v8i32,   {  7 } },
+    { ISD::CTPOP,      MVT::v8i16,   {  3 } },
+    { ISD::CTPOP,      MVT::v16i16,  {  3 } },
+    { ISD::CTPOP,      MVT::v16i8,   {  2 } },
+    { ISD::CTPOP,      MVT::v32i8,   {  2 } },
+    { ISD::CTTZ,       MVT::v2i64,   {  4 } },
+    { ISD::CTTZ,       MVT::v4i64,   {  4 } },
+    { ISD::CTTZ,       MVT::v4i32,   {  7 } },
+    { ISD::CTTZ,       MVT::v8i32,   {  7 } },
+    { ISD::CTTZ,       MVT::v8i16,   {  4 } },
+    { ISD::CTTZ,       MVT::v16i16,  {  4 } },
+    { ISD::CTTZ,       MVT::v16i8,   {  3 } },
+    { ISD::CTTZ,       MVT::v32i8,   {  3 } },
+    { ISD::SADDSAT,    MVT::v16i16,  {  1 } },
+    { ISD::SADDSAT,    MVT::v32i8,   {  1 } },
+    { ISD::SMAX,       MVT::v8i32,   {  1 } },
+    { ISD::SMAX,       MVT::v16i16,  {  1 } },
+    { ISD::SMAX,       MVT::v32i8,   {  1 } },
+    { ISD::SMIN,       MVT::v8i32,   {  1 } },
+    { ISD::SMIN,       MVT::v16i16,  {  1 } },
+    { ISD::SMIN,       MVT::v32i8,   {  1 } },
+    { ISD::SSUBSAT,    MVT::v16i16,  {  1 } },
+    { ISD::SSUBSAT,    MVT::v32i8,   {  1 } },
+    { ISD::UADDSAT,    MVT::v16i16,  {  1 } },
+    { ISD::UADDSAT,    MVT::v32i8,   {  1 } },
+    { ISD::UADDSAT,    MVT::v8i32,   {  3 } }, // not + pminud + paddd
+    { ISD::UMAX,       MVT::v8i32,   {  1 } },
+    { ISD::UMAX,       MVT::v16i16,  {  1 } },
+    { ISD::UMAX,       MVT::v32i8,   {  1 } },
+    { ISD::UMIN,       MVT::v8i32,   {  1 } },
+    { ISD::UMIN,       MVT::v16i16,  {  1 } },
+    { ISD::UMIN,       MVT::v32i8,   {  1 } },
+    { ISD::USUBSAT,    MVT::v16i16,  {  1 } },
+    { ISD::USUBSAT,    MVT::v32i8,   {  1 } },
+    { ISD::USUBSAT,    MVT::v8i32,   {  2 } }, // pmaxud + psubd
+    { ISD::FMAXNUM,    MVT::v8f32,   {  3 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+    { ISD::FMAXNUM,    MVT::v4f64,   {  3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+    { ISD::FSQRT,      MVT::f32,     {  7 } }, // Haswell from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v4f32,   {  7 } }, // Haswell from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v8f32,   { 14 } }, // Haswell from http://www.agner.org/
+    { ISD::FSQRT,      MVT::f64,     { 14 } }, // Haswell from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v2f64,   { 14 } }, // Haswell from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v4f64,   { 28 } }, // Haswell from http://www.agner.org/
   };
-  static const CostTblEntry AVX1CostTbl[] = {
-    { ISD::ABS,        MVT::v4i64,   5 }, // VBLENDVPD(X,VPSUBQ(0,X),X)
-    { ISD::ABS,        MVT::v8i32,   3 },
-    { ISD::ABS,        MVT::v16i16,  3 },
-    { ISD::ABS,        MVT::v32i8,   3 },
-    { ISD::BITREVERSE, MVT::v4i64,  12 }, // 2 x 128-bit Op + extract/insert
-    { ISD::BITREVERSE, MVT::v8i32,  12 }, // 2 x 128-bit Op + extract/insert
-    { ISD::BITREVERSE, MVT::v16i16, 12 }, // 2 x 128-bit Op + extract/insert
-    { ISD::BITREVERSE, MVT::v32i8,  12 }, // 2 x 128-bit Op + extract/insert
-    { ISD::BSWAP,      MVT::v4i64,   4 },
-    { ISD::BSWAP,      MVT::v8i32,   4 },
-    { ISD::BSWAP,      MVT::v16i16,  4 },
-    { ISD::CTLZ,       MVT::v4i64,  48 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTLZ,       MVT::v8i32,  38 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTLZ,       MVT::v16i16, 30 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTLZ,       MVT::v32i8,  20 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v4i64,  16 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v8i32,  24 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v16i16, 20 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTPOP,      MVT::v32i8,  14 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTTZ,       MVT::v4i64,  22 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTTZ,       MVT::v8i32,  30 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTTZ,       MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert
-    { ISD::CTTZ,       MVT::v32i8,  20 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SADDSAT,    MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SADDSAT,    MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SMAX,       MVT::v8i32,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SMAX,       MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SMAX,       MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SMIN,       MVT::v8i32,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SMIN,       MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SMIN,       MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SSUBSAT,    MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::SSUBSAT,    MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UADDSAT,    MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UADDSAT,    MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UADDSAT,    MVT::v8i32,   8 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UMAX,       MVT::v8i32,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UMAX,       MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UMAX,       MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UMIN,       MVT::v8i32,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UMIN,       MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::UMIN,       MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::USUBSAT,    MVT::v16i16,  4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::USUBSAT,    MVT::v32i8,   4 }, // 2 x 128-bit Op + extract/insert
-    { ISD::USUBSAT,    MVT::v8i32,   6 }, // 2 x 128-bit Op + extract/insert
-    { ISD::FMAXNUM,    MVT::f32,     3 }, // MAXSS + CMPUNORDSS + BLENDVPS
-    { ISD::FMAXNUM,    MVT::v4f32,   3 }, // MAXPS + CMPUNORDPS + BLENDVPS
-    { ISD::FMAXNUM,    MVT::v8f32,   5 }, // MAXPS + CMPUNORDPS + BLENDVPS + ?
-    { ISD::FMAXNUM,    MVT::f64,     3 }, // MAXSD + CMPUNORDSD + BLENDVPD
-    { ISD::FMAXNUM,    MVT::v2f64,   3 }, // MAXPD + CMPUNORDPD + BLENDVPD
-    { ISD::FMAXNUM,    MVT::v4f64,   5 }, // MAXPD + CMPUNORDPD + BLENDVPD + ?
-    { ISD::FSQRT,      MVT::f32,    14 }, // SNB from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v4f32,  14 }, // SNB from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v8f32,  28 }, // SNB from http://www.agner.org/
-    { ISD::FSQRT,      MVT::f64,    21 }, // SNB from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v2f64,  21 }, // SNB from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v4f64,  43 }, // SNB from http://www.agner.org/
+  static const CostKindTblEntry AVX1CostTbl[] = {
+    { ISD::ABS,        MVT::v4i64,   {  5 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
+    { ISD::ABS,        MVT::v8i32,   {  3 } },
+    { ISD::ABS,        MVT::v16i16,  {  3 } },
+    { ISD::ABS,        MVT::v32i8,   {  3 } },
+    { ISD::BITREVERSE, MVT::v4i64,   { 12 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::BITREVERSE, MVT::v8i32,   { 12 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::BITREVERSE, MVT::v16i16,  { 12 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::BITREVERSE, MVT::v32i8,   { 12 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::BSWAP,      MVT::v4i64,   {  4 } },
+    { ISD::BSWAP,      MVT::v8i32,   {  4 } },
+    { ISD::BSWAP,      MVT::v16i16,  {  4 } },
+    { ISD::CTLZ,       MVT::v4i64,   { 48 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTLZ,       MVT::v8i32,   { 38 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTLZ,       MVT::v16i16,  { 30 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTLZ,       MVT::v32i8,   { 20 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v4i64,   { 16 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v8i32,   { 24 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v16i16,  { 20 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTPOP,      MVT::v32i8,   { 14 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v4i64,   { 22 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v8i32,   { 30 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v16i16,  { 26 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::CTTZ,       MVT::v32i8,   { 20 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SADDSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SADDSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SMAX,       MVT::v8i32,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SMAX,       MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SMAX,       MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SMIN,       MVT::v8i32,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SMIN,       MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SMIN,       MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SSUBSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::SSUBSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UADDSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UADDSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UADDSAT,    MVT::v8i32,   {  8 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UMAX,       MVT::v8i32,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UMAX,       MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UMAX,       MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UMIN,       MVT::v8i32,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UMIN,       MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::UMIN,       MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::USUBSAT,    MVT::v16i16,  {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::USUBSAT,    MVT::v32i8,   {  4 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::USUBSAT,    MVT::v8i32,   {  6 } }, // 2 x 128-bit Op + extract/insert
+    { ISD::FMAXNUM,    MVT::f32,     {  3 } }, // MAXSS + CMPUNORDSS + BLENDVPS
+    { ISD::FMAXNUM,    MVT::v4f32,   {  3 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+    { ISD::FMAXNUM,    MVT::v8f32,   {  5 } }, // MAXPS + CMPUNORDPS + BLENDVPS + ?
+    { ISD::FMAXNUM,    MVT::f64,     {  3 } }, // MAXSD + CMPUNORDSD + BLENDVPD
+    { ISD::FMAXNUM,    MVT::v2f64,   {  3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+    { ISD::FMAXNUM,    MVT::v4f64,   {  5 } }, // MAXPD + CMPUNORDPD + BLENDVPD + ?
+    { ISD::FSQRT,      MVT::f32,     { 14 } }, // SNB from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v4f32,   { 14 } }, // SNB from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v8f32,   { 28 } }, // SNB from http://www.agner.org/
+    { ISD::FSQRT,      MVT::f64,     { 21 } }, // SNB from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v2f64,   { 21 } }, // SNB from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v4f64,   { 43 } }, // SNB from http://www.agner.org/
   };
-  static const CostTblEntry GLMCostTbl[] = {
-    { ISD::FSQRT, MVT::f32,   19 }, // sqrtss
-    { ISD::FSQRT, MVT::v4f32, 37 }, // sqrtps
-    { ISD::FSQRT, MVT::f64,   34 }, // sqrtsd
-    { ISD::FSQRT, MVT::v2f64, 67 }, // sqrtpd
+  static const CostKindTblEntry GLMCostTbl[] = {
+    { ISD::FSQRT,      MVT::f32,     { 19 } }, // sqrtss
+    { ISD::FSQRT,      MVT::v4f32,   { 37 } }, // sqrtps
+    { ISD::FSQRT,      MVT::f64,     { 34 } }, // sqrtsd
+    { ISD::FSQRT,      MVT::v2f64,   { 67 } }, // sqrtpd
   };
-  static const CostTblEntry SLMCostTbl[] = {
-    { ISD::FSQRT, MVT::f32,   20 }, // sqrtss
-    { ISD::FSQRT, MVT::v4f32, 40 }, // sqrtps
-    { ISD::FSQRT, MVT::f64,   35 }, // sqrtsd
-    { ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
+  static const CostKindTblEntry SLMCostTbl[] = {
+    { ISD::FSQRT,      MVT::f32,     { 20 } }, // sqrtss
+    { ISD::FSQRT,      MVT::v4f32,   { 40 } }, // sqrtps
+    { ISD::FSQRT,      MVT::f64,     { 35 } }, // sqrtsd
+    { ISD::FSQRT,      MVT::v2f64,   { 70 } }, // sqrtpd
   };
-  static const CostTblEntry SSE42CostTbl[] = {
-    { ISD::USUBSAT,    MVT::v4i32,   2 }, // pmaxud + psubd
-    { ISD::UADDSAT,    MVT::v4i32,   3 }, // not + pminud + paddd
-    { ISD::FSQRT,      MVT::f32,    18 }, // Nehalem from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v4f32,  18 }, // Nehalem from http://www.agner.org/
+  static const CostKindTblEntry SSE42CostTbl[] = {
+    { ISD::USUBSAT,    MVT::v4i32,   {  2 } }, // pmaxud + psubd
+    { ISD::UADDSAT,    MVT::v4i32,   {  3 } }, // not + pminud + paddd
+    { ISD::FSQRT,      MVT::f32,     { 18 } }, // Nehalem from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v4f32,   { 18 } }, // Nehalem from http://www.agner.org/
   };
-  static const CostTblEntry SSE41CostTbl[] = {
-    { ISD::ABS,        MVT::v2i64,   2 }, // BLENDVPD(X,PSUBQ(0,X),X)
-    { ISD::SMAX,       MVT::v4i32,   1 },
-    { ISD::SMAX,       MVT::v16i8,   1 },
-    { ISD::SMIN,       MVT::v4i32,   1 },
-    { ISD::SMIN,       MVT::v16i8,   1 },
-    { ISD::UMAX,       MVT::v4i32,   1 },
-    { ISD::UMAX,       MVT::v8i16,   1 },
-    { ISD::UMIN,       MVT::v4i32,   1 },
-    { ISD::UMIN,       MVT::v8i16,   1 },
+  static const CostKindTblEntry SSE41CostTbl[] = {
+    { ISD::ABS,        MVT::v2i64,   {  2 } }, // BLENDVPD(X,PSUBQ(0,X),X)
+    { ISD::SMAX,       MVT::v4i32,   {  1 } },
+    { ISD::SMAX,       MVT::v16i8,   {  1 } },
+    { ISD::SMIN,       MVT::v4i32,   {  1 } },
+    { ISD::SMIN,       MVT::v16i8,   {  1 } },
+    { ISD::UMAX,       MVT::v4i32,   {  1 } },
+    { ISD::UMAX,       MVT::v8i16,   {  1 } },
+    { ISD::UMIN,       MVT::v4i32,   {  1 } },
+    { ISD::UMIN,       MVT::v8i16,   {  1 } },
   };
-  static const CostTblEntry SSSE3CostTbl[] = {
-    { ISD::ABS,        MVT::v4i32,   1 },
-    { ISD::ABS,        MVT::v8i16,   1 },
-    { ISD::ABS,        MVT::v16i8,   1 },
-    { ISD::BITREVERSE, MVT::v2i64,   5 },
-    { ISD::BITREVERSE, MVT::v4i32,   5 },
-    { ISD::BITREVERSE, MVT::v8i16,   5 },
-    { ISD::BITREVERSE, MVT::v16i8,   5 },
-    { ISD::BSWAP,      MVT::v2i64,   1 },
-    { ISD::BSWAP,      MVT::v4i32,   1 },
-    { ISD::BSWAP,      MVT::v8i16,   1 },
-    { ISD::CTLZ,       MVT::v2i64,  23 },
-    { ISD::CTLZ,       MVT::v4i32,  18 },
-    { ISD::CTLZ,       MVT::v8i16,  14 },
-    { ISD::CTLZ,       MVT::v16i8,   9 },
-    { ISD::CTPOP,      MVT::v2i64,   7 },
-    { ISD::CTPOP,      MVT::v4i32,  11 },
-    { ISD::CTPOP,      MVT::v8i16,   9 },
-    { ISD::CTPOP,      MVT::v16i8,   6 },
-    { ISD::CTTZ,       MVT::v2i64,  10 },
-    { ISD::CTTZ,       MVT::v4i32,  14 },
-    { ISD::CTTZ,       MVT::v8i16,  12 },
-    { ISD::CTTZ,       MVT::v16i8,   9 }
+  static const CostKindTblEntry SSSE3CostTbl[] = {
+    { ISD::ABS,        MVT::v4i32,   {  1 } },
+    { ISD::ABS,        MVT::v8i16,   {  1 } },
+    { ISD::ABS,        MVT::v16i8,   {  1 } },
+    { ISD::BITREVERSE, MVT::v2i64,   {  5 } },
+    { ISD::BITREVERSE, MVT::v4i32,   {  5 } },
+    { ISD::BITREVERSE, MVT::v8i16,   {  5 } },
+    { ISD::BITREVERSE, MVT::v16i8,   {  5 } },
+    { ISD::BSWAP,      MVT::v2i64,   {  1 } },
+    { ISD::BSWAP,      MVT::v4i32,   {  1 } },
+    { ISD::BSWAP,      MVT::v8i16,   {  1 } },
+    { ISD::CTLZ,       MVT::v2i64,   { 23 } },
+    { ISD::CTLZ,       MVT::v4i32,   { 18 } },
+    { ISD::CTLZ,       MVT::v8i16,   { 14 } },
+    { ISD::CTLZ,       MVT::v16i8,   {  9 } },
+    { ISD::CTPOP,      MVT::v2i64,   {  7 } },
+    { ISD::CTPOP,      MVT::v4i32,   { 11 } },
+    { ISD::CTPOP,      MVT::v8i16,   {  9 } },
+    { ISD::CTPOP,      MVT::v16i8,   {  6 } },
+    { ISD::CTTZ,       MVT::v2i64,   { 10 } },
+    { ISD::CTTZ,       MVT::v4i32,   { 14 } },
+    { ISD::CTTZ,       MVT::v8i16,   { 12 } },
+    { ISD::CTTZ,       MVT::v16i8,   {  9 } }
   };
-  static const CostTblEntry SSE2CostTbl[] = {
-    { ISD::ABS,        MVT::v2i64,   4 },
-    { ISD::ABS,        MVT::v4i32,   3 },
-    { ISD::ABS,        MVT::v8i16,   2 },
-    { ISD::ABS,        MVT::v16i8,   2 },
-    { ISD::BITREVERSE, MVT::v2i64,  29 },
-    { ISD::BITREVERSE, MVT::v4i32,  27 },
-    { ISD::BITREVERSE, MVT::v8i16,  27 },
-    { ISD::BITREVERSE, MVT::v16i8,  20 },
-    { ISD::BSWAP,      MVT::v2i64,   7 },
-    { ISD::BSWAP,      MVT::v4i32,   7 },
-    { ISD::BSWAP,      MVT::v8i16,   7 },
-    { ISD::CTLZ,       MVT::v2i64,  25 },
-    { ISD::CTLZ,       MVT::v4i32,  26 },
-    { ISD::CTLZ,       MVT::v8i16,  20 },
-    { ISD::CTLZ,       MVT::v16i8,  17 },
-    { ISD::CTPOP,      MVT::v2i64,  12 },
-    { ISD::CTPOP,      MVT::v4i32,  15 },
-    { ISD::CTPOP,      MVT::v8i16,  13 },
-    { ISD::CTPOP,      MVT::v16i8,  10 },
-    { ISD::CTTZ,       MVT::v2i64,  14 },
-    { ISD::CTTZ,       MVT::v4i32,  18 },
-    { ISD::CTTZ,       MVT::v8i16,  16 },
-    { ISD::CTTZ,       MVT::v16i8,  13 },
-    { ISD::SADDSAT,    MVT::v8i16,   1 },
-    { ISD::SADDSAT,    MVT::v16i8,   1 },
-    { ISD::SMAX,       MVT::v8i16,   1 },
-    { ISD::SMIN,       MVT::v8i16,   1 },
-    { ISD::SSUBSAT,    MVT::v8i16,   1 },
-    { ISD::SSUBSAT,    MVT::v16i8,   1 },
-    { ISD::UADDSAT,    MVT::v8i16,   1 },
-    { ISD::UADDSAT,    MVT::v16i8,   1 },
-    { ISD::UMAX,       MVT::v8i16,   2 },
-    { ISD::UMAX,       MVT::v16i8,   1 },
-    { ISD::UMIN,       MVT::v8i16,   2 },
-    { ISD::UMIN,       MVT::v16i8,   1 },
-    { ISD::USUBSAT,    MVT::v8i16,   1 },
-    { ISD::USUBSAT,    MVT::v16i8,   1 },
-    { ISD::FMAXNUM,    MVT::f64,     4 },
-    { ISD::FMAXNUM,    MVT::v2f64,   4 },
-    { ISD::FSQRT,      MVT::f64,    32 }, // Nehalem from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v2f64,  32 }, // Nehalem from http://www.agner.org/
+  static const CostKindTblEntry SSE2CostTbl[] = {
+    { ISD::ABS,        MVT::v2i64,   {  4 } },
+    { ISD::ABS,        MVT::v4i32,   {  3 } },
+    { ISD::ABS,        MVT::v8i16,   {  2 } },
+    { ISD::ABS,        MVT::v16i8,   {  2 } },
+    { ISD::BITREVERSE, MVT::v2i64,   { 29 } },
+    { ISD::BITREVERSE, MVT::v4i32,   { 27 } },
+    { ISD::BITREVERSE, MVT::v8i16,   { 27 } },
+    { ISD::BITREVERSE, MVT::v16i8,   { 20 } },
+    { ISD::BSWAP,      MVT::v2i64,   {  7 } },
+    { ISD::BSWAP,      MVT::v4i32,   {  7 } },
+    { ISD::BSWAP,      MVT::v8i16,   {  7 } },
+    { ISD::CTLZ,       MVT::v2i64,   { 25 } },
+    { ISD::CTLZ,       MVT::v4i32,   { 26 } },
+    { ISD::CTLZ,       MVT::v8i16,   { 20 } },
+    { ISD::CTLZ,       MVT::v16i8,   { 17 } },
+    { ISD::CTPOP,      MVT::v2i64,   { 12 } },
+    { ISD::CTPOP,      MVT::v4i32,   { 15 } },
+    { ISD::CTPOP,      MVT::v8i16,   { 13 } },
+    { ISD::CTPOP,      MVT::v16i8,   { 10 } },
+    { ISD::CTTZ,       MVT::v2i64,   { 14 } },
+    { ISD::CTTZ,       MVT::v4i32,   { 18 } },
+    { ISD::CTTZ,       MVT::v8i16,   { 16 } },
+    { ISD::CTTZ,       MVT::v16i8,   { 13 } },
+    { ISD::SADDSAT,    MVT::v8i16,   {  1 } },
+    { ISD::SADDSAT,    MVT::v16i8,   {  1 } },
+    { ISD::SMAX,       MVT::v8i16,   {  1 } },
+    { ISD::SMIN,       MVT::v8i16,   {  1 } },
+    { ISD::SSUBSAT,    MVT::v8i16,   {  1 } },
+    { ISD::SSUBSAT,    MVT::v16i8,   {  1 } },
+    { ISD::UADDSAT,    MVT::v8i16,   {  1 } },
+    { ISD::UADDSAT,    MVT::v16i8,   {  1 } },
+    { ISD::UMAX,       MVT::v8i16,   {  2 } },
+    { ISD::UMAX,       MVT::v16i8,   {  1 } },
+    { ISD::UMIN,       MVT::v8i16,   {  2 } },
+    { ISD::UMIN,       MVT::v16i8,   {  1 } },
+    { ISD::USUBSAT,    MVT::v8i16,   {  1 } },
+    { ISD::USUBSAT,    MVT::v16i8,   {  1 } },
+    { ISD::FMAXNUM,    MVT::f64,     {  4 } },
+    { ISD::FMAXNUM,    MVT::v2f64,   {  4 } },
+    { ISD::FSQRT,      MVT::f64,     { 32 } }, // Nehalem from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v2f64,   { 32 } }, // Nehalem from http://www.agner.org/
   };
-  static const CostTblEntry SSE1CostTbl[] = {
-    { ISD::FMAXNUM,    MVT::f32,     4 },
-    { ISD::FMAXNUM,    MVT::v4f32,   4 },
-    { ISD::FSQRT,      MVT::f32,    28 }, // Pentium III from http://www.agner.org/
-    { ISD::FSQRT,      MVT::v4f32,  56 }, // Pentium III from http://www.agner.org/
+  static const CostKindTblEntry SSE1CostTbl[] = {
+    { ISD::FMAXNUM,    MVT::f32,     {  4 } },
+    { ISD::FMAXNUM,    MVT::v4f32,   {  4 } },
+    { ISD::FSQRT,      MVT::f32,     { 28 } }, // Pentium III from http://www.agner.org/
+    { ISD::FSQRT,      MVT::v4f32,   { 56 } }, // Pentium III from http://www.agner.org/
   };
-  static const CostTblEntry BMI64CostTbl[] = { // 64-bit targets
-    { ISD::CTTZ,       MVT::i64,     1 },
+  static const CostKindTblEntry BMI64CostTbl[] = { // 64-bit targets
+    { ISD::CTTZ,       MVT::i64,     {  1 } },
   };
-  static const CostTblEntry BMI32CostTbl[] = { // 32 or 64-bit targets
-    { ISD::CTTZ,       MVT::i32,     1 },
-    { ISD::CTTZ,       MVT::i16,     1 },
-    { ISD::CTTZ,       MVT::i8,      1 },
+  static const CostKindTblEntry BMI32CostTbl[] = { // 32 or 64-bit targets
+    { ISD::CTTZ,       MVT::i32,     {  1 } },
+    { ISD::CTTZ,       MVT::i16,     {  1 } },
+    { ISD::CTTZ,       MVT::i8,      {  1 } },
   };
-  static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets
-    { ISD::CTLZ,       MVT::i64,     1 },
+  static const CostKindTblEntry LZCNT64CostTbl[] = { // 64-bit targets
+    { ISD::CTLZ,       MVT::i64,     {  1 } },
   };
-  static const CostTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets
-    { ISD::CTLZ,       MVT::i32,     1 },
-    { ISD::CTLZ,       MVT::i16,     1 },
-    { ISD::CTLZ,       MVT::i8,      1 },
+  static const CostKindTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets
+    { ISD::CTLZ,       MVT::i32,     {  1 } },
+    { ISD::CTLZ,       MVT::i16,     {  1 } },
+    { ISD::CTLZ,       MVT::i8,      {  1 } },
   };
-  static const CostTblEntry POPCNT64CostTbl[] = { // 64-bit targets
-    { ISD::CTPOP,      MVT::i64,     1 },
+  static const CostKindTblEntry POPCNT64CostTbl[] = { // 64-bit targets
+    { ISD::CTPOP,      MVT::i64,     {  1 } },
   };
-  static const CostTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
-    { ISD::CTPOP,      MVT::i32,     1 },
-    { ISD::CTPOP,      MVT::i16,     1 },
-    { ISD::CTPOP,      MVT::i8,      1 },
+  static const CostKindTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
+    { ISD::CTPOP,      MVT::i32,     {  1 } },
+    { ISD::CTPOP,      MVT::i16,     {  1 } },
+    { ISD::CTPOP,      MVT::i8,      {  1 } },
   };
-  static const CostTblEntry X64CostTbl[] = { // 64-bit targets
-    { ISD::ABS,        MVT::i64,     2 }, // SUB+CMOV
-    { ISD::BITREVERSE, MVT::i64,    14 },
-    { ISD::BSWAP,      MVT::i64,     1 },
-    { ISD::CTLZ,       MVT::i64,     4 }, // BSR+XOR or BSR+XOR+CMOV
-    { ISD::CTTZ,       MVT::i64,     3 }, // TEST+BSF+CMOV/BRANCH
-    { ISD::CTPOP,      MVT::i64,    10 },
-    { ISD::SADDO,      MVT::i64,     1 },
-    { ISD::UADDO,      MVT::i64,     1 },
-    { ISD::UMULO,      MVT::i64,     2 }, // mulq + seto
+  static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
+    { ISD::ABS,        MVT::i64,     {  2 } }, // SUB+CMOV
+    { ISD::BITREVERSE, MVT::i64,     { 14 } },
+    { ISD::BSWAP,      MVT::i64,     {  1 } },
+    { ISD::CTLZ,       MVT::i64,     {  4 } }, // BSR+XOR or BSR+XOR+CMOV
+    { ISD::CTTZ,       MVT::i64,     {  3 } }, // TEST+BSF+CMOV/BRANCH
+    { ISD::CTPOP,      MVT::i64,     { 10 } },
+    { ISD::SADDO,      MVT::i64,     {  1 } },
+    { ISD::UADDO,      MVT::i64,     {  1 } },
+    { ISD::UMULO,      MVT::i64,     {  2 } }, // mulq + seto
   };
-  static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
-    { ISD::ABS,        MVT::i32,     2 }, // SUB+CMOV
-    { ISD::ABS,        MVT::i16,     2 }, // SUB+CMOV
-    { ISD::BITREVERSE, MVT::i32,    14 },
-    { ISD::BITREVERSE, MVT::i16,    14 },
-    { ISD::BITREVERSE, MVT::i8,     11 },
-    { ISD::BSWAP,      MVT::i32,     1 },
-    { ISD::BSWAP,      MVT::i16,     1 }, // ROL
-    { ISD::CTLZ,       MVT::i32,     4 }, // BSR+XOR or BSR+XOR+CMOV
-    { ISD::CTLZ,       MVT::i16,     4 }, // BSR+XOR or BSR+XOR+CMOV
-    { ISD::CTLZ,       MVT::i8,      4 }, // BSR+XOR or BSR+XOR+CMOV
-    { ISD::CTTZ,       MVT::i32,     3 }, // TEST+BSF+CMOV/BRANCH
-    { ISD::CTTZ,       MVT::i16,     3 }, // TEST+BSF+CMOV/BRANCH
-    { ISD::CTTZ,       MVT::i8,      3 }, // TEST+BSF+CMOV/BRANCH
-    { ISD::CTPOP,      MVT::i32,     8 },
-    { ISD::CTPOP,      MVT::i16,     9 },
-    { ISD::CTPOP,      MVT::i8,      7 },
-    { ISD::SADDO,      MVT::i32,     1 },
-    { ISD::SADDO,      MVT::i16,     1 },
-    { ISD::SADDO,      MVT::i8,      1 },
-    { ISD::UADDO,      MVT::i32,     1 },
-    { ISD::UADDO,      MVT::i16,     1 },
-    { ISD::UADDO,      MVT::i8,      1 },
-    { ISD::UMULO,      MVT::i32,     2 }, // mul + seto
-    { ISD::UMULO,      MVT::i16,     2 },
-    { ISD::UMULO,      MVT::i8,      2 },
+  static const CostKindTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+    { ISD::ABS,        MVT::i32,     {  2 } }, // SUB+CMOV
+    { ISD::ABS,        MVT::i16,     {  2 } }, // SUB+CMOV
+    { ISD::BITREVERSE, MVT::i32,     { 14 } },
+    { ISD::BITREVERSE, MVT::i16,     { 14 } },
+    { ISD::BITREVERSE, MVT::i8,      { 11 } },
+    { ISD::BSWAP,      MVT::i32,     {  1 } },
+    { ISD::BSWAP,      MVT::i16,     {  1 } }, // ROL
+    { ISD::CTLZ,       MVT::i32,     {  4 } }, // BSR+XOR or BSR+XOR+CMOV
+    { ISD::CTLZ,       MVT::i16,     {  4 } }, // BSR+XOR or BSR+XOR+CMOV
+    { ISD::CTLZ,       MVT::i8,      {  4 } }, // BSR+XOR or BSR+XOR+CMOV
+    { ISD::CTTZ,       MVT::i32,     {  3 } }, // TEST+BSF+CMOV/BRANCH
+    { ISD::CTTZ,       MVT::i16,     {  3 } }, // TEST+BSF+CMOV/BRANCH
+    { ISD::CTTZ,       MVT::i8,      {  3 } }, // TEST+BSF+CMOV/BRANCH
+    { ISD::CTPOP,      MVT::i32,     {  8 } },
+    { ISD::CTPOP,      MVT::i16,     {  9 } },
+    { ISD::CTPOP,      MVT::i8,      {  7 } },
+    { ISD::SADDO,      MVT::i32,     {  1 } },
+    { ISD::SADDO,      MVT::i16,     {  1 } },
+    { ISD::SADDO,      MVT::i8,      {  1 } },
+    { ISD::UADDO,      MVT::i32,     {  1 } },
+    { ISD::UADDO,      MVT::i16,     {  1 } },
+    { ISD::UADDO,      MVT::i8,      {  1 } },
+    { ISD::UMULO,      MVT::i32,     {  2 } }, // mul + seto
+    { ISD::UMULO,      MVT::i16,     {  2 } },
+    { ISD::UMULO,      MVT::i8,      {  2 } },
   };
 
   Type *RetTy = ICA.getReturnType();
@@ -3670,110 +3670,131 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 
     if (ST->useGLMDivSqrtCosts())
       if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->useSLMArithCosts())
       if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasBITALG())
       if (const auto *Entry = CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasVPOPCNTDQ())
       if (const auto *Entry = CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasCDI())
       if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasBWI())
       if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasAVX512())
       if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasXOP())
       if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasAVX2())
       if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasAVX())
       if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasSSE42())
       if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasSSE41())
       if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasSSSE3())
       if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasSSE2())
       if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasSSE1())
       if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (ST->hasBMI()) {
       if (ST->is64Bit())
         if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
-          return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                                 ICA.getFlags());
+          if (auto KindCost = Entry->Cost[CostKind])
+            return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                   ICA.getFlags());
 
       if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
     }
 
     if (ST->hasLZCNT()) {
       if (ST->is64Bit())
         if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))
-          return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                                 ICA.getFlags());
+          if (auto KindCost = Entry->Cost[CostKind])
+            return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                   ICA.getFlags());
 
       if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
     }
 
     if (ST->hasPOPCNT()) {
       if (ST->is64Bit())
         if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy))
-          return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                                 ICA.getFlags());
+          if (auto KindCost = Entry->Cost[CostKind])
+            return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                   ICA.getFlags());
 
       if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
     }
 
     if (ISD == ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
@@ -3789,11 +3810,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
 
     if (ST->is64Bit())
       if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
-        return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
-                               ICA.getFlags());
+        if (auto KindCost = Entry->Cost[CostKind])
+          return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                                 ICA.getFlags());
 
     if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
-      return adjustTableCost(Entry->ISD, Entry->Cost, LT.first, ICA.getFlags());
+      if (auto KindCost = Entry->Cost[CostKind])
+        return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+                               ICA.getFlags());
   }
 
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
index d6725f464368..b25e69b01060 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
@@ -639,10 +639,10 @@ define i32 @frem(i32 %arg) {
 
 define i32 @fsqrt(i32 %arg) {
 ; SSE1-LABEL: 'fsqrt'
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
@@ -650,80 +650,69 @@ define i32 @fsqrt(i32 %arg) {
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE2-LABEL: 'fsqrt'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fsqrt'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
-; AVX1-LABEL: 'fsqrt'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fsqrt'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; AVX-LABEL: 'fsqrt'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fsqrt'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fsqrt'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'fsqrt'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = call float @llvm.sqrt.f32(float undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
index 7783cd900a9b..1474865890f0 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
@@ -2,14 +2,14 @@
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
 ;
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
-; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
 
 define i32 @fadd(i32 %arg) {
 ; SSE1-LABEL: 'fadd'
@@ -584,10 +584,10 @@ define i32 @frem(i32 %arg) {
 
 define i32 @fsqrt(i32 %arg) {
 ; SSE1-LABEL: 'fsqrt'
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
@@ -595,80 +595,69 @@ define i32 @fsqrt(i32 %arg) {
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE2-LABEL: 'fsqrt'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fsqrt'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
-; AVX1-LABEL: 'fsqrt'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fsqrt'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; AVX-LABEL: 'fsqrt'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fsqrt'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fsqrt'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'fsqrt'
-; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = call float @llvm.sqrt.f32(float undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/costmodel.ll b/llvm/test/Analysis/CostModel/X86/costmodel.ll
index 336a2f6be7bb..4e24b807a195 100644
--- a/llvm/test/Analysis/CostModel/X86/costmodel.ll
+++ b/llvm/test/Analysis/CostModel/X86/costmodel.ll
@@ -18,7 +18,7 @@ define i64 @foo(i64 %arg) {
 ; LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to i8*
 ; LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64
 ; LATENCY-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32
-; LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; LATENCY-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void undef()
 ; LATENCY-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 undef
 ;
@@ -31,7 +31,7 @@ define i64 @foo(i64 %arg) {
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to i8*
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32
-; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: call void undef()
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i64 undef
 ;

diff  --git a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
index 7374e2e97f67..162776b33079 100644
--- a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
@@ -4,19 +4,19 @@
 
 define i32 @f32(i32 %arg) {
 ; SSE2-LABEL: 'f32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'f32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = call float @llvm.maxnum.f32(float undef, float undef)
@@ -29,19 +29,19 @@ define i32 @f32(i32 %arg) {
 
 define i32 @f64(i32 %arg) {
 ; SSE2-LABEL: 'f64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'f64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %f64 = call double @llvm.maxnum.f64(double undef, double undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
index ee9fcf9983d3..53a5a20612de 100644
--- a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
@@ -4,19 +4,19 @@
 
 define i32 @f32(i32 %arg) {
 ; SSE2-LABEL: 'f32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'f32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = call float @llvm.minnum.f32(float undef, float undef)
@@ -29,19 +29,19 @@ define i32 @f32(i32 %arg) {
 
 define i32 @f64(i32 %arg) {
 ; SSE2-LABEL: 'f64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 168 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'f64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %f64 = call double @llvm.minnum.f64(double undef, double undef)

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index ac61f881daa0..179dc1d5bd8d 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -55,17 +55,17 @@ define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; LATE-LABEL: 'umul'
-; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'umul'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'umul'
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
@@ -237,18 +237,18 @@ define void @cttz(i32 %a, <16 x i32> %va) {
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; LATE-LABEL: 'cttz'
-; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'cttz'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'cttz'
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
@@ -263,18 +263,18 @@ define void @ctlz(i32 %a, <16 x i32> %va) {
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; LATE-LABEL: 'ctlz'
-; LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE-LABEL: 'ctlz'
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SIZE_LATE-LABEL: 'ctlz'
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
index 16dd203887ec..7c4969d7f01d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
@@ -35,27 +35,36 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
 ; INSTCOMBINEONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
 ; INSTCOMBINEONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
 ; INSTCOMBINEONLY:       bb2:
-; INSTCOMBINEONLY-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINEONLY-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINEONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINEONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
 ; INSTCOMBINEONLY-NEXT:    br label [[BB5]]
 ; INSTCOMBINEONLY:       bb5:
-; INSTCOMBINEONLY-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[UMUL_OV]], [[BB2]] ]
+; INSTCOMBINEONLY-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
 ; INSTCOMBINEONLY-NEXT:    ret i1 [[T6]]
 ;
 ; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:  bb:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[UMUL_OV]]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGONLY:       bb2:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGONLY:       bb5:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    ret i1 [[T6]]
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[ARG1_FR:%.*]] = freeze i64 [[ARG1:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1_FR]])
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[UMUL_OV]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb2:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb5:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0
@@ -92,9 +101,9 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
 ; INSTCOMBINEONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
 ; INSTCOMBINEONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
 ; INSTCOMBINEONLY:       bb2:
-; INSTCOMBINEONLY-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINEONLY-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINEONLY-NEXT:    [[PHI_BO:%.*]] = xor i1 [[UMUL_OV]], true
+; INSTCOMBINEONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINEONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINEONLY-NEXT:    [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
 ; INSTCOMBINEONLY-NEXT:    br label [[BB5]]
 ; INSTCOMBINEONLY:       bb5:
 ; INSTCOMBINEONLY-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
@@ -103,19 +112,28 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
 ; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:  bb:
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[PHI_BO:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGONLY:       bb2:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGONLY:       bb5:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
 ; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    ret i1 [[T6]]
 ;
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
 ; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[ARG1_FR:%.*]] = freeze i64 [[ARG1:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1_FR]])
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[PHI_BO:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[PHI_BO]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb2:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE:       bb5:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0