[llvm] 626a84d - [CostModel][X86] getTypeBasedIntrinsicInstrCost - convert to CostKindTblEntry
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 4 09:59:22 PDT 2022
Author: Simon Pilgrim
Date: 2022-09-04T17:59:08+01:00
New Revision: 626a84db477cf5e2d076b29c9491620bcc810308
URL: https://github.com/llvm/llvm-project/commit/626a84db477cf5e2d076b29c9491620bcc810308
DIFF: https://github.com/llvm/llvm-project/commit/626a84db477cf5e2d076b29c9491620bcc810308.diff
LOG: [CostModel][X86] getTypeBasedIntrinsicInstrCost - convert to CostKindTblEntry
Begin the refactoring to use CostKindTblEntry and return real latency/codesize/sizelatency costs instead of reusing the throughput numbers.
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
llvm/test/Analysis/CostModel/X86/costmodel.ll
llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 641e4611e113..54bbd337c1cc 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3099,457 +3099,457 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
// specialized in these tables yet.
- static const CostTblEntry AVX512BITALGCostTbl[] = {
- { ISD::CTPOP, MVT::v32i16, 1 },
- { ISD::CTPOP, MVT::v64i8, 1 },
- { ISD::CTPOP, MVT::v16i16, 1 },
- { ISD::CTPOP, MVT::v32i8, 1 },
- { ISD::CTPOP, MVT::v8i16, 1 },
- { ISD::CTPOP, MVT::v16i8, 1 },
+ static const CostKindTblEntry AVX512BITALGCostTbl[] = {
+ { ISD::CTPOP, MVT::v32i16, { 1 } },
+ { ISD::CTPOP, MVT::v64i8, { 1 } },
+ { ISD::CTPOP, MVT::v16i16, { 1 } },
+ { ISD::CTPOP, MVT::v32i8, { 1 } },
+ { ISD::CTPOP, MVT::v8i16, { 1 } },
+ { ISD::CTPOP, MVT::v16i8, { 1 } },
};
- static const CostTblEntry AVX512VPOPCNTDQCostTbl[] = {
- { ISD::CTPOP, MVT::v8i64, 1 },
- { ISD::CTPOP, MVT::v16i32, 1 },
- { ISD::CTPOP, MVT::v4i64, 1 },
- { ISD::CTPOP, MVT::v8i32, 1 },
- { ISD::CTPOP, MVT::v2i64, 1 },
- { ISD::CTPOP, MVT::v4i32, 1 },
+ static const CostKindTblEntry AVX512VPOPCNTDQCostTbl[] = {
+ { ISD::CTPOP, MVT::v8i64, { 1 } },
+ { ISD::CTPOP, MVT::v16i32, { 1 } },
+ { ISD::CTPOP, MVT::v4i64, { 1 } },
+ { ISD::CTPOP, MVT::v8i32, { 1 } },
+ { ISD::CTPOP, MVT::v2i64, { 1 } },
+ { ISD::CTPOP, MVT::v4i32, { 1 } },
};
- static const CostTblEntry AVX512CDCostTbl[] = {
- { ISD::CTLZ, MVT::v8i64, 1 },
- { ISD::CTLZ, MVT::v16i32, 1 },
- { ISD::CTLZ, MVT::v32i16, 8 },
- { ISD::CTLZ, MVT::v64i8, 20 },
- { ISD::CTLZ, MVT::v4i64, 1 },
- { ISD::CTLZ, MVT::v8i32, 1 },
- { ISD::CTLZ, MVT::v16i16, 4 },
- { ISD::CTLZ, MVT::v32i8, 10 },
- { ISD::CTLZ, MVT::v2i64, 1 },
- { ISD::CTLZ, MVT::v4i32, 1 },
- { ISD::CTLZ, MVT::v8i16, 4 },
- { ISD::CTLZ, MVT::v16i8, 4 },
+ static const CostKindTblEntry AVX512CDCostTbl[] = {
+ { ISD::CTLZ, MVT::v8i64, { 1 } },
+ { ISD::CTLZ, MVT::v16i32, { 1 } },
+ { ISD::CTLZ, MVT::v32i16, { 8 } },
+ { ISD::CTLZ, MVT::v64i8, { 20 } },
+ { ISD::CTLZ, MVT::v4i64, { 1 } },
+ { ISD::CTLZ, MVT::v8i32, { 1 } },
+ { ISD::CTLZ, MVT::v16i16, { 4 } },
+ { ISD::CTLZ, MVT::v32i8, { 10 } },
+ { ISD::CTLZ, MVT::v2i64, { 1 } },
+ { ISD::CTLZ, MVT::v4i32, { 1 } },
+ { ISD::CTLZ, MVT::v8i16, { 4 } },
+ { ISD::CTLZ, MVT::v16i8, { 4 } },
};
- static const CostTblEntry AVX512BWCostTbl[] = {
- { ISD::ABS, MVT::v32i16, 1 },
- { ISD::ABS, MVT::v64i8, 1 },
- { ISD::BITREVERSE, MVT::v8i64, 3 },
- { ISD::BITREVERSE, MVT::v16i32, 3 },
- { ISD::BITREVERSE, MVT::v32i16, 3 },
- { ISD::BITREVERSE, MVT::v64i8, 2 },
- { ISD::BSWAP, MVT::v8i64, 1 },
- { ISD::BSWAP, MVT::v16i32, 1 },
- { ISD::BSWAP, MVT::v32i16, 1 },
- { ISD::CTLZ, MVT::v8i64, 23 },
- { ISD::CTLZ, MVT::v16i32, 22 },
- { ISD::CTLZ, MVT::v32i16, 18 },
- { ISD::CTLZ, MVT::v64i8, 17 },
- { ISD::CTPOP, MVT::v8i64, 7 },
- { ISD::CTPOP, MVT::v16i32, 11 },
- { ISD::CTPOP, MVT::v32i16, 9 },
- { ISD::CTPOP, MVT::v64i8, 6 },
- { ISD::CTTZ, MVT::v8i64, 10 },
- { ISD::CTTZ, MVT::v16i32, 14 },
- { ISD::CTTZ, MVT::v32i16, 12 },
- { ISD::CTTZ, MVT::v64i8, 9 },
- { ISD::SADDSAT, MVT::v32i16, 1 },
- { ISD::SADDSAT, MVT::v64i8, 1 },
- { ISD::SMAX, MVT::v32i16, 1 },
- { ISD::SMAX, MVT::v64i8, 1 },
- { ISD::SMIN, MVT::v32i16, 1 },
- { ISD::SMIN, MVT::v64i8, 1 },
- { ISD::SSUBSAT, MVT::v32i16, 1 },
- { ISD::SSUBSAT, MVT::v64i8, 1 },
- { ISD::UADDSAT, MVT::v32i16, 1 },
- { ISD::UADDSAT, MVT::v64i8, 1 },
- { ISD::UMAX, MVT::v32i16, 1 },
- { ISD::UMAX, MVT::v64i8, 1 },
- { ISD::UMIN, MVT::v32i16, 1 },
- { ISD::UMIN, MVT::v64i8, 1 },
- { ISD::USUBSAT, MVT::v32i16, 1 },
- { ISD::USUBSAT, MVT::v64i8, 1 },
+ static const CostKindTblEntry AVX512BWCostTbl[] = {
+ { ISD::ABS, MVT::v32i16, { 1 } },
+ { ISD::ABS, MVT::v64i8, { 1 } },
+ { ISD::BITREVERSE, MVT::v8i64, { 3 } },
+ { ISD::BITREVERSE, MVT::v16i32, { 3 } },
+ { ISD::BITREVERSE, MVT::v32i16, { 3 } },
+ { ISD::BITREVERSE, MVT::v64i8, { 2 } },
+ { ISD::BSWAP, MVT::v8i64, { 1 } },
+ { ISD::BSWAP, MVT::v16i32, { 1 } },
+ { ISD::BSWAP, MVT::v32i16, { 1 } },
+ { ISD::CTLZ, MVT::v8i64, { 23 } },
+ { ISD::CTLZ, MVT::v16i32, { 22 } },
+ { ISD::CTLZ, MVT::v32i16, { 18 } },
+ { ISD::CTLZ, MVT::v64i8, { 17 } },
+ { ISD::CTPOP, MVT::v8i64, { 7 } },
+ { ISD::CTPOP, MVT::v16i32, { 11 } },
+ { ISD::CTPOP, MVT::v32i16, { 9 } },
+ { ISD::CTPOP, MVT::v64i8, { 6 } },
+ { ISD::CTTZ, MVT::v8i64, { 10 } },
+ { ISD::CTTZ, MVT::v16i32, { 14 } },
+ { ISD::CTTZ, MVT::v32i16, { 12 } },
+ { ISD::CTTZ, MVT::v64i8, { 9 } },
+ { ISD::SADDSAT, MVT::v32i16, { 1 } },
+ { ISD::SADDSAT, MVT::v64i8, { 1 } },
+ { ISD::SMAX, MVT::v32i16, { 1 } },
+ { ISD::SMAX, MVT::v64i8, { 1 } },
+ { ISD::SMIN, MVT::v32i16, { 1 } },
+ { ISD::SMIN, MVT::v64i8, { 1 } },
+ { ISD::SSUBSAT, MVT::v32i16, { 1 } },
+ { ISD::SSUBSAT, MVT::v64i8, { 1 } },
+ { ISD::UADDSAT, MVT::v32i16, { 1 } },
+ { ISD::UADDSAT, MVT::v64i8, { 1 } },
+ { ISD::UMAX, MVT::v32i16, { 1 } },
+ { ISD::UMAX, MVT::v64i8, { 1 } },
+ { ISD::UMIN, MVT::v32i16, { 1 } },
+ { ISD::UMIN, MVT::v64i8, { 1 } },
+ { ISD::USUBSAT, MVT::v32i16, { 1 } },
+ { ISD::USUBSAT, MVT::v64i8, { 1 } },
};
- static const CostTblEntry AVX512CostTbl[] = {
- { ISD::ABS, MVT::v8i64, 1 },
- { ISD::ABS, MVT::v16i32, 1 },
- { ISD::ABS, MVT::v32i16, 2 },
- { ISD::ABS, MVT::v64i8, 2 },
- { ISD::ABS, MVT::v4i64, 1 },
- { ISD::ABS, MVT::v2i64, 1 },
- { ISD::BITREVERSE, MVT::v8i64, 36 },
- { ISD::BITREVERSE, MVT::v16i32, 24 },
- { ISD::BITREVERSE, MVT::v32i16, 10 },
- { ISD::BITREVERSE, MVT::v64i8, 10 },
- { ISD::BSWAP, MVT::v8i64, 4 },
- { ISD::BSWAP, MVT::v16i32, 4 },
- { ISD::BSWAP, MVT::v32i16, 4 },
- { ISD::CTLZ, MVT::v8i64, 29 },
- { ISD::CTLZ, MVT::v16i32, 35 },
- { ISD::CTLZ, MVT::v32i16, 28 },
- { ISD::CTLZ, MVT::v64i8, 18 },
- { ISD::CTPOP, MVT::v8i64, 16 },
- { ISD::CTPOP, MVT::v16i32, 24 },
- { ISD::CTPOP, MVT::v32i16, 18 },
- { ISD::CTPOP, MVT::v64i8, 12 },
- { ISD::CTTZ, MVT::v8i64, 20 },
- { ISD::CTTZ, MVT::v16i32, 28 },
- { ISD::CTTZ, MVT::v32i16, 24 },
- { ISD::CTTZ, MVT::v64i8, 18 },
- { ISD::SMAX, MVT::v8i64, 1 },
- { ISD::SMAX, MVT::v16i32, 1 },
- { ISD::SMAX, MVT::v32i16, 2 },
- { ISD::SMAX, MVT::v64i8, 2 },
- { ISD::SMAX, MVT::v4i64, 1 },
- { ISD::SMAX, MVT::v2i64, 1 },
- { ISD::SMIN, MVT::v8i64, 1 },
- { ISD::SMIN, MVT::v16i32, 1 },
- { ISD::SMIN, MVT::v32i16, 2 },
- { ISD::SMIN, MVT::v64i8, 2 },
- { ISD::SMIN, MVT::v4i64, 1 },
- { ISD::SMIN, MVT::v2i64, 1 },
- { ISD::UMAX, MVT::v8i64, 1 },
- { ISD::UMAX, MVT::v16i32, 1 },
- { ISD::UMAX, MVT::v32i16, 2 },
- { ISD::UMAX, MVT::v64i8, 2 },
- { ISD::UMAX, MVT::v4i64, 1 },
- { ISD::UMAX, MVT::v2i64, 1 },
- { ISD::UMIN, MVT::v8i64, 1 },
- { ISD::UMIN, MVT::v16i32, 1 },
- { ISD::UMIN, MVT::v32i16, 2 },
- { ISD::UMIN, MVT::v64i8, 2 },
- { ISD::UMIN, MVT::v4i64, 1 },
- { ISD::UMIN, MVT::v2i64, 1 },
- { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
- { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
- { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
- { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
- { ISD::UADDSAT, MVT::v16i32, 3 }, // not + pminud + paddd
- { ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq
- { ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq
- { ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq
- { ISD::SADDSAT, MVT::v32i16, 2 },
- { ISD::SADDSAT, MVT::v64i8, 2 },
- { ISD::SSUBSAT, MVT::v32i16, 2 },
- { ISD::SSUBSAT, MVT::v64i8, 2 },
- { ISD::UADDSAT, MVT::v32i16, 2 },
- { ISD::UADDSAT, MVT::v64i8, 2 },
- { ISD::USUBSAT, MVT::v32i16, 2 },
- { ISD::USUBSAT, MVT::v64i8, 2 },
- { ISD::FMAXNUM, MVT::f32, 2 },
- { ISD::FMAXNUM, MVT::v4f32, 2 },
- { ISD::FMAXNUM, MVT::v8f32, 2 },
- { ISD::FMAXNUM, MVT::v16f32, 2 },
- { ISD::FMAXNUM, MVT::f64, 2 },
- { ISD::FMAXNUM, MVT::v2f64, 2 },
- { ISD::FMAXNUM, MVT::v4f64, 2 },
- { ISD::FMAXNUM, MVT::v8f64, 2 },
+ static const CostKindTblEntry AVX512CostTbl[] = {
+ { ISD::ABS, MVT::v8i64, { 1 } },
+ { ISD::ABS, MVT::v16i32, { 1 } },
+ { ISD::ABS, MVT::v32i16, { 2 } },
+ { ISD::ABS, MVT::v64i8, { 2 } },
+ { ISD::ABS, MVT::v4i64, { 1 } },
+ { ISD::ABS, MVT::v2i64, { 1 } },
+ { ISD::BITREVERSE, MVT::v8i64, { 36 } },
+ { ISD::BITREVERSE, MVT::v16i32, { 24 } },
+ { ISD::BITREVERSE, MVT::v32i16, { 10 } },
+ { ISD::BITREVERSE, MVT::v64i8, { 10 } },
+ { ISD::BSWAP, MVT::v8i64, { 4 } },
+ { ISD::BSWAP, MVT::v16i32, { 4 } },
+ { ISD::BSWAP, MVT::v32i16, { 4 } },
+ { ISD::CTLZ, MVT::v8i64, { 29 } },
+ { ISD::CTLZ, MVT::v16i32, { 35 } },
+ { ISD::CTLZ, MVT::v32i16, { 28 } },
+ { ISD::CTLZ, MVT::v64i8, { 18 } },
+ { ISD::CTPOP, MVT::v8i64, { 16 } },
+ { ISD::CTPOP, MVT::v16i32, { 24 } },
+ { ISD::CTPOP, MVT::v32i16, { 18 } },
+ { ISD::CTPOP, MVT::v64i8, { 12 } },
+ { ISD::CTTZ, MVT::v8i64, { 20 } },
+ { ISD::CTTZ, MVT::v16i32, { 28 } },
+ { ISD::CTTZ, MVT::v32i16, { 24 } },
+ { ISD::CTTZ, MVT::v64i8, { 18 } },
+ { ISD::SMAX, MVT::v8i64, { 1 } },
+ { ISD::SMAX, MVT::v16i32, { 1 } },
+ { ISD::SMAX, MVT::v32i16, { 2 } },
+ { ISD::SMAX, MVT::v64i8, { 2 } },
+ { ISD::SMAX, MVT::v4i64, { 1 } },
+ { ISD::SMAX, MVT::v2i64, { 1 } },
+ { ISD::SMIN, MVT::v8i64, { 1 } },
+ { ISD::SMIN, MVT::v16i32, { 1 } },
+ { ISD::SMIN, MVT::v32i16, { 2 } },
+ { ISD::SMIN, MVT::v64i8, { 2 } },
+ { ISD::SMIN, MVT::v4i64, { 1 } },
+ { ISD::SMIN, MVT::v2i64, { 1 } },
+ { ISD::UMAX, MVT::v8i64, { 1 } },
+ { ISD::UMAX, MVT::v16i32, { 1 } },
+ { ISD::UMAX, MVT::v32i16, { 2 } },
+ { ISD::UMAX, MVT::v64i8, { 2 } },
+ { ISD::UMAX, MVT::v4i64, { 1 } },
+ { ISD::UMAX, MVT::v2i64, { 1 } },
+ { ISD::UMIN, MVT::v8i64, { 1 } },
+ { ISD::UMIN, MVT::v16i32, { 1 } },
+ { ISD::UMIN, MVT::v32i16, { 2 } },
+ { ISD::UMIN, MVT::v64i8, { 2 } },
+ { ISD::UMIN, MVT::v4i64, { 1 } },
+ { ISD::UMIN, MVT::v2i64, { 1 } },
+ { ISD::USUBSAT, MVT::v16i32, { 2 } }, // pmaxud + psubd
+ { ISD::USUBSAT, MVT::v2i64, { 2 } }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v4i64, { 2 } }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v8i64, { 2 } }, // pmaxuq + psubq
+ { ISD::UADDSAT, MVT::v16i32, { 3 } }, // not + pminud + paddd
+ { ISD::UADDSAT, MVT::v2i64, { 3 } }, // not + pminuq + paddq
+ { ISD::UADDSAT, MVT::v4i64, { 3 } }, // not + pminuq + paddq
+ { ISD::UADDSAT, MVT::v8i64, { 3 } }, // not + pminuq + paddq
+ { ISD::SADDSAT, MVT::v32i16, { 2 } },
+ { ISD::SADDSAT, MVT::v64i8, { 2 } },
+ { ISD::SSUBSAT, MVT::v32i16, { 2 } },
+ { ISD::SSUBSAT, MVT::v64i8, { 2 } },
+ { ISD::UADDSAT, MVT::v32i16, { 2 } },
+ { ISD::UADDSAT, MVT::v64i8, { 2 } },
+ { ISD::USUBSAT, MVT::v32i16, { 2 } },
+ { ISD::USUBSAT, MVT::v64i8, { 2 } },
+ { ISD::FMAXNUM, MVT::f32, { 2 } },
+ { ISD::FMAXNUM, MVT::v4f32, { 2 } },
+ { ISD::FMAXNUM, MVT::v8f32, { 2 } },
+ { ISD::FMAXNUM, MVT::v16f32, { 2 } },
+ { ISD::FMAXNUM, MVT::f64, { 2 } },
+ { ISD::FMAXNUM, MVT::v2f64, { 2 } },
+ { ISD::FMAXNUM, MVT::v4f64, { 2 } },
+ { ISD::FMAXNUM, MVT::v8f64, { 2 } },
};
- static const CostTblEntry XOPCostTbl[] = {
- { ISD::BITREVERSE, MVT::v4i64, 4 },
- { ISD::BITREVERSE, MVT::v8i32, 4 },
- { ISD::BITREVERSE, MVT::v16i16, 4 },
- { ISD::BITREVERSE, MVT::v32i8, 4 },
- { ISD::BITREVERSE, MVT::v2i64, 1 },
- { ISD::BITREVERSE, MVT::v4i32, 1 },
- { ISD::BITREVERSE, MVT::v8i16, 1 },
- { ISD::BITREVERSE, MVT::v16i8, 1 },
- { ISD::BITREVERSE, MVT::i64, 3 },
- { ISD::BITREVERSE, MVT::i32, 3 },
- { ISD::BITREVERSE, MVT::i16, 3 },
- { ISD::BITREVERSE, MVT::i8, 3 }
+ static const CostKindTblEntry XOPCostTbl[] = {
+ { ISD::BITREVERSE, MVT::v4i64, { 4 } },
+ { ISD::BITREVERSE, MVT::v8i32, { 4 } },
+ { ISD::BITREVERSE, MVT::v16i16, { 4 } },
+ { ISD::BITREVERSE, MVT::v32i8, { 4 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 1 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 1 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 1 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 1 } },
+ { ISD::BITREVERSE, MVT::i64, { 3 } },
+ { ISD::BITREVERSE, MVT::i32, { 3 } },
+ { ISD::BITREVERSE, MVT::i16, { 3 } },
+ { ISD::BITREVERSE, MVT::i8, { 3 } }
};
- static const CostTblEntry AVX2CostTbl[] = {
- { ISD::ABS, MVT::v4i64, 2 }, // VBLENDVPD(X,VPSUBQ(0,X),X)
- { ISD::ABS, MVT::v8i32, 1 },
- { ISD::ABS, MVT::v16i16, 1 },
- { ISD::ABS, MVT::v32i8, 1 },
- { ISD::BITREVERSE, MVT::v2i64, 3 },
- { ISD::BITREVERSE, MVT::v4i64, 3 },
- { ISD::BITREVERSE, MVT::v4i32, 3 },
- { ISD::BITREVERSE, MVT::v8i32, 3 },
- { ISD::BITREVERSE, MVT::v8i16, 3 },
- { ISD::BITREVERSE, MVT::v16i16, 3 },
- { ISD::BITREVERSE, MVT::v16i8, 3 },
- { ISD::BITREVERSE, MVT::v32i8, 3 },
- { ISD::BSWAP, MVT::v4i64, 1 },
- { ISD::BSWAP, MVT::v8i32, 1 },
- { ISD::BSWAP, MVT::v16i16, 1 },
- { ISD::CTLZ, MVT::v2i64, 7 },
- { ISD::CTLZ, MVT::v4i64, 7 },
- { ISD::CTLZ, MVT::v4i32, 5 },
- { ISD::CTLZ, MVT::v8i32, 5 },
- { ISD::CTLZ, MVT::v8i16, 4 },
- { ISD::CTLZ, MVT::v16i16, 4 },
- { ISD::CTLZ, MVT::v16i8, 3 },
- { ISD::CTLZ, MVT::v32i8, 3 },
- { ISD::CTPOP, MVT::v2i64, 3 },
- { ISD::CTPOP, MVT::v4i64, 3 },
- { ISD::CTPOP, MVT::v4i32, 7 },
- { ISD::CTPOP, MVT::v8i32, 7 },
- { ISD::CTPOP, MVT::v8i16, 3 },
- { ISD::CTPOP, MVT::v16i16, 3 },
- { ISD::CTPOP, MVT::v16i8, 2 },
- { ISD::CTPOP, MVT::v32i8, 2 },
- { ISD::CTTZ, MVT::v2i64, 4 },
- { ISD::CTTZ, MVT::v4i64, 4 },
- { ISD::CTTZ, MVT::v4i32, 7 },
- { ISD::CTTZ, MVT::v8i32, 7 },
- { ISD::CTTZ, MVT::v8i16, 4 },
- { ISD::CTTZ, MVT::v16i16, 4 },
- { ISD::CTTZ, MVT::v16i8, 3 },
- { ISD::CTTZ, MVT::v32i8, 3 },
- { ISD::SADDSAT, MVT::v16i16, 1 },
- { ISD::SADDSAT, MVT::v32i8, 1 },
- { ISD::SMAX, MVT::v8i32, 1 },
- { ISD::SMAX, MVT::v16i16, 1 },
- { ISD::SMAX, MVT::v32i8, 1 },
- { ISD::SMIN, MVT::v8i32, 1 },
- { ISD::SMIN, MVT::v16i16, 1 },
- { ISD::SMIN, MVT::v32i8, 1 },
- { ISD::SSUBSAT, MVT::v16i16, 1 },
- { ISD::SSUBSAT, MVT::v32i8, 1 },
- { ISD::UADDSAT, MVT::v16i16, 1 },
- { ISD::UADDSAT, MVT::v32i8, 1 },
- { ISD::UADDSAT, MVT::v8i32, 3 }, // not + pminud + paddd
- { ISD::UMAX, MVT::v8i32, 1 },
- { ISD::UMAX, MVT::v16i16, 1 },
- { ISD::UMAX, MVT::v32i8, 1 },
- { ISD::UMIN, MVT::v8i32, 1 },
- { ISD::UMIN, MVT::v16i16, 1 },
- { ISD::UMIN, MVT::v32i8, 1 },
- { ISD::USUBSAT, MVT::v16i16, 1 },
- { ISD::USUBSAT, MVT::v32i8, 1 },
- { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
- { ISD::FMAXNUM, MVT::v8f32, 3 }, // MAXPS + CMPUNORDPS + BLENDVPS
- { ISD::FMAXNUM, MVT::v4f64, 3 }, // MAXPD + CMPUNORDPD + BLENDVPD
- { ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::f64, 14 }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
+ static const CostKindTblEntry AVX2CostTbl[] = {
+ { ISD::ABS, MVT::v4i64, { 2 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
+ { ISD::ABS, MVT::v8i32, { 1 } },
+ { ISD::ABS, MVT::v16i16, { 1 } },
+ { ISD::ABS, MVT::v32i8, { 1 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 3 } },
+ { ISD::BITREVERSE, MVT::v4i64, { 3 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 3 } },
+ { ISD::BITREVERSE, MVT::v8i32, { 3 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 3 } },
+ { ISD::BITREVERSE, MVT::v16i16, { 3 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 3 } },
+ { ISD::BITREVERSE, MVT::v32i8, { 3 } },
+ { ISD::BSWAP, MVT::v4i64, { 1 } },
+ { ISD::BSWAP, MVT::v8i32, { 1 } },
+ { ISD::BSWAP, MVT::v16i16, { 1 } },
+ { ISD::CTLZ, MVT::v2i64, { 7 } },
+ { ISD::CTLZ, MVT::v4i64, { 7 } },
+ { ISD::CTLZ, MVT::v4i32, { 5 } },
+ { ISD::CTLZ, MVT::v8i32, { 5 } },
+ { ISD::CTLZ, MVT::v8i16, { 4 } },
+ { ISD::CTLZ, MVT::v16i16, { 4 } },
+ { ISD::CTLZ, MVT::v16i8, { 3 } },
+ { ISD::CTLZ, MVT::v32i8, { 3 } },
+ { ISD::CTPOP, MVT::v2i64, { 3 } },
+ { ISD::CTPOP, MVT::v4i64, { 3 } },
+ { ISD::CTPOP, MVT::v4i32, { 7 } },
+ { ISD::CTPOP, MVT::v8i32, { 7 } },
+ { ISD::CTPOP, MVT::v8i16, { 3 } },
+ { ISD::CTPOP, MVT::v16i16, { 3 } },
+ { ISD::CTPOP, MVT::v16i8, { 2 } },
+ { ISD::CTPOP, MVT::v32i8, { 2 } },
+ { ISD::CTTZ, MVT::v2i64, { 4 } },
+ { ISD::CTTZ, MVT::v4i64, { 4 } },
+ { ISD::CTTZ, MVT::v4i32, { 7 } },
+ { ISD::CTTZ, MVT::v8i32, { 7 } },
+ { ISD::CTTZ, MVT::v8i16, { 4 } },
+ { ISD::CTTZ, MVT::v16i16, { 4 } },
+ { ISD::CTTZ, MVT::v16i8, { 3 } },
+ { ISD::CTTZ, MVT::v32i8, { 3 } },
+ { ISD::SADDSAT, MVT::v16i16, { 1 } },
+ { ISD::SADDSAT, MVT::v32i8, { 1 } },
+ { ISD::SMAX, MVT::v8i32, { 1 } },
+ { ISD::SMAX, MVT::v16i16, { 1 } },
+ { ISD::SMAX, MVT::v32i8, { 1 } },
+ { ISD::SMIN, MVT::v8i32, { 1 } },
+ { ISD::SMIN, MVT::v16i16, { 1 } },
+ { ISD::SMIN, MVT::v32i8, { 1 } },
+ { ISD::SSUBSAT, MVT::v16i16, { 1 } },
+ { ISD::SSUBSAT, MVT::v32i8, { 1 } },
+ { ISD::UADDSAT, MVT::v16i16, { 1 } },
+ { ISD::UADDSAT, MVT::v32i8, { 1 } },
+ { ISD::UADDSAT, MVT::v8i32, { 3 } }, // not + pminud + paddd
+ { ISD::UMAX, MVT::v8i32, { 1 } },
+ { ISD::UMAX, MVT::v16i16, { 1 } },
+ { ISD::UMAX, MVT::v32i8, { 1 } },
+ { ISD::UMIN, MVT::v8i32, { 1 } },
+ { ISD::UMIN, MVT::v16i16, { 1 } },
+ { ISD::UMIN, MVT::v32i8, { 1 } },
+ { ISD::USUBSAT, MVT::v16i16, { 1 } },
+ { ISD::USUBSAT, MVT::v32i8, { 1 } },
+ { ISD::USUBSAT, MVT::v8i32, { 2 } }, // pmaxud + psubd
+ { ISD::FMAXNUM, MVT::v8f32, { 3 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v4f64, { 3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FSQRT, MVT::f32, { 7 } }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 7 } }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f32, { 14 } }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::f64, { 14 } }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, { 14 } }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f64, { 28 } }, // Haswell from http://www.agner.org/
};
- static const CostTblEntry AVX1CostTbl[] = {
- { ISD::ABS, MVT::v4i64, 5 }, // VBLENDVPD(X,VPSUBQ(0,X),X)
- { ISD::ABS, MVT::v8i32, 3 },
- { ISD::ABS, MVT::v16i16, 3 },
- { ISD::ABS, MVT::v32i8, 3 },
- { ISD::BITREVERSE, MVT::v4i64, 12 }, // 2 x 128-bit Op + extract/insert
- { ISD::BITREVERSE, MVT::v8i32, 12 }, // 2 x 128-bit Op + extract/insert
- { ISD::BITREVERSE, MVT::v16i16, 12 }, // 2 x 128-bit Op + extract/insert
- { ISD::BITREVERSE, MVT::v32i8, 12 }, // 2 x 128-bit Op + extract/insert
- { ISD::BSWAP, MVT::v4i64, 4 },
- { ISD::BSWAP, MVT::v8i32, 4 },
- { ISD::BSWAP, MVT::v16i16, 4 },
- { ISD::CTLZ, MVT::v4i64, 48 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTLZ, MVT::v8i32, 38 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTLZ, MVT::v16i16, 30 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTLZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTPOP, MVT::v4i64, 16 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTPOP, MVT::v8i32, 24 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTPOP, MVT::v16i16, 20 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTPOP, MVT::v32i8, 14 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTTZ, MVT::v4i64, 22 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert
- { ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
- { ISD::SADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SMAX, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SMAX, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SMAX, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SMIN, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SMIN, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SMIN, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SSUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::SSUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UADDSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UADDSAT, MVT::v8i32, 8 }, // 2 x 128-bit Op + extract/insert
- { ISD::UMAX, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UMAX, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UMAX, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UMIN, MVT::v8i32, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UMIN, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::UMIN, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
- { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
- { ISD::FMAXNUM, MVT::f32, 3 }, // MAXSS + CMPUNORDSS + BLENDVPS
- { ISD::FMAXNUM, MVT::v4f32, 3 }, // MAXPS + CMPUNORDPS + BLENDVPS
- { ISD::FMAXNUM, MVT::v8f32, 5 }, // MAXPS + CMPUNORDPS + BLENDVPS + ?
- { ISD::FMAXNUM, MVT::f64, 3 }, // MAXSD + CMPUNORDSD + BLENDVPD
- { ISD::FMAXNUM, MVT::v2f64, 3 }, // MAXPD + CMPUNORDPD + BLENDVPD
- { ISD::FMAXNUM, MVT::v4f64, 5 }, // MAXPD + CMPUNORDPD + BLENDVPD + ?
- { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
+ static const CostKindTblEntry AVX1CostTbl[] = {
+ { ISD::ABS, MVT::v4i64, { 5 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
+ { ISD::ABS, MVT::v8i32, { 3 } },
+ { ISD::ABS, MVT::v16i16, { 3 } },
+ { ISD::ABS, MVT::v32i8, { 3 } },
+ { ISD::BITREVERSE, MVT::v4i64, { 12 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v8i32, { 12 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v16i16, { 12 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v32i8, { 12 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BSWAP, MVT::v4i64, { 4 } },
+ { ISD::BSWAP, MVT::v8i32, { 4 } },
+ { ISD::BSWAP, MVT::v16i16, { 4 } },
+ { ISD::CTLZ, MVT::v4i64, { 48 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTLZ, MVT::v8i32, { 38 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTLZ, MVT::v16i16, { 30 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTLZ, MVT::v32i8, { 20 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v4i64, { 16 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v8i32, { 24 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v16i16, { 20 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v32i8, { 14 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v4i64, { 22 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v8i32, { 30 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v16i16, { 26 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v32i8, { 20 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SADDSAT, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v8i32, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMAX, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v8i32, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SMIN, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::SSUBSAT, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UADDSAT, MVT::v8i32, { 8 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v8i32, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMAX, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v8i32, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::UMIN, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v8i32, { 6 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::FMAXNUM, MVT::f32, { 3 } }, // MAXSS + CMPUNORDSS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v4f32, { 3 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v8f32, { 5 } }, // MAXPS + CMPUNORDPS + BLENDVPS + ?
+ { ISD::FMAXNUM, MVT::f64, { 3 } }, // MAXSD + CMPUNORDSD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v2f64, { 3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v4f64, { 5 } }, // MAXPD + CMPUNORDPD + BLENDVPD + ?
+ { ISD::FSQRT, MVT::f32, { 14 } }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 14 } }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f32, { 28 } }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::f64, { 21 } }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, { 21 } }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f64, { 43 } }, // SNB from http://www.agner.org/
};
- static const CostTblEntry GLMCostTbl[] = {
- { ISD::FSQRT, MVT::f32, 19 }, // sqrtss
- { ISD::FSQRT, MVT::v4f32, 37 }, // sqrtps
- { ISD::FSQRT, MVT::f64, 34 }, // sqrtsd
- { ISD::FSQRT, MVT::v2f64, 67 }, // sqrtpd
+ static const CostKindTblEntry GLMCostTbl[] = {
+ { ISD::FSQRT, MVT::f32, { 19 } }, // sqrtss
+ { ISD::FSQRT, MVT::v4f32, { 37 } }, // sqrtps
+ { ISD::FSQRT, MVT::f64, { 34 } }, // sqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 67 } }, // sqrtpd
};
- static const CostTblEntry SLMCostTbl[] = {
- { ISD::FSQRT, MVT::f32, 20 }, // sqrtss
- { ISD::FSQRT, MVT::v4f32, 40 }, // sqrtps
- { ISD::FSQRT, MVT::f64, 35 }, // sqrtsd
- { ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
+ static const CostKindTblEntry SLMCostTbl[] = {
+ { ISD::FSQRT, MVT::f32, { 20 } }, // sqrtss
+ { ISD::FSQRT, MVT::v4f32, { 40 } }, // sqrtps
+ { ISD::FSQRT, MVT::f64, { 35 } }, // sqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 70 } }, // sqrtpd
};
- static const CostTblEntry SSE42CostTbl[] = {
- { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
- { ISD::UADDSAT, MVT::v4i32, 3 }, // not + pminud + paddd
- { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
+ static const CostKindTblEntry SSE42CostTbl[] = {
+ { ISD::USUBSAT, MVT::v4i32, { 2 } }, // pmaxud + psubd
+ { ISD::UADDSAT, MVT::v4i32, { 3 } }, // not + pminud + paddd
+ { ISD::FSQRT, MVT::f32, { 18 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 18 } }, // Nehalem from http://www.agner.org/
};
- static const CostTblEntry SSE41CostTbl[] = {
- { ISD::ABS, MVT::v2i64, 2 }, // BLENDVPD(X,PSUBQ(0,X),X)
- { ISD::SMAX, MVT::v4i32, 1 },
- { ISD::SMAX, MVT::v16i8, 1 },
- { ISD::SMIN, MVT::v4i32, 1 },
- { ISD::SMIN, MVT::v16i8, 1 },
- { ISD::UMAX, MVT::v4i32, 1 },
- { ISD::UMAX, MVT::v8i16, 1 },
- { ISD::UMIN, MVT::v4i32, 1 },
- { ISD::UMIN, MVT::v8i16, 1 },
+ static const CostKindTblEntry SSE41CostTbl[] = {
+ { ISD::ABS, MVT::v2i64, { 2 } }, // BLENDVPD(X,PSUBQ(0,X),X)
+ { ISD::SMAX, MVT::v4i32, { 1 } },
+ { ISD::SMAX, MVT::v16i8, { 1 } },
+ { ISD::SMIN, MVT::v4i32, { 1 } },
+ { ISD::SMIN, MVT::v16i8, { 1 } },
+ { ISD::UMAX, MVT::v4i32, { 1 } },
+ { ISD::UMAX, MVT::v8i16, { 1 } },
+ { ISD::UMIN, MVT::v4i32, { 1 } },
+ { ISD::UMIN, MVT::v8i16, { 1 } },
};
- static const CostTblEntry SSSE3CostTbl[] = {
- { ISD::ABS, MVT::v4i32, 1 },
- { ISD::ABS, MVT::v8i16, 1 },
- { ISD::ABS, MVT::v16i8, 1 },
- { ISD::BITREVERSE, MVT::v2i64, 5 },
- { ISD::BITREVERSE, MVT::v4i32, 5 },
- { ISD::BITREVERSE, MVT::v8i16, 5 },
- { ISD::BITREVERSE, MVT::v16i8, 5 },
- { ISD::BSWAP, MVT::v2i64, 1 },
- { ISD::BSWAP, MVT::v4i32, 1 },
- { ISD::BSWAP, MVT::v8i16, 1 },
- { ISD::CTLZ, MVT::v2i64, 23 },
- { ISD::CTLZ, MVT::v4i32, 18 },
- { ISD::CTLZ, MVT::v8i16, 14 },
- { ISD::CTLZ, MVT::v16i8, 9 },
- { ISD::CTPOP, MVT::v2i64, 7 },
- { ISD::CTPOP, MVT::v4i32, 11 },
- { ISD::CTPOP, MVT::v8i16, 9 },
- { ISD::CTPOP, MVT::v16i8, 6 },
- { ISD::CTTZ, MVT::v2i64, 10 },
- { ISD::CTTZ, MVT::v4i32, 14 },
- { ISD::CTTZ, MVT::v8i16, 12 },
- { ISD::CTTZ, MVT::v16i8, 9 }
+ static const CostKindTblEntry SSSE3CostTbl[] = {
+ { ISD::ABS, MVT::v4i32, { 1 } },
+ { ISD::ABS, MVT::v8i16, { 1 } },
+ { ISD::ABS, MVT::v16i8, { 1 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 5 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 5 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 5 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 5 } },
+ { ISD::BSWAP, MVT::v2i64, { 1 } },
+ { ISD::BSWAP, MVT::v4i32, { 1 } },
+ { ISD::BSWAP, MVT::v8i16, { 1 } },
+ { ISD::CTLZ, MVT::v2i64, { 23 } },
+ { ISD::CTLZ, MVT::v4i32, { 18 } },
+ { ISD::CTLZ, MVT::v8i16, { 14 } },
+ { ISD::CTLZ, MVT::v16i8, { 9 } },
+ { ISD::CTPOP, MVT::v2i64, { 7 } },
+ { ISD::CTPOP, MVT::v4i32, { 11 } },
+ { ISD::CTPOP, MVT::v8i16, { 9 } },
+ { ISD::CTPOP, MVT::v16i8, { 6 } },
+ { ISD::CTTZ, MVT::v2i64, { 10 } },
+ { ISD::CTTZ, MVT::v4i32, { 14 } },
+ { ISD::CTTZ, MVT::v8i16, { 12 } },
+ { ISD::CTTZ, MVT::v16i8, { 9 } }
};
- static const CostTblEntry SSE2CostTbl[] = {
- { ISD::ABS, MVT::v2i64, 4 },
- { ISD::ABS, MVT::v4i32, 3 },
- { ISD::ABS, MVT::v8i16, 2 },
- { ISD::ABS, MVT::v16i8, 2 },
- { ISD::BITREVERSE, MVT::v2i64, 29 },
- { ISD::BITREVERSE, MVT::v4i32, 27 },
- { ISD::BITREVERSE, MVT::v8i16, 27 },
- { ISD::BITREVERSE, MVT::v16i8, 20 },
- { ISD::BSWAP, MVT::v2i64, 7 },
- { ISD::BSWAP, MVT::v4i32, 7 },
- { ISD::BSWAP, MVT::v8i16, 7 },
- { ISD::CTLZ, MVT::v2i64, 25 },
- { ISD::CTLZ, MVT::v4i32, 26 },
- { ISD::CTLZ, MVT::v8i16, 20 },
- { ISD::CTLZ, MVT::v16i8, 17 },
- { ISD::CTPOP, MVT::v2i64, 12 },
- { ISD::CTPOP, MVT::v4i32, 15 },
- { ISD::CTPOP, MVT::v8i16, 13 },
- { ISD::CTPOP, MVT::v16i8, 10 },
- { ISD::CTTZ, MVT::v2i64, 14 },
- { ISD::CTTZ, MVT::v4i32, 18 },
- { ISD::CTTZ, MVT::v8i16, 16 },
- { ISD::CTTZ, MVT::v16i8, 13 },
- { ISD::SADDSAT, MVT::v8i16, 1 },
- { ISD::SADDSAT, MVT::v16i8, 1 },
- { ISD::SMAX, MVT::v8i16, 1 },
- { ISD::SMIN, MVT::v8i16, 1 },
- { ISD::SSUBSAT, MVT::v8i16, 1 },
- { ISD::SSUBSAT, MVT::v16i8, 1 },
- { ISD::UADDSAT, MVT::v8i16, 1 },
- { ISD::UADDSAT, MVT::v16i8, 1 },
- { ISD::UMAX, MVT::v8i16, 2 },
- { ISD::UMAX, MVT::v16i8, 1 },
- { ISD::UMIN, MVT::v8i16, 2 },
- { ISD::UMIN, MVT::v16i8, 1 },
- { ISD::USUBSAT, MVT::v8i16, 1 },
- { ISD::USUBSAT, MVT::v16i8, 1 },
- { ISD::FMAXNUM, MVT::f64, 4 },
- { ISD::FMAXNUM, MVT::v2f64, 4 },
- { ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
+ static const CostKindTblEntry SSE2CostTbl[] = {
+ { ISD::ABS, MVT::v2i64, { 4 } },
+ { ISD::ABS, MVT::v4i32, { 3 } },
+ { ISD::ABS, MVT::v8i16, { 2 } },
+ { ISD::ABS, MVT::v16i8, { 2 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 29 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 27 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 27 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 20 } },
+ { ISD::BSWAP, MVT::v2i64, { 7 } },
+ { ISD::BSWAP, MVT::v4i32, { 7 } },
+ { ISD::BSWAP, MVT::v8i16, { 7 } },
+ { ISD::CTLZ, MVT::v2i64, { 25 } },
+ { ISD::CTLZ, MVT::v4i32, { 26 } },
+ { ISD::CTLZ, MVT::v8i16, { 20 } },
+ { ISD::CTLZ, MVT::v16i8, { 17 } },
+ { ISD::CTPOP, MVT::v2i64, { 12 } },
+ { ISD::CTPOP, MVT::v4i32, { 15 } },
+ { ISD::CTPOP, MVT::v8i16, { 13 } },
+ { ISD::CTPOP, MVT::v16i8, { 10 } },
+ { ISD::CTTZ, MVT::v2i64, { 14 } },
+ { ISD::CTTZ, MVT::v4i32, { 18 } },
+ { ISD::CTTZ, MVT::v8i16, { 16 } },
+ { ISD::CTTZ, MVT::v16i8, { 13 } },
+ { ISD::SADDSAT, MVT::v8i16, { 1 } },
+ { ISD::SADDSAT, MVT::v16i8, { 1 } },
+ { ISD::SMAX, MVT::v8i16, { 1 } },
+ { ISD::SMIN, MVT::v8i16, { 1 } },
+ { ISD::SSUBSAT, MVT::v8i16, { 1 } },
+ { ISD::SSUBSAT, MVT::v16i8, { 1 } },
+ { ISD::UADDSAT, MVT::v8i16, { 1 } },
+ { ISD::UADDSAT, MVT::v16i8, { 1 } },
+ { ISD::UMAX, MVT::v8i16, { 2 } },
+ { ISD::UMAX, MVT::v16i8, { 1 } },
+ { ISD::UMIN, MVT::v8i16, { 2 } },
+ { ISD::UMIN, MVT::v16i8, { 1 } },
+ { ISD::USUBSAT, MVT::v8i16, { 1 } },
+ { ISD::USUBSAT, MVT::v16i8, { 1 } },
+ { ISD::FMAXNUM, MVT::f64, { 4 } },
+ { ISD::FMAXNUM, MVT::v2f64, { 4 } },
+ { ISD::FSQRT, MVT::f64, { 32 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, { 32 } }, // Nehalem from http://www.agner.org/
};
- static const CostTblEntry SSE1CostTbl[] = {
- { ISD::FMAXNUM, MVT::f32, 4 },
- { ISD::FMAXNUM, MVT::v4f32, 4 },
- { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
+ static const CostKindTblEntry SSE1CostTbl[] = {
+ { ISD::FMAXNUM, MVT::f32, { 4 } },
+ { ISD::FMAXNUM, MVT::v4f32, { 4 } },
+ { ISD::FSQRT, MVT::f32, { 28 } }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 56 } }, // Pentium III from http://www.agner.org/
};
- static const CostTblEntry BMI64CostTbl[] = { // 64-bit targets
- { ISD::CTTZ, MVT::i64, 1 },
+ static const CostKindTblEntry BMI64CostTbl[] = { // 64-bit targets
+ { ISD::CTTZ, MVT::i64, { 1 } },
};
- static const CostTblEntry BMI32CostTbl[] = { // 32 or 64-bit targets
- { ISD::CTTZ, MVT::i32, 1 },
- { ISD::CTTZ, MVT::i16, 1 },
- { ISD::CTTZ, MVT::i8, 1 },
+ static const CostKindTblEntry BMI32CostTbl[] = { // 32 or 64-bit targets
+ { ISD::CTTZ, MVT::i32, { 1 } },
+ { ISD::CTTZ, MVT::i16, { 1 } },
+ { ISD::CTTZ, MVT::i8, { 1 } },
};
- static const CostTblEntry LZCNT64CostTbl[] = { // 64-bit targets
- { ISD::CTLZ, MVT::i64, 1 },
+ static const CostKindTblEntry LZCNT64CostTbl[] = { // 64-bit targets
+ { ISD::CTLZ, MVT::i64, { 1 } },
};
- static const CostTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets
- { ISD::CTLZ, MVT::i32, 1 },
- { ISD::CTLZ, MVT::i16, 1 },
- { ISD::CTLZ, MVT::i8, 1 },
+ static const CostKindTblEntry LZCNT32CostTbl[] = { // 32 or 64-bit targets
+ { ISD::CTLZ, MVT::i32, { 1 } },
+ { ISD::CTLZ, MVT::i16, { 1 } },
+ { ISD::CTLZ, MVT::i8, { 1 } },
};
- static const CostTblEntry POPCNT64CostTbl[] = { // 64-bit targets
- { ISD::CTPOP, MVT::i64, 1 },
+ static const CostKindTblEntry POPCNT64CostTbl[] = { // 64-bit targets
+ { ISD::CTPOP, MVT::i64, { 1 } },
};
- static const CostTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
- { ISD::CTPOP, MVT::i32, 1 },
- { ISD::CTPOP, MVT::i16, 1 },
- { ISD::CTPOP, MVT::i8, 1 },
+ static const CostKindTblEntry POPCNT32CostTbl[] = { // 32 or 64-bit targets
+ { ISD::CTPOP, MVT::i32, { 1 } },
+ { ISD::CTPOP, MVT::i16, { 1 } },
+ { ISD::CTPOP, MVT::i8, { 1 } },
};
- static const CostTblEntry X64CostTbl[] = { // 64-bit targets
- { ISD::ABS, MVT::i64, 2 }, // SUB+CMOV
- { ISD::BITREVERSE, MVT::i64, 14 },
- { ISD::BSWAP, MVT::i64, 1 },
- { ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTTZ, MVT::i64, 3 }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTPOP, MVT::i64, 10 },
- { ISD::SADDO, MVT::i64, 1 },
- { ISD::UADDO, MVT::i64, 1 },
- { ISD::UMULO, MVT::i64, 2 }, // mulq + seto
+ static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
+ { ISD::ABS, MVT::i64, { 2 } }, // SUB+CMOV
+ { ISD::BITREVERSE, MVT::i64, { 14 } },
+ { ISD::BSWAP, MVT::i64, { 1 } },
+ { ISD::CTLZ, MVT::i64, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTTZ, MVT::i64, { 3 } }, // TEST+BSF+CMOV/BRANCH
+ { ISD::CTPOP, MVT::i64, { 10 } },
+ { ISD::SADDO, MVT::i64, { 1 } },
+ { ISD::UADDO, MVT::i64, { 1 } },
+ { ISD::UMULO, MVT::i64, { 2 } }, // mulq + seto
};
- static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
- { ISD::ABS, MVT::i32, 2 }, // SUB+CMOV
- { ISD::ABS, MVT::i16, 2 }, // SUB+CMOV
- { ISD::BITREVERSE, MVT::i32, 14 },
- { ISD::BITREVERSE, MVT::i16, 14 },
- { ISD::BITREVERSE, MVT::i8, 11 },
- { ISD::BSWAP, MVT::i32, 1 },
- { ISD::BSWAP, MVT::i16, 1 }, // ROL
- { ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV
- { ISD::CTTZ, MVT::i32, 3 }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTTZ, MVT::i16, 3 }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTTZ, MVT::i8, 3 }, // TEST+BSF+CMOV/BRANCH
- { ISD::CTPOP, MVT::i32, 8 },
- { ISD::CTPOP, MVT::i16, 9 },
- { ISD::CTPOP, MVT::i8, 7 },
- { ISD::SADDO, MVT::i32, 1 },
- { ISD::SADDO, MVT::i16, 1 },
- { ISD::SADDO, MVT::i8, 1 },
- { ISD::UADDO, MVT::i32, 1 },
- { ISD::UADDO, MVT::i16, 1 },
- { ISD::UADDO, MVT::i8, 1 },
- { ISD::UMULO, MVT::i32, 2 }, // mul + seto
- { ISD::UMULO, MVT::i16, 2 },
- { ISD::UMULO, MVT::i8, 2 },
+ static const CostKindTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+ { ISD::ABS, MVT::i32, { 2 } }, // SUB+CMOV
+ { ISD::ABS, MVT::i16, { 2 } }, // SUB+CMOV
+ { ISD::BITREVERSE, MVT::i32, { 14 } },
+ { ISD::BITREVERSE, MVT::i16, { 14 } },
+ { ISD::BITREVERSE, MVT::i8, { 11 } },
+ { ISD::BSWAP, MVT::i32, { 1 } },
+ { ISD::BSWAP, MVT::i16, { 1 } }, // ROL
+ { ISD::CTLZ, MVT::i32, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTLZ, MVT::i16, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTLZ, MVT::i8, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
+ { ISD::CTTZ, MVT::i32, { 3 } }, // TEST+BSF+CMOV/BRANCH
+ { ISD::CTTZ, MVT::i16, { 3 } }, // TEST+BSF+CMOV/BRANCH
+ { ISD::CTTZ, MVT::i8, { 3 } }, // TEST+BSF+CMOV/BRANCH
+ { ISD::CTPOP, MVT::i32, { 8 } },
+ { ISD::CTPOP, MVT::i16, { 9 } },
+ { ISD::CTPOP, MVT::i8, { 7 } },
+ { ISD::SADDO, MVT::i32, { 1 } },
+ { ISD::SADDO, MVT::i16, { 1 } },
+ { ISD::SADDO, MVT::i8, { 1 } },
+ { ISD::UADDO, MVT::i32, { 1 } },
+ { ISD::UADDO, MVT::i16, { 1 } },
+ { ISD::UADDO, MVT::i8, { 1 } },
+ { ISD::UMULO, MVT::i32, { 2 } }, // mul + seto
+ { ISD::UMULO, MVT::i16, { 2 } },
+ { ISD::UMULO, MVT::i8, { 2 } },
};
Type *RetTy = ICA.getReturnType();
@@ -3670,110 +3670,131 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (ST->useGLMDivSqrtCosts())
if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasBITALG())
if (const auto *Entry = CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasVPOPCNTDQ())
if (const auto *Entry = CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasCDI())
if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasBWI())
if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasAVX512())
if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasXOP())
if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasAVX2())
if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasSSE42())
if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasSSE41())
if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasSSE2())
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasSSE1())
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (ST->hasBMI()) {
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
}
if (ST->hasLZCNT()) {
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
}
if (ST->hasPOPCNT()) {
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
}
if (ISD == ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
@@ -3789,11 +3810,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first,
- ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
- return adjustTableCost(Entry->ISD, Entry->Cost, LT.first, ICA.getFlags());
+ if (auto KindCost = Entry->Cost[CostKind])
+ return adjustTableCost(Entry->ISD, KindCost.value(), LT.first,
+ ICA.getFlags());
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
index d6725f464368..b25e69b01060 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
@@ -639,10 +639,10 @@ define i32 @frem(i32 %arg) {
define i32 @fsqrt(i32 %arg) {
; SSE1-LABEL: 'fsqrt'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
@@ -650,80 +650,69 @@ define i32 @fsqrt(i32 %arg) {
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE2-LABEL: 'fsqrt'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fsqrt'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fsqrt'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fsqrt'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; AVX-LABEL: 'fsqrt'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fsqrt'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'fsqrt'
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; GLM-LABEL: 'fsqrt'
-; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call float @llvm.sqrt.f32(float undef)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
index 7783cd900a9b..1474865890f0 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
@@ -2,14 +2,14 @@
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=-sse2 | FileCheck %s --check-prefixes=CHECK,SSE1
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE42
-; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
-; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
+; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512
;
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
-; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
+; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,AVX
define i32 @fadd(i32 %arg) {
; SSE1-LABEL: 'fadd'
@@ -584,10 +584,10 @@ define i32 @frem(i32 %arg) {
define i32 @fsqrt(i32 %arg) {
; SSE1-LABEL: 'fsqrt'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
@@ -595,80 +595,69 @@ define i32 @fsqrt(i32 %arg) {
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE2-LABEL: 'fsqrt'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fsqrt'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
-; AVX1-LABEL: 'fsqrt'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
-;
-; AVX2-LABEL: 'fsqrt'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; AVX-LABEL: 'fsqrt'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fsqrt'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'fsqrt'
-; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; GLM-LABEL: 'fsqrt'
-; GLM-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call float @llvm.sqrt.f32(float undef)
diff --git a/llvm/test/Analysis/CostModel/X86/costmodel.ll b/llvm/test/Analysis/CostModel/X86/costmodel.ll
index 336a2f6be7bb..4e24b807a195 100644
--- a/llvm/test/Analysis/CostModel/X86/costmodel.ll
+++ b/llvm/test/Analysis/CostModel/X86/costmodel.ll
@@ -18,7 +18,7 @@ define i64 @foo(i64 %arg) {
; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to i8*
; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64
; LATENCY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32
-; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; LATENCY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void undef()
; LATENCY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 undef
;
@@ -31,7 +31,7 @@ define i64 @foo(i64 %arg) {
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %I2P = inttoptr i64 undef to i8*
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %P2I = ptrtoint i8* undef to i64
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %TC = trunc i64 undef to i32
-; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
+; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void undef()
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 undef
;
diff --git a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
index 7374e2e97f67..162776b33079 100644
--- a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll
@@ -4,19 +4,19 @@
define i32 @f32(i32 %arg) {
; SSE2-LABEL: 'f32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'f32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call float @llvm.maxnum.f32(float undef, float undef)
@@ -29,19 +29,19 @@ define i32 @f32(i32 %arg) {
define i32 @f64(i32 %arg) {
; SSE2-LABEL: 'f64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'f64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%f64 = call double @llvm.maxnum.f64(double undef, double undef)
diff --git a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
index ee9fcf9983d3..53a5a20612de 100644
--- a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll
@@ -4,19 +4,19 @@
define i32 @f32(i32 %arg) {
; SSE2-LABEL: 'f32'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'f32'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call float @llvm.minnum.f32(float undef, float undef)
@@ -29,19 +29,19 @@ define i32 @f32(i32 %arg) {
define i32 @f64(i32 %arg) {
; SSE2-LABEL: 'f64'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX2-LABEL: 'f64'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %f64 = call double @llvm.minnum.f64(double undef, double undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2f64 = call <2 x double> @llvm.minnum.v2f64(<2 x double> undef, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4f64 = call <4 x double> @llvm.minnum.v4f64(<4 x double> undef, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8f64 = call <8 x double> @llvm.minnum.v8f64(<8 x double> undef, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16f64 = call <16 x double> @llvm.minnum.v16f64(<16 x double> undef, <16 x double> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%f64 = call double @llvm.minnum.f64(double undef, double undef)
diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index ac61f881daa0..179dc1d5bd8d 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -55,17 +55,17 @@ define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'umul'
-; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; LATE-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'umul'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; SIZE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'umul'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
@@ -237,18 +237,18 @@ define void @cttz(i32 %a, <16 x i32> %va) {
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'cttz'
-; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; LATE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'cttz'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'cttz'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
@@ -263,18 +263,18 @@ define void @ctlz(i32 %a, <16 x i32> %va) {
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'ctlz'
-; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
-; LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
+; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'ctlz'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
-; SIZE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'ctlz'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
index 16dd203887ec..7c4969d7f01d 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/unsigned-multiply-overflow-check.ll
@@ -35,27 +35,36 @@ define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINEONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; INSTCOMBINEONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
; INSTCOMBINEONLY: bb2:
-; INSTCOMBINEONLY-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINEONLY-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINEONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINEONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
; INSTCOMBINEONLY-NEXT: br label [[BB5]]
; INSTCOMBINEONLY: bb5:
-; INSTCOMBINEONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[UMUL_OV]], [[BB2]] ]
+; INSTCOMBINEONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
; INSTCOMBINEONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[UMUL_OV]]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGONLY: bb2:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGONLY: bb5:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1_FR:%.*]] = freeze i64 [[ARG1:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1_FR]])
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[UMUL_OV]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[MUL_OV]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
;
bb:
%t0 = icmp eq i64 %arg, 0
@@ -92,9 +101,9 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINEONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
; INSTCOMBINEONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
; INSTCOMBINEONLY: bb2:
-; INSTCOMBINEONLY-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINEONLY-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINEONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[UMUL_OV]], true
+; INSTCOMBINEONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINEONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINEONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
; INSTCOMBINEONLY-NEXT: br label [[BB5]]
; INSTCOMBINEONLY: bb5:
; INSTCOMBINEONLY-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
@@ -103,19 +112,28 @@ define i1 @will_overflow(i64 %arg, i64 %arg1) {
; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_overflow(
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: bb:
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = select i1 [[T0]], i1 true, i1 [[PHI_BO]]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGONLY: bb2:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGONLY: bb5:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
; INSTCOMBINESIMPLIFYCFGONLY-NEXT: ret i1 [[T6]]
;
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_overflow(
; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: bb:
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[ARG1_FR:%.*]] = freeze i64 [[ARG1:%.*]]
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG:%.*]], i64 [[ARG1_FR]])
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[PHI_BO:%.*]] = xor i1 [[UMUL_OV]], true
-; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[PHI_BO]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb2:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[MUL_OV:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[PHI_BO:%.*]] = xor i1 [[MUL_OV]], true
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: br label [[BB5]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE: bb5:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHI_BO]], [[BB2]] ]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT: ret i1 [[T6]]
;
bb:
%t0 = icmp eq i64 %arg, 0
More information about the llvm-commits
mailing list