[llvm] 8534f51 - [CostModel][X86] Add CostKinds handling for sqrt intrinsics
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 4 10:39:37 PDT 2022
Author: Simon Pilgrim
Date: 2022-09-04T18:39:21+01:00
New Revision: 8534f514747d57cd42602403eb98c8230f5c7ff9
URL: https://github.com/llvm/llvm-project/commit/8534f514747d57cd42602403eb98c8230f5c7ff9
DIFF: https://github.com/llvm/llvm-project/commit/8534f514747d57cd42602403eb98c8230f5c7ff9.diff
LOG: [CostModel][X86] Add CostKinds handling for sqrt intrinsics
This was achieved using the 'cost-tables vs llvm-mca' script from D103695
Some of the znver1/znver2 latency/throughput numbers were really weird (some copy+paste afaict) - I've used the numbers from the AMD SoG, which roughly match the 'worst case' range value from Agner
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/lib/Target/X86/X86ScheduleZnver2.td
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
llvm/test/Analysis/CostModel/X86/arith-fp.ll
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s
llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 8003bba64a811..18becc2f26414 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -363,13 +363,13 @@ defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
-defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
-defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 14, [5]>;
+defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 14, [5]>;
+defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 14, [10], 1>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : ZnWriteResFpuPair<WriteFSqrt64, [ZnFPU3], 20, [20]>;
-defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [20]>;
-defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 40, [40], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFSqrt64, [ZnFPU3], 20, [8]>;
+defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [8]>;
+defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 20, [16], 1>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index ebb12a70e2d1e..f44cc3aa8d9ad 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -362,13 +362,13 @@ defm : Zn2WriteResFpuPair<WriteFRsqrt, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRsqrtY, [Zn2FPU01], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : Zn2WriteResFpuPair<WriteFSqrt, [Zn2FPU3], 20, [20]>;
-defm : Zn2WriteResFpuPair<WriteFSqrtX, [Zn2FPU3], 20, [20]>;
-defm : Zn2WriteResFpuPair<WriteFSqrtY, [Zn2FPU3], 28, [28], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFSqrt, [Zn2FPU3], 14, [7]>;
+defm : Zn2WriteResFpuPair<WriteFSqrtX, [Zn2FPU3], 14, [7]>;
+defm : Zn2WriteResFpuPair<WriteFSqrtY, [Zn2FPU3], 14, [7]>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : Zn2WriteResFpuPair<WriteFSqrt64, [Zn2FPU3], 20, [20]>;
-defm : Zn2WriteResFpuPair<WriteFSqrt64X, [Zn2FPU3], 20, [20]>;
-defm : Zn2WriteResFpuPair<WriteFSqrt64Y, [Zn2FPU3], 20, [20], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFSqrt64, [Zn2FPU3], 20, [10]>;
+defm : Zn2WriteResFpuPair<WriteFSqrt64X, [Zn2FPU3], 20, [10]>;
+defm : Zn2WriteResFpuPair<WriteFSqrt64Y, [Zn2FPU3], 20, [10]>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : Zn2WriteResFpuPair<WriteFSqrt80, [Zn2FPU3], 20, [20]>;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 54bbd337c1cc7..8b2b5471edd46 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3242,6 +3242,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::FMAXNUM, MVT::v2f64, { 2 } },
{ ISD::FMAXNUM, MVT::v4f64, { 2 } },
{ ISD::FMAXNUM, MVT::v8f64, { 2 } },
+ { ISD::FSQRT, MVT::f32, { 3, 12, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 3, 12, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f32, { 6, 12, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::f64, { 6, 18, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, { 6, 18, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } }, // Skylake from http://www.agner.org/
};
static const CostKindTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, { 4 } },
@@ -3321,12 +3329,12 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::USUBSAT, MVT::v8i32, { 2 } }, // pmaxud + psubd
{ ISD::FMAXNUM, MVT::v8f32, { 3 } }, // MAXPS + CMPUNORDPS + BLENDVPS
{ ISD::FMAXNUM, MVT::v4f64, { 3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
- { ISD::FSQRT, MVT::f32, { 7 } }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, { 7 } }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v8f32, { 14 } }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::f64, { 14 } }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, { 14 } }, // Haswell from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f64, { 28 } }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, { 7, 15, 1, 1 } }, // vsqrtss
+ { ISD::FSQRT, MVT::v4f32, { 7, 15, 1, 1 } }, // vsqrtps
+ { ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } }, // vsqrtps
+ { ISD::FSQRT, MVT::f64, { 14, 21, 1, 1 } }, // vsqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } }, // vsqrtpd
+ { ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } }, // vsqrtpd
};
static const CostKindTblEntry AVX1CostTbl[] = {
{ ISD::ABS, MVT::v4i64, { 5 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
@@ -3380,30 +3388,30 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::FMAXNUM, MVT::f64, { 3 } }, // MAXSD + CMPUNORDSD + BLENDVPD
{ ISD::FMAXNUM, MVT::v2f64, { 3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
{ ISD::FMAXNUM, MVT::v4f64, { 5 } }, // MAXPD + CMPUNORDPD + BLENDVPD + ?
- { ISD::FSQRT, MVT::f32, { 14 } }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, { 14 } }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v8f32, { 28 } }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::f64, { 21 } }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, { 21 } }, // SNB from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f64, { 43 } }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, { 21, 21, 1, 1 } }, // vsqrtss
+ { ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } }, // vsqrtps
+ { ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } }, // vsqrtps
+ { ISD::FSQRT, MVT::f64, { 27, 27, 1, 1 } }, // vsqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } }, // vsqrtpd
+ { ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } }, // vsqrtpd
};
static const CostKindTblEntry GLMCostTbl[] = {
- { ISD::FSQRT, MVT::f32, { 19 } }, // sqrtss
- { ISD::FSQRT, MVT::v4f32, { 37 } }, // sqrtps
- { ISD::FSQRT, MVT::f64, { 34 } }, // sqrtsd
- { ISD::FSQRT, MVT::v2f64, { 67 } }, // sqrtpd
+ { ISD::FSQRT, MVT::f32, { 19, 20, 1, 1 } }, // sqrtss
+ { ISD::FSQRT, MVT::v4f32, { 37, 41, 1, 5 } }, // sqrtps
+ { ISD::FSQRT, MVT::f64, { 34, 35, 1, 1 } }, // sqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 67, 71, 1, 5 } }, // sqrtpd
};
static const CostKindTblEntry SLMCostTbl[] = {
- { ISD::FSQRT, MVT::f32, { 20 } }, // sqrtss
- { ISD::FSQRT, MVT::v4f32, { 40 } }, // sqrtps
- { ISD::FSQRT, MVT::f64, { 35 } }, // sqrtsd
- { ISD::FSQRT, MVT::v2f64, { 70 } }, // sqrtpd
+ { ISD::FSQRT, MVT::f32, { 20, 20, 1, 1 } }, // sqrtss
+ { ISD::FSQRT, MVT::v4f32, { 40, 41, 1, 5 } }, // sqrtps
+ { ISD::FSQRT, MVT::f64, { 35, 35, 1, 1 } }, // sqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 70, 71, 1, 5 } }, // sqrtpd
};
static const CostKindTblEntry SSE42CostTbl[] = {
{ ISD::USUBSAT, MVT::v4i32, { 2 } }, // pmaxud + psubd
{ ISD::UADDSAT, MVT::v4i32, { 3 } }, // not + pminud + paddd
- { ISD::FSQRT, MVT::f32, { 18 } }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, { 18 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, { 18, 18, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } }, // Nehalem from http://www.agner.org/
};
static const CostKindTblEntry SSE41CostTbl[] = {
{ ISD::ABS, MVT::v2i64, { 2 } }, // BLENDVPD(X,PSUBQ(0,X),X)
@@ -3480,14 +3488,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::USUBSAT, MVT::v16i8, { 1 } },
{ ISD::FMAXNUM, MVT::f64, { 4 } },
{ ISD::FMAXNUM, MVT::v2f64, { 4 } },
- { ISD::FSQRT, MVT::f64, { 32 } }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, { 32 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::f64, { 32, 32, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } }, // Nehalem from http://www.agner.org/
};
static const CostKindTblEntry SSE1CostTbl[] = {
{ ISD::FMAXNUM, MVT::f32, { 4 } },
{ ISD::FMAXNUM, MVT::v4f32, { 4 } },
- { ISD::FSQRT, MVT::f32, { 28 } }, // Pentium III from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, { 56 } }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, { 28, 30, 1, 2 } }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } }, // Pentium III from http://www.agner.org/
};
static const CostKindTblEntry BMI64CostTbl[] = { // 64-bit targets
{ ISD::CTTZ, MVT::i64, { 1 } },
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
index b25e69b010604..a1b31df0747f5 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
@@ -639,10 +639,10 @@ define i32 @frem(i32 %arg) {
define i32 @fsqrt(i32 %arg) {
; SSE1-LABEL: 'fsqrt'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
@@ -650,69 +650,80 @@ define i32 @fsqrt(i32 %arg) {
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE2-LABEL: 'fsqrt'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fsqrt'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
-; AVX-LABEL: 'fsqrt'
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+; AVX1-LABEL: 'fsqrt'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; AVX2-LABEL: 'fsqrt'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fsqrt'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'fsqrt'
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; GLM-LABEL: 'fsqrt'
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call float @llvm.sqrt.f32(float undef)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
index 1474865890f07..140476cdeb8dc 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
@@ -584,8 +584,8 @@ define i32 @frem(i32 %arg) {
define i32 @fsqrt(i32 %arg) {
; SSE1-LABEL: 'fsqrt'
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
@@ -595,69 +595,69 @@ define i32 @fsqrt(i32 %arg) {
; SSE1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE2-LABEL: 'fsqrt'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fsqrt'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX-LABEL: 'fsqrt'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fsqrt'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; SLM-LABEL: 'fsqrt'
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; SLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
; GLM-LABEL: 'fsqrt'
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; GLM-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
%F32 = call float @llvm.sqrt.f32(float undef)
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
index ff41e4dcb5172..b442e66745af4 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
@@ -749,14 +749,14 @@ define i32 @fsqrt(i32 %arg) {
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'fsqrt'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'fsqrt'
@@ -771,14 +771,14 @@ define i32 @fsqrt(i32 %arg) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'fsqrt'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call float @llvm.sqrt.f32(float undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F64 = call double @llvm.sqrt.f64(double undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'fsqrt'
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
index 068341e79fe79..27d18c2a7c47f 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
@@ -1658,18 +1658,18 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufps $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 20 20.00 vsqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtpd (%rax), %xmm2
-# CHECK-NEXT: 1 40 40.00 vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 2 47 40.00 * vsqrtpd (%rax), %ymm2
-# CHECK-NEXT: 1 20 20.00 vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 28 28.00 vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 2 35 28.00 * vsqrtps (%rax), %ymm2
-# CHECK-NEXT: 1 20 20.00 vsqrtsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 20 20.00 vsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 20 8.00 vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 8.00 * vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: 1 20 16.00 vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: 1 27 16.00 * vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: 1 14 5.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 21 5.00 * vsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 14 10.00 vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 1 21 10.00 * vsqrtps (%rax), %ymm2
+# CHECK-NEXT: 1 20 8.00 vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 27 8.00 * vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 14 5.00 vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 21 5.00 * vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * U vstmxcsr (%rax)
# CHECK-NEXT: 1 3 0.50 vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vsubpd (%rax), %xmm1, %xmm2
@@ -1738,7 +1738,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 175.00 175.00 - - - - - 153.58 191.58 218.75 513.08 -
+# CHECK-NEXT: 175.00 175.00 - - - - - 153.58 191.58 218.75 321.08 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2370,18 +2370,18 @@ vzeroupper
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufps $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtpd (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 40.00 - vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 40.00 - vsqrtpd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 28.00 - vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 28.00 - vsqrtps (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - vsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - 8.00 - vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 8.00 - vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 16.00 - vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 16.00 - vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - - - 5.00 - vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 5.00 - vsqrtps (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 10.00 - vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 10.00 - vsqrtps (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - - - 8.00 - vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 8.00 - vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - 5.00 - vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 5.00 - vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - vstmxcsr (%rax)
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - vsubpd (%rax), %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
index 3529aa74f4b02..bcfed6fdb077b 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
@@ -301,10 +301,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 * * U sfence
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtss (%rax), %xmm2
+# CHECK-NEXT: 1 14 5.00 sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 21 5.00 * sqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 14 5.00 sqrtss %xmm0, %xmm2
+# CHECK-NEXT: 1 21 5.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 * U stmxcsr (%rax)
# CHECK-NEXT: 1 3 0.50 subps %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subps (%rax), %xmm2
@@ -335,7 +335,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 32.50 32.50 - - - - - 24.50 30.50 28.50 120.50 -
+# CHECK-NEXT: 32.50 32.50 - - - - - 24.50 30.50 28.50 60.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -446,10 +446,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sfence
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtss %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtss (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 5.00 - sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 5.00 - sqrtps (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 5.00 - sqrtss %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 5.00 - sqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - stmxcsr (%rax)
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - subps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - subps (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
index c893f2d615f28..188f74653a9ef 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s
@@ -658,10 +658,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 8 0.50 * pxor (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtpd (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtsd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtsd (%rax), %xmm2
+# CHECK-NEXT: 1 20 8.00 sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 8.00 * sqrtpd (%rax), %xmm2
+# CHECK-NEXT: 1 20 8.00 sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 8.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subsd %xmm0, %xmm2
@@ -691,7 +691,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 66.50 66.50 - - - - - 54.92 46.42 85.75 171.92 -
+# CHECK-NEXT: 66.50 66.50 - - - - - 54.92 46.42 85.75 123.92 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -946,10 +946,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - pxor (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtpd (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - 20.00 - sqrtsd %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - - - 20.00 - sqrtsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 8.00 - sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 8.00 - sqrtpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - 8.00 - sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - - - 8.00 - sqrtsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - subpd %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - subpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - subsd %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
index faea6244f8c11..03401ab2f1f90 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
@@ -1658,18 +1658,18 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufps $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vshufps $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 20 20.00 vsqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtpd (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 2 27 20.00 * vsqrtpd (%rax), %ymm2
-# CHECK-NEXT: 1 20 20.00 vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 28 28.00 vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 2 35 28.00 * vsqrtps (%rax), %ymm2
-# CHECK-NEXT: 1 20 20.00 vsqrtsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 20 20.00 vsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 27 20.00 * vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 20 10.00 vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 10.00 * vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: 1 20 10.00 vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: 1 27 10.00 * vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: 1 14 7.00 vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 21 7.00 * vsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 14 7.00 vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 1 21 7.00 * vsqrtps (%rax), %ymm2
+# CHECK-NEXT: 1 20 10.00 vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 27 10.00 * vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 14 7.00 vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 1 21 7.00 * vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 100 0.25 * U vstmxcsr (%rax)
# CHECK-NEXT: 1 3 0.50 vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vsubpd (%rax), %xmm1, %xmm2
@@ -1739,7 +1739,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 134.92 169.92 204.75 465.42 -
+# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 134.92 169.92 204.75 311.42 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -2371,18 +2371,18 @@ vzeroupper
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vshufps $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vshufps $1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - vsqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - vsqrtpd (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - vsqrtpd %ymm0, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - vsqrtpd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - vsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - vsqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - - 28.00 - vsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 28.00 - vsqrtps (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - vsqrtsd %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - vsqrtsd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - vsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - vsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 10.00 - vsqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 10.00 - vsqrtpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 10.00 - vsqrtpd %ymm0, %ymm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 10.00 - vsqrtpd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - - - - 7.00 - vsqrtps %xmm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 7.00 - vsqrtps (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 7.00 - vsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 7.00 - vsqrtps (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - - - - 10.00 - vsqrtsd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 10.00 - vsqrtsd (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 7.00 - vsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 7.00 - vsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - vstmxcsr (%rax)
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - vsubpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 0.50 0.50 - vsubpd (%rax), %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
index a559937e10984..59e9c4ecc9b8a 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
@@ -301,10 +301,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 * * U sfence
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtps (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtss %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtss (%rax), %xmm2
+# CHECK-NEXT: 1 14 7.00 sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 1 21 7.00 * sqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 14 7.00 sqrtss %xmm0, %xmm2
+# CHECK-NEXT: 1 21 7.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 1 100 0.25 * U stmxcsr (%rax)
# CHECK-NEXT: 1 3 0.50 subps %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subps (%rax), %xmm2
@@ -336,7 +336,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 24.50 30.50 28.50 128.50 -
+# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 24.50 30.50 28.50 76.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -447,10 +447,10 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - sfence
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - sqrtps %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - sqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - sqrtss %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - sqrtss (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 7.00 - sqrtps %xmm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 7.00 - sqrtps (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 7.00 - sqrtss %xmm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 7.00 - sqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - stmxcsr (%rax)
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - subps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 0.50 0.50 - subps (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s
index 927684a946a08..c72bad9332025 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s
@@ -658,10 +658,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 8 0.33 * pxor (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtpd (%rax), %xmm2
-# CHECK-NEXT: 1 20 20.00 sqrtsd %xmm0, %xmm2
-# CHECK-NEXT: 1 27 20.00 * sqrtsd (%rax), %xmm2
+# CHECK-NEXT: 1 20 10.00 sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 10.00 * sqrtpd (%rax), %xmm2
+# CHECK-NEXT: 1 20 10.00 sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: 1 27 10.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subpd %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 subsd %xmm0, %xmm2
@@ -692,7 +692,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 53.92 47.92 87.25 174.92 -
+# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 53.92 47.92 87.25 134.92 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -947,10 +947,10 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - pxor (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - shufpd $1, (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - sqrtpd %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - sqrtpd (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - - - - 20.00 - sqrtsd %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 20.00 - sqrtsd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 10.00 - sqrtpd %xmm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 10.00 - sqrtpd (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - - - - 10.00 - sqrtsd %xmm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 10.00 - sqrtsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - subpd %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - 0.50 0.50 - subpd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - subsd %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s
index 81ab290d0044e..abf01cb0b10f9 100644
--- a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s
+++ b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s
@@ -51,11 +51,11 @@ rcpss (%rax), %xmm1
# SKYLAKE-NEXT: 0123456789
# SKYLAKE-NEXT: Index 0123456789 0
-# ZNVER1-NEXT: 0123456789 0
-# ZNVER1-NEXT: Index 0123456789 0123456789
+# ZNVER1-NEXT: 0123456789
+# ZNVER1-NEXT: Index 0123456789 01234
-# ZNVER2-NEXT: 0123456789 0
-# ZNVER2-NEXT: Index 0123456789 0123456789
+# ZNVER2-NEXT: 0123456789
+# ZNVER2-NEXT: Index 0123456789 01234
# BARCELONA: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax
# BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1
@@ -75,11 +75,11 @@ rcpss (%rax), %xmm1
# SKYLAKE: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax
# SKYLAKE-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1
-# ZNVER1: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax
-# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1
+# ZNVER1: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax
+# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1
-# ZNVER2: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax
-# ZNVER2-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1
+# ZNVER2: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax
+# ZNVER2-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1
# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
More information about the llvm-commits
mailing list