[llvm] f8d4da7 - [X86] Fix reciprocal instruction throughput/uops counts
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 1 12:26:17 PDT 2022
Author: Simon Pilgrim
Date: 2022-09-01T20:25:52+01:00
New Revision: f8d4da76307ed15799f352e91d62df27c4602320
URL: https://github.com/llvm/llvm-project/commit/f8d4da76307ed15799f352e91d62df27c4602320
DIFF: https://github.com/llvm/llvm-project/commit/f8d4da76307ed15799f352e91d62df27c4602320.diff
LOG: [X86] Fix reciprocal instruction throughput/uops counts
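Matches the numbers from the AMD Software Optimization Guide (SoG) and Agner Fog's instruction tables: the reciprocal instructions should always issue on FPU pipes 0+1, the folded-load forms take no additional uops, and znver1 double-pumps 256-bit vectors.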
Noticed while adding CostKinds support to the x86 cost models
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/lib/Target/X86/X86ScheduleZnver2.td
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 85f85fbda39e4..fb82ec7aec896 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -357,11 +357,11 @@ defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
-defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>;
+defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-//defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
-defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
-//defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
+defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
+defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5>;
+defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>;
defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
@@ -1480,39 +1480,6 @@ def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;
// x,m,i.
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
-// RSQRTSS
-// TODO - convert to ZnWriteResFpuPair
-// x,x.
-def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
- let Latency = 5;
-}
-def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;
-
-// x,m128.
-def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
- let Latency = 12;
- let NumMicroOps = 2;
- let ResourceCycles = [1,2]; // FIXME: Is this right?
-}
-def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;
-
-// RSQRTPS
-// TODO - convert to ZnWriteResFpuPair
-// y,y.
-def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;
-
-// y,m256.
-def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
- let Latency = 12;
- let NumMicroOps = 2;
-}
-def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;
-
//-- Other instructions --//
// VZEROUPPER.
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index c47d235eab9b2..b6daa4fecea80 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -354,9 +354,11 @@ defm : Zn2WriteResFpuPair<WriteFMAY, [Zn2FPU03], 5>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : Zn2WriteResFpuPair<WriteFRcp, [Zn2FPU01], 5>;
defm : Zn2WriteResFpuPair<WriteFRcpX, [Zn2FPU01], 5>;
-defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5, [1], 1, 7, 2>;
+defm : Zn2WriteResFpuPair<WriteFRcpY, [Zn2FPU01], 5>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5, [1], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFRsqrt, [Zn2FPU01], 5>;
+defm : Zn2WriteResFpuPair<WriteFRsqrtX, [Zn2FPU01], 5>;
+defm : Zn2WriteResFpuPair<WriteFRsqrtY, [Zn2FPU01], 5>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
defm : Zn2WriteResFpuPair<WriteFSqrt, [Zn2FPU3], 20, [20]>;
defm : Zn2WriteResFpuPair<WriteFSqrtX, [Zn2FPU3], 20, [20]>;
@@ -1491,39 +1493,6 @@ def : SchedAlias<WriteDPPD, Zn2WriteMicrocoded>;
// x,m,i.
def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
-// RSQRTSS
-// TODO - convert to Zn2WriteResFpuPair
-// x,x.
-def Zn2WriteRSQRTSSr : SchedWriteRes<[Zn2FPU02]> {
- let Latency = 5;
-}
-def : SchedAlias<WriteFRsqrt, Zn2WriteRSQRTSSr>;
-
-// x,m128.
-def Zn2WriteRSQRTSSLd: SchedWriteRes<[Zn2AGU, Zn2FPU02]> {
- let Latency = 12;
- let NumMicroOps = 2;
- let ResourceCycles = [1,2];
-}
-def : SchedAlias<WriteFRsqrtLd, Zn2WriteRSQRTSSLd>;
-
-// RSQRTPS
-// TODO - convert to Zn2WriteResFpuPair
-// y,y.
-def Zn2WriteRSQRTPSYr : SchedWriteRes<[Zn2FPU01]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def : SchedAlias<WriteFRsqrtY, Zn2WriteRSQRTPSYr>;
-
-// y,m256.
-def Zn2WriteRSQRTPSYLd : SchedWriteRes<[Zn2AGU, Zn2FPU01]> {
- let Latency = 12;
- let NumMicroOps = 2;
-}
-def : SchedAlias<WriteFRsqrtYLd, Zn2WriteRSQRTPSYLd>;
-
//-- Other instructions --//
// VZEROUPPER.
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
index 0bd5eac8bd49c..ace7b60d94955 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
@@ -1628,8 +1628,8 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vpxor (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vrcpps %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %xmm2
-# CHECK-NEXT: 1 5 0.50 vrcpps %ymm0, %ymm2
-# CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2
+# CHECK-NEXT: 2 5 1.00 vrcpps %ymm0, %ymm2
+# CHECK-NEXT: 2 12 1.00 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 4 1.00 vroundpd $1, %xmm0, %xmm2
@@ -1645,11 +1645,11 @@ vzeroupper
# CHECK-NEXT: 1 4 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 11 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %ymm2
+# CHECK-NEXT: 2 12 1.00 * vrsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 12 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 12 0.50 * vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2
@@ -1738,7 +1738,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 175.00 175.00 - - - - - 148.58 187.08 220.25 527.08 -
+# CHECK-NEXT: 175.00 175.00 - - - - - 149.58 189.58 218.75 527.08 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2340,8 +2340,8 @@ vzeroupper
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpxor (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpps %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpps (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - vrcpps %ymm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 1.00 - - - vrcpps (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - - 1.00 - vroundpd $1, %xmm0, %xmm2
@@ -2359,9 +2359,9 @@ vzeroupper
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - vrsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - 0.50 - 0.50 - - vrsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - vrsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 1.00 - - - vrsqrtps (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - vrsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vshufpd $1, %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
index ea4bafe3332d8..8bb62fe40aaca 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse1.s
@@ -295,9 +295,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rcpss %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * rcpss (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 2 12 0.50 * rsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 12 0.50 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtss %xmm0, %xmm2
-# CHECK-NEXT: 2 12 1.00 * rsqrtss (%rax), %xmm2
+# CHECK-NEXT: 1 12 0.50 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 * * U sfence
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
@@ -335,7 +335,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 32.50 32.50 - - - - - 25.00 29.50 30.00 112.50 -
+# CHECK-NEXT: 32.50 32.50 - - - - - 24.50 30.50 28.50 112.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -441,8 +441,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rcpss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rsqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - 0.50 - 0.50 - - rsqrtss %xmm0, %xmm2
-# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - 1.00 - - rsqrtss (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - rsqrtss %xmm0, %xmm2
+# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 - - - rsqrtss (%rax), %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - sfence
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
index 4fd1cfa64f82b..4a5a785475eab 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
@@ -1629,7 +1629,7 @@ vzeroupper
# CHECK-NEXT: 1 5 0.50 vrcpps %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 vrcpps %ymm0, %ymm2
-# CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2
+# CHECK-NEXT: 1 12 0.50 * vrcpps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vroundpd $1, %xmm0, %xmm2
@@ -1645,11 +1645,11 @@ vzeroupper
# CHECK-NEXT: 1 3 1.00 vroundss $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 1.00 * vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2
-# CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2
-# CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %ymm2
+# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 5 0.50 vrsqrtps %ymm0, %ymm2
+# CHECK-NEXT: 1 12 0.50 * vrsqrtps (%rax), %ymm2
# CHECK-NEXT: 1 5 0.50 vrsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 2 12 1.00 * vrsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 1 12 0.50 * vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vshufpd $1, %ymm0, %ymm1, %ymm2
@@ -1739,7 +1739,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 133.92 169.42 206.25 467.42 -
+# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 132.92 169.92 204.75 467.42 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -2359,10 +2359,10 @@ vzeroupper
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vroundss $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - vrsqrtps %ymm0, %ymm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtps %ymm0, %ymm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtps (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 - 0.50 - - vrsqrtss %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - 1.00 - - vrsqrtss (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - vrsqrtss %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - vrsqrtss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vshufpd $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - vshufpd $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vshufpd $1, %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
index 34d2df5ccca36..83c5da7c2c27b 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s
@@ -295,9 +295,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rcpss %xmm0, %xmm2
# CHECK-NEXT: 1 12 0.50 * rcpss (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtps %xmm0, %xmm2
-# CHECK-NEXT: 2 12 0.50 * rsqrtps (%rax), %xmm2
+# CHECK-NEXT: 1 12 0.50 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 5 0.50 rsqrtss %xmm0, %xmm2
-# CHECK-NEXT: 2 12 1.00 * rsqrtss (%rax), %xmm2
+# CHECK-NEXT: 1 12 0.50 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 * * U sfence
# CHECK-NEXT: 1 1 0.50 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * shufps $1, (%rax), %xmm2
@@ -336,7 +336,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12]
-# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 25.00 29.50 30.00 112.50 -
+# CHECK-NEXT: 21.67 21.67 21.67 - - - - - 24.50 30.50 28.50 112.50 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions:
@@ -442,8 +442,8 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rcpss (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rsqrtps (%rax), %xmm2
-# CHECK-NEXT: - - - - - - - - 0.50 - 0.50 - - rsqrtss %xmm0, %xmm2
-# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - 1.00 - - rsqrtss (%rax), %xmm2
+# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - rsqrtss %xmm0, %xmm2
+# CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 0.50 - - - rsqrtss (%rax), %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - sfence
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 - - shufps $1, (%rax), %xmm2
More information about the llvm-commits mailing list