[llvm] d384a4c - [X86] Adjust vector test costs to match SoG (Issue #54889)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue May 31 01:14:33 PDT 2022
Author: Simon Pilgrim
Date: 2022-05-31T09:14:06+01:00
New Revision: d384a4c530623c73048da040210d44fea1167321
URL: https://github.com/llvm/llvm-project/commit/d384a4c530623c73048da040210d44fea1167321
DIFF: https://github.com/llvm/llvm-project/commit/d384a4c530623c73048da040210d44fea1167321.diff
LOG: [X86] Adjust vector test costs to match SoG (Issue #54889)
The znver1/2 models were incorrectly modelling the latency/throughput/uops of the vector test instructions, and the znver1 ymm variants also require double pumping.
The models now match what I can decipher from the AMD SoG (Software Optimization Guide), Agner and instlatx64 numbers vs the llvm-exegesis report provided by @fabian-r.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/lib/Target/X86/X86ScheduleZnver2.td
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index bfa5a1447f6a7..aada3e0bd9063 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -334,8 +334,8 @@ defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
-defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU12], 1, [2], 1, 7, 1>;
-defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU12], 2, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU12], 4, [4], 3, 7, 2>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
@@ -413,8 +413,8 @@ defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
-defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>;
-defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 2, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 4, [4], 3, 7, 2>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 7a98b943191d4..c47d235eab9b2 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -331,8 +331,8 @@ defm : X86WriteResPairUnsupported<WriteFRndZ>;
defm : Zn2WriteResFpuPair<WriteFLogic, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteFLogicY, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteFLogicZ>;
-defm : Zn2WriteResFpuPair<WriteFTest, [Zn2FPU12], 1, [2], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WriteFTestY, [Zn2FPU12], 1, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFTest, [Zn2FPU12], 3, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFTestY, [Zn2FPU12], 3, [2], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteFTestZ>;
defm : Zn2WriteResFpuPair<WriteFShuffle, [Zn2FPU12], 1>;
defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
@@ -408,8 +408,8 @@ defm : Zn2WriteResFpuPair<WriteVecLogic, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecLogicX, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecLogicY, [Zn2FPU], 1>;
defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
-defm : Zn2WriteResFpuPair<WriteVecTest, [Zn2FPU12], 1, [2], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WriteVecTestY, [Zn2FPU12], 1, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteVecTest, [Zn2FPU12], 3, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteVecTestY, [Zn2FPU12], 3, [2], 1, 7, 1>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
defm : Zn2WriteResFpuPair<WriteVecALU, [Zn2FPU], 1>;
defm : Zn2WriteResFpuPair<WriteVecALUX, [Zn2FPU], 1>;
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
index 905fbe09fae13..c6315d540106a 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
@@ -1604,10 +1604,10 @@ vzeroupper
# CHECK-NEXT: 1 8 0.50 * vpsubusw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpsubw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 1.00 vptest %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %xmm1
-# CHECK-NEXT: 1 1 1.00 vptest %ymm0, %ymm1
-# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %ymm1
+# CHECK-NEXT: 1 2 1.00 vptest %xmm0, %xmm1
+# CHECK-NEXT: 2 9 1.00 * vptest (%rax), %xmm1
+# CHECK-NEXT: 3 4 2.00 vptest %ymm0, %ymm1
+# CHECK-NEXT: 5 11 2.00 * vptest (%rax), %ymm1
# CHECK-NEXT: 1 1 0.25 vpunpckhbw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpunpckhbw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpunpckhdq %xmm0, %xmm1, %xmm2
@@ -1683,14 +1683,14 @@ vzeroupper
# CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1
-# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1
-# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1
-# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1
-# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1
-# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1
+# CHECK-NEXT: 1 2 1.00 vtestpd %xmm0, %xmm1
+# CHECK-NEXT: 2 9 1.00 * vtestpd (%rax), %xmm1
+# CHECK-NEXT: 3 4 2.00 vtestpd %ymm0, %ymm1
+# CHECK-NEXT: 5 11 2.00 * vtestpd (%rax), %ymm1
+# CHECK-NEXT: 1 2 1.00 vtestps %xmm0, %xmm1
+# CHECK-NEXT: 2 9 1.00 * vtestps (%rax), %xmm1
+# CHECK-NEXT: 3 4 2.00 vtestps %ymm0, %ymm1
+# CHECK-NEXT: 5 11 2.00 * vtestps (%rax), %ymm1
# CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1
# CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1
# CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1
@@ -1738,7 +1738,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 175.00 175.00 - - - - - 142.58 175.08 210.25 523.08 -
+# CHECK-NEXT: 175.00 175.00 - - - - - 142.58 181.08 216.25 523.08 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -2318,8 +2318,8 @@ vzeroupper
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpsubw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vptest %xmm0, %xmm1
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vptest (%rax), %xmm1
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vptest %ymm0, %ymm1
-# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vptest (%rax), %ymm1
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vptest %ymm0, %ymm1
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vptest (%rax), %ymm1
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpunpckhbw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpunpckhbw (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpunpckhdq %xmm0, %xmm1, %xmm2
@@ -2397,12 +2397,12 @@ vzeroupper
# CHECK-NEXT: 0.50 0.50 - - - - - - - 0.50 0.50 - vsubss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %xmm0, %xmm1
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %xmm1
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestpd %ymm0, %ymm1
-# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestpd (%rax), %ymm1
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vtestpd %ymm0, %ymm1
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vtestpd (%rax), %ymm1
# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %xmm0, %xmm1
# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %xmm1
-# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - vtestps %ymm0, %ymm1
-# CHECK-NEXT: 0.50 0.50 - - - - - - 1.00 1.00 - - vtestps (%rax), %ymm1
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vtestps %ymm0, %ymm1
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vtestps (%rax), %ymm1
# CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomisd %xmm0, %xmm1
# CHECK-NEXT: 0.50 0.50 - - - - - 0.50 0.50 1.00 - - vucomisd (%rax), %xmm1
# CHECK-NEXT: - - - - - - - 0.50 0.50 1.00 - - vucomiss %xmm0, %xmm1
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
index 6c0f87708798b..fb39f9471654f 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
@@ -241,8 +241,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 11 1.00 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1
+# CHECK-NEXT: 1 2 1.00 ptest %xmm0, %xmm1
+# CHECK-NEXT: 2 9 1.00 * ptest (%rax), %xmm1
# CHECK-NEXT: 1 4 1.00 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 roundps $1, %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
index 755a754a2f0a6..4fd1cfa64f82b 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
@@ -1604,10 +1604,10 @@ vzeroupper
# CHECK-NEXT: 1 8 0.33 * vpsubusw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpsubw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.33 * vpsubw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 1.00 vptest %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %xmm1
-# CHECK-NEXT: 1 1 1.00 vptest %ymm0, %ymm1
-# CHECK-NEXT: 2 8 1.00 * vptest (%rax), %ymm1
+# CHECK-NEXT: 1 3 1.00 vptest %xmm0, %xmm1
+# CHECK-NEXT: 2 10 1.00 * vptest (%rax), %xmm1
+# CHECK-NEXT: 1 3 1.00 vptest %ymm0, %ymm1
+# CHECK-NEXT: 2 10 1.00 * vptest (%rax), %ymm1
# CHECK-NEXT: 1 1 0.25 vpunpckhbw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.33 * vpunpckhbw (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.25 vpunpckhdq %xmm0, %xmm1, %xmm2
@@ -1683,14 +1683,14 @@ vzeroupper
# CHECK-NEXT: 1 10 0.50 * vsubsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 0.50 vsubss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 10 0.50 * vsubss (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 1.00 vtestpd %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1
-# CHECK-NEXT: 1 1 1.00 vtestpd %ymm0, %ymm1
-# CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %ymm1
-# CHECK-NEXT: 1 1 1.00 vtestps %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %xmm1
-# CHECK-NEXT: 1 1 1.00 vtestps %ymm0, %ymm1
-# CHECK-NEXT: 2 8 1.00 * vtestps (%rax), %ymm1
+# CHECK-NEXT: 1 3 1.00 vtestpd %xmm0, %xmm1
+# CHECK-NEXT: 2 10 1.00 * vtestpd (%rax), %xmm1
+# CHECK-NEXT: 1 3 1.00 vtestpd %ymm0, %ymm1
+# CHECK-NEXT: 2 10 1.00 * vtestpd (%rax), %ymm1
+# CHECK-NEXT: 1 3 1.00 vtestps %xmm0, %xmm1
+# CHECK-NEXT: 2 10 1.00 * vtestps (%rax), %xmm1
+# CHECK-NEXT: 1 3 1.00 vtestps %ymm0, %ymm1
+# CHECK-NEXT: 2 10 1.00 * vtestps (%rax), %ymm1
# CHECK-NEXT: 2 3 1.00 vucomisd %xmm0, %xmm1
# CHECK-NEXT: 2 10 1.00 * vucomisd (%rax), %xmm1
# CHECK-NEXT: 2 3 1.00 vucomiss %xmm0, %xmm1
diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s
index fb26a436cf5c3..d15850a3c1f50 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s
@@ -241,8 +241,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 11 1.00 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1
-# CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1
+# CHECK-NEXT: 1 3 1.00 ptest %xmm0, %xmm1
+# CHECK-NEXT: 2 10 1.00 * ptest (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 10 1.00 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2
More information about the llvm-commits mailing list