[llvm] d384a4c - [X86] Adjust vector test costs to match SoG (Issue #54889)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue May 31 01:14:33 PDT 2022


Author: Simon Pilgrim
Date: 2022-05-31T09:14:06+01:00
New Revision: d384a4c530623c73048da040210d44fea1167321

URL: https://github.com/llvm/llvm-project/commit/d384a4c530623c73048da040210d44fea1167321
DIFF: https://github.com/llvm/llvm-project/commit/d384a4c530623c73048da040210d44fea1167321.diff

LOG: [X86] Adjust vector test costs to match SoG (Issue #54889)

The znver1/2 models were incorrectly modelling the latency/throughput/uops of the vector test instructions, and the znver1 ymm variants also require double pumping.

The models now match what I can decipher from the AMD SoG, Agner and instlatx64 numbers, compared against the llvm-exegesis report provided by @fabian-r.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ScheduleZnver1.td
    llvm/lib/Target/X86/X86ScheduleZnver2.td
    llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
    llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index bfa5a1447f6a7..aada3e0bd9063 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -334,8 +334,8 @@ defm : X86WriteResPairUnsupported<WriteFRndZ>;
 defm : ZnWriteResFpuPair<WriteFLogic,    [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteFLogicY,   [ZnFPU],   1>;
 defm : X86WriteResPairUnsupported<WriteFLogicZ>;
-defm : ZnWriteResFpuPair<WriteFTest,     [ZnFPU12], 1, [2], 1, 7, 1>;
-defm : ZnWriteResFpuPair<WriteFTestY,    [ZnFPU12], 1, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFTest,     [ZnFPU12], 2, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteFTestY,    [ZnFPU12], 4, [4], 3, 7, 2>;
 defm : X86WriteResPairUnsupported<WriteFTestZ>;
 defm : ZnWriteResFpuPair<WriteFShuffle,  [ZnFPU12], 1>;
 defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
@@ -413,8 +413,8 @@ defm : ZnWriteResFpuPair<WriteVecLogic,   [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecLogicX,  [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecLogicY,  [ZnFPU],   1>;
 defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
-defm : ZnWriteResFpuPair<WriteVecTest,    [ZnFPU12], 1, [2], 1, 7, 1>;
-defm : ZnWriteResFpuPair<WriteVecTestY,   [ZnFPU12], 1, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteVecTest,    [ZnFPU12], 2, [2], 1, 7, 1>;
+defm : ZnWriteResFpuPair<WriteVecTestY,   [ZnFPU12], 4, [4], 3, 7, 2>;
 defm : X86WriteResPairUnsupported<WriteVecTestZ>;
 defm : ZnWriteResFpuPair<WriteVecALU,     [ZnFPU],   1>;
 defm : ZnWriteResFpuPair<WriteVecALUX,    [ZnFPU],   1>;

diff  --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 7a98b943191d4..c47d235eab9b2 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -331,8 +331,8 @@ defm : X86WriteResPairUnsupported<WriteFRndZ>;
 defm : Zn2WriteResFpuPair<WriteFLogic,    [Zn2FPU],   1>;
 defm : Zn2WriteResFpuPair<WriteFLogicY,   [Zn2FPU],   1>;
 defm : X86WriteResPairUnsupported<WriteFLogicZ>;
-defm : Zn2WriteResFpuPair<WriteFTest,     [Zn2FPU12], 1, [2], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WriteFTestY,    [Zn2FPU12], 1, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFTest,     [Zn2FPU12], 3, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteFTestY,    [Zn2FPU12], 3, [2], 1, 7, 1>;
 defm : X86WriteResPairUnsupported<WriteFTestZ>;
 defm : Zn2WriteResFpuPair<WriteFShuffle,  [Zn2FPU12], 1>;
 defm : Zn2WriteResFpuPair<WriteFShuffleY, [Zn2FPU12], 1>;
@@ -408,8 +408,8 @@ defm : Zn2WriteResFpuPair<WriteVecLogic,   [Zn2FPU],   1>;
 defm : Zn2WriteResFpuPair<WriteVecLogicX,  [Zn2FPU],   1>;
 defm : Zn2WriteResFpuPair<WriteVecLogicY,  [Zn2FPU],   1>;
 defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
-defm : Zn2WriteResFpuPair<WriteVecTest,    [Zn2FPU12], 1, [2], 1, 7, 1>;
-defm : Zn2WriteResFpuPair<WriteVecTestY,   [Zn2FPU12], 1, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteVecTest,    [Zn2FPU12], 3, [2], 1, 7, 1>;
+defm : Zn2WriteResFpuPair<WriteVecTestY,   [Zn2FPU12], 3, [2], 1, 7, 1>;
 defm : X86WriteResPairUnsupported<WriteVecTestZ>;
 defm : Zn2WriteResFpuPair<WriteVecALU,     [Zn2FPU],   1>;
 defm : Zn2WriteResFpuPair<WriteVecALUX,    [Zn2FPU],   1>;

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
index 905fbe09fae13..c6315d540106a 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx1.s
@@ -1604,10 +1604,10 @@ vzeroupper
 # CHECK-NEXT:  1      8     0.50    *                   vpsubusw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpsubw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpsubw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vptest	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   vptest	(%rax), %xmm1
-# CHECK-NEXT:  1      1     1.00                        vptest	%ymm0, %ymm1
-# CHECK-NEXT:  2      8     1.00    *                   vptest	(%rax), %ymm1
+# CHECK-NEXT:  1      2     1.00                        vptest	%xmm0, %xmm1
+# CHECK-NEXT:  2      9     1.00    *                   vptest	(%rax), %xmm1
+# CHECK-NEXT:  3      4     2.00                        vptest	%ymm0, %ymm1
+# CHECK-NEXT:  5      11    2.00    *                   vptest	(%rax), %ymm1
 # CHECK-NEXT:  1      1     0.25                        vpunpckhbw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.50    *                   vpunpckhbw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpunpckhdq	%xmm0, %xmm1, %xmm2
@@ -1683,14 +1683,14 @@ vzeroupper
 # CHECK-NEXT:  1      10    0.50    *                   vsubsd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     0.50                        vsubss	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      10    0.50    *                   vsubss	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vtestpd	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestpd	(%rax), %xmm1
-# CHECK-NEXT:  1      1     1.00                        vtestpd	%ymm0, %ymm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestpd	(%rax), %ymm1
-# CHECK-NEXT:  1      1     1.00                        vtestps	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestps	(%rax), %xmm1
-# CHECK-NEXT:  1      1     1.00                        vtestps	%ymm0, %ymm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestps	(%rax), %ymm1
+# CHECK-NEXT:  1      2     1.00                        vtestpd	%xmm0, %xmm1
+# CHECK-NEXT:  2      9     1.00    *                   vtestpd	(%rax), %xmm1
+# CHECK-NEXT:  3      4     2.00                        vtestpd	%ymm0, %ymm1
+# CHECK-NEXT:  5      11    2.00    *                   vtestpd	(%rax), %ymm1
+# CHECK-NEXT:  1      2     1.00                        vtestps	%xmm0, %xmm1
+# CHECK-NEXT:  2      9     1.00    *                   vtestps	(%rax), %xmm1
+# CHECK-NEXT:  3      4     2.00                        vtestps	%ymm0, %ymm1
+# CHECK-NEXT:  5      11    2.00    *                   vtestps	(%rax), %ymm1
 # CHECK-NEXT:  2      3     1.00                        vucomisd	%xmm0, %xmm1
 # CHECK-NEXT:  2      10    1.00    *                   vucomisd	(%rax), %xmm1
 # CHECK-NEXT:  2      3     1.00                        vucomiss	%xmm0, %xmm1
@@ -1738,7 +1738,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT: 175.00 175.00  -      -      -      -      -     142.58 175.08 210.25 523.08  -
+# CHECK-NEXT: 175.00 175.00  -      -      -      -      -     142.58 181.08 216.25 523.08  -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2318,8 +2318,8 @@ vzeroupper
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpsubw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vptest	%xmm0, %xmm1
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vptest	(%rax), %xmm1
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vptest	%ymm0, %ymm1
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vptest	(%rax), %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     2.00   2.00    -      -     vptest	%ymm0, %ymm1
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     2.00   2.00    -      -     vptest	(%rax), %ymm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhbw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhbw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -     vpunpckhdq	%xmm0, %xmm1, %xmm2
@@ -2397,12 +2397,12 @@ vzeroupper
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -      -     0.50   0.50    -     vsubss	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vtestpd	%xmm0, %xmm1
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vtestpd	(%rax), %xmm1
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vtestpd	%ymm0, %ymm1
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vtestpd	(%rax), %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     2.00   2.00    -      -     vtestpd	%ymm0, %ymm1
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     2.00   2.00    -      -     vtestpd	(%rax), %ymm1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vtestps	%xmm0, %xmm1
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vtestps	(%rax), %xmm1
-# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00   1.00    -      -     vtestps	%ymm0, %ymm1
-# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     1.00   1.00    -      -     vtestps	(%rax), %ymm1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     2.00   2.00    -      -     vtestps	%ymm0, %ymm1
+# CHECK-NEXT: 0.50   0.50    -      -      -      -      -      -     2.00   2.00    -      -     vtestps	(%rax), %ymm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   1.00    -      -     vucomisd	%xmm0, %xmm1
 # CHECK-NEXT: 0.50   0.50    -      -      -      -      -     0.50   0.50   1.00    -      -     vucomisd	(%rax), %xmm1
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50   1.00    -      -     vucomiss	%xmm0, %xmm1

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
index 6c0f87708798b..fb39f9471654f 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse41.s
@@ -241,8 +241,8 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  1      11    1.00    *                   pmuldq	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmulld	%xmm0, %xmm2
 # CHECK-NEXT:  2      11    1.00    *                   pmulld	(%rax), %xmm2
-# CHECK-NEXT:  1      1     1.00                        ptest	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   ptest	(%rax), %xmm1
+# CHECK-NEXT:  1      2     1.00                        ptest	%xmm0, %xmm1
+# CHECK-NEXT:  2      9     1.00    *                   ptest	(%rax), %xmm1
 # CHECK-NEXT:  1      4     1.00                        roundpd	$1, %xmm0, %xmm2
 # CHECK-NEXT:  2      11    1.00    *                   roundpd	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        roundps	$1, %xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
index 755a754a2f0a6..4fd1cfa64f82b 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s
@@ -1604,10 +1604,10 @@ vzeroupper
 # CHECK-NEXT:  1      8     0.33    *                   vpsubusw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpsubw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.33    *                   vpsubw	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vptest	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   vptest	(%rax), %xmm1
-# CHECK-NEXT:  1      1     1.00                        vptest	%ymm0, %ymm1
-# CHECK-NEXT:  2      8     1.00    *                   vptest	(%rax), %ymm1
+# CHECK-NEXT:  1      3     1.00                        vptest	%xmm0, %xmm1
+# CHECK-NEXT:  2      10    1.00    *                   vptest	(%rax), %xmm1
+# CHECK-NEXT:  1      3     1.00                        vptest	%ymm0, %ymm1
+# CHECK-NEXT:  2      10    1.00    *                   vptest	(%rax), %ymm1
 # CHECK-NEXT:  1      1     0.25                        vpunpckhbw	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      8     0.33    *                   vpunpckhbw	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      1     0.25                        vpunpckhdq	%xmm0, %xmm1, %xmm2
@@ -1683,14 +1683,14 @@ vzeroupper
 # CHECK-NEXT:  1      10    0.50    *                   vsubsd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     0.50                        vsubss	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  1      10    0.50    *                   vsubss	(%rax), %xmm1, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vtestpd	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestpd	(%rax), %xmm1
-# CHECK-NEXT:  1      1     1.00                        vtestpd	%ymm0, %ymm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestpd	(%rax), %ymm1
-# CHECK-NEXT:  1      1     1.00                        vtestps	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestps	(%rax), %xmm1
-# CHECK-NEXT:  1      1     1.00                        vtestps	%ymm0, %ymm1
-# CHECK-NEXT:  2      8     1.00    *                   vtestps	(%rax), %ymm1
+# CHECK-NEXT:  1      3     1.00                        vtestpd	%xmm0, %xmm1
+# CHECK-NEXT:  2      10    1.00    *                   vtestpd	(%rax), %xmm1
+# CHECK-NEXT:  1      3     1.00                        vtestpd	%ymm0, %ymm1
+# CHECK-NEXT:  2      10    1.00    *                   vtestpd	(%rax), %ymm1
+# CHECK-NEXT:  1      3     1.00                        vtestps	%xmm0, %xmm1
+# CHECK-NEXT:  2      10    1.00    *                   vtestps	(%rax), %xmm1
+# CHECK-NEXT:  1      3     1.00                        vtestps	%ymm0, %ymm1
+# CHECK-NEXT:  2      10    1.00    *                   vtestps	(%rax), %ymm1
 # CHECK-NEXT:  2      3     1.00                        vucomisd	%xmm0, %xmm1
 # CHECK-NEXT:  2      10    1.00    *                   vucomisd	(%rax), %xmm1
 # CHECK-NEXT:  2      3     1.00                        vucomiss	%xmm0, %xmm1

diff  --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s
index fb26a436cf5c3..d15850a3c1f50 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s
@@ -241,8 +241,8 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  1      11    1.00    *                   pmuldq	(%rax), %xmm2
 # CHECK-NEXT:  1      4     1.00                        pmulld	%xmm0, %xmm2
 # CHECK-NEXT:  2      11    1.00    *                   pmulld	(%rax), %xmm2
-# CHECK-NEXT:  1      1     1.00                        ptest	%xmm0, %xmm1
-# CHECK-NEXT:  2      8     1.00    *                   ptest	(%rax), %xmm1
+# CHECK-NEXT:  1      3     1.00                        ptest	%xmm0, %xmm1
+# CHECK-NEXT:  2      10    1.00    *                   ptest	(%rax), %xmm1
 # CHECK-NEXT:  1      3     1.00                        roundpd	$1, %xmm0, %xmm2
 # CHECK-NEXT:  1      10    1.00    *                   roundpd	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        roundps	$1, %xmm0, %xmm2


        


More information about the llvm-commits mailing list