[llvm] 1956f28 - [X86] Adjust vector extend to ymm to match SoG (Issue #54889)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon May 30 00:59:03 PDT 2022
Author: Simon Pilgrim
Date: 2022-05-30T08:58:56+01:00
New Revision: 1956f280376a73013f442f7f5ccb0a9dfe0f6b27
URL: https://github.com/llvm/llvm-project/commit/1956f280376a73013f442f7f5ccb0a9dfe0f6b27
DIFF: https://github.com/llvm/llvm-project/commit/1956f280376a73013f442f7f5ccb0a9dfe0f6b27.diff
LOG: [X86] Adjust vector extend to ymm to match SoG (Issue #54889)
On znver1, the ymm variants of the VPMOVSX**/VPMOVZX** instructions require double pumping.
The scheduling model now matches the numbers from the AMD Software Optimization Guide (SoG), Agner Fog's tables and instlatx64.
Thanks to @fabian-r for the report.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleZnver1.td
llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 2e226e81c952..041d224d40ca 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -439,7 +439,7 @@ defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
-defm : ZnWriteResFpuPair<WriteVPMOV256, [ZnFPU12], 1, [1], 2>;
+defm : ZnWriteResFpuPair<WriteVPMOV256, [ZnFPU12], 1, [4], 3>;
defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
index 19e411dfaef6..820728ef2d1d 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-avx2.s
@@ -625,30 +625,30 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 2 2.00 vpmovmskb %ymm0, %ecx
-# CHECK-NEXT: 2 1 0.50 vpmovsxbd %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovsxbq %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovsxbq (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovsxbw %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovsxbw (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovsxdq %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovsxdq (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovsxwd %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovsxwd (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovsxwq %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovsxwq (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovzxbd %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovzxbq %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovzxbq (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovzxbw %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovzxdq %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovzxdq (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovzxwd %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovzxwd (%rax), %ymm2
-# CHECK-NEXT: 2 1 0.50 vpmovzxwq %xmm0, %ymm2
-# CHECK-NEXT: 2 8 0.50 * vpmovzxwq (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovsxbd %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovsxbd (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovsxbq %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovsxbq (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovsxbw %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovsxbw (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovsxdq %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovsxdq (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovsxwd %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovsxwd (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovsxwq %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovsxwq (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovzxbd %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovzxbd (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovzxbq %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovzxbq (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovzxbw %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovzxbw (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovzxdq %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovzxdq (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovzxwd %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovzxwd (%rax), %ymm2
+# CHECK-NEXT: 3 1 2.00 vpmovzxwq %xmm0, %ymm2
+# CHECK-NEXT: 3 8 2.00 * vpmovzxwq (%rax), %ymm2
# CHECK-NEXT: 1 4 1.00 vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 11 1.00 * vpmuldq (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vpmulhrsw %ymm0, %ymm1, %ymm2
@@ -778,7 +778,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
-# CHECK-NEXT: 67.00 67.00 - - - - - 71.17 123.67 105.00 41.17 -
+# CHECK-NEXT: 67.00 67.00 - - - - - 71.17 159.67 141.00 41.17 -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
@@ -947,30 +947,30 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - vpminuw %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 0.25 0.25 0.25 0.25 - vpminuw (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - - - 2.00 - - vpmovmskb %ymm0, %ecx
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxbw %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxbw (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxdq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxdq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovsxwq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovsxwq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxbw %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxbw (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxdq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxdq (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwd %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwd (%rax), %ymm2
-# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - vpmovzxwq %xmm0, %ymm2
-# CHECK-NEXT: 0.50 0.50 - - - - - - 0.50 0.50 - - vpmovzxwq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxbd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxbd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxbq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxbq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxbw %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxbw (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxdq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxdq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxwd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxwd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovsxwq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovsxwq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxbd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxbd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxbq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxbq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxbw %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxbw (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxdq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxdq (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxwd %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxwd (%rax), %ymm2
+# CHECK-NEXT: - - - - - - - - 2.00 2.00 - - vpmovzxwq %xmm0, %ymm2
+# CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - vpmovzxwq (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmuldq %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 0.50 0.50 - - - - - 1.00 - - - - vpmuldq (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - vpmulhrsw %ymm0, %ymm1, %ymm2
More information about the llvm-commits mailing list