[llvm] 630820b - [X86][SLM] Adjust XMM non-PMULLD throughput costs to half rate.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 9 05:53:22 PDT 2021
Author: Simon Pilgrim
Date: 2021-06-09T13:51:40+01:00
New Revision: 630820bafc6866ce1efa4f1e2c4b11f6250eae9c
URL: https://github.com/llvm/llvm-project/commit/630820bafc6866ce1efa4f1e2c4b11f6250eae9c
DIFF: https://github.com/llvm/llvm-project/commit/630820bafc6866ce1efa4f1e2c4b11f6250eae9c.diff
LOG: [X86][SLM] Adjust XMM non-PMULLD throughput costs to half rate.
Match what's reported in the costs table, Agner Fog's instruction tables and the Intel AOM.
Added:
Modified:
llvm/lib/Target/X86/X86ScheduleSLM.td
llvm/test/CodeGen/X86/slow-pmulld.ll
llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index bbfd7b34cf2d4..98e5a2ede05b2 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -341,8 +341,8 @@ defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2], 2>;
+defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2], 2>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll
index 9a10a2353e16a..aeccae300eea5 100644
--- a/llvm/test/CodeGen/X86/slow-pmulld.ll
+++ b/llvm/test/CodeGen/X86/slow-pmulld.ll
@@ -537,40 +537,40 @@ define <8 x i32> @test_mul_v8i32_v8i16(<8 x i16> %A) {
define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
; SLM32-LABEL: test_mul_v16i32_v16i16:
; SLM32: # %bb.0:
-; SLM32-NEXT: movdqa %xmm1, %xmm3
-; SLM32-NEXT: movdqa %xmm0, %xmm1
+; SLM32-NEXT: movdqa %xmm0, %xmm4
; SLM32-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778]
-; SLM32-NEXT: movdqa %xmm1, %xmm2
-; SLM32-NEXT: movdqa %xmm3, %xmm4
-; SLM32-NEXT: pmullw %xmm0, %xmm1
+; SLM32-NEXT: movdqa %xmm1, %xmm3
+; SLM32-NEXT: movdqa %xmm4, %xmm2
+; SLM32-NEXT: pmullw %xmm0, %xmm4
; SLM32-NEXT: pmulhuw %xmm0, %xmm2
; SLM32-NEXT: pmullw %xmm0, %xmm3
-; SLM32-NEXT: pmulhuw %xmm0, %xmm4
-; SLM32-NEXT: movdqa %xmm1, %xmm0
-; SLM32-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SLM32-NEXT: pmulhuw %xmm0, %xmm1
+; SLM32-NEXT: movdqa %xmm4, %xmm0
+; SLM32-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SLM32-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SLM32-NEXT: movdqa %xmm3, %xmm2
-; SLM32-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SLM32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SLM32-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SLM32-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SLM32-NEXT: movdqa %xmm4, %xmm1
; SLM32-NEXT: retl
;
; SLM64-LABEL: test_mul_v16i32_v16i16:
; SLM64: # %bb.0:
-; SLM64-NEXT: movdqa %xmm1, %xmm3
-; SLM64-NEXT: movdqa %xmm0, %xmm1
+; SLM64-NEXT: movdqa %xmm0, %xmm4
; SLM64-NEXT: movdqa {{.*#+}} xmm0 = [18778,18778,18778,18778,18778,18778,18778,18778]
-; SLM64-NEXT: movdqa %xmm1, %xmm2
-; SLM64-NEXT: movdqa %xmm3, %xmm4
-; SLM64-NEXT: pmullw %xmm0, %xmm1
+; SLM64-NEXT: movdqa %xmm1, %xmm3
+; SLM64-NEXT: movdqa %xmm4, %xmm2
+; SLM64-NEXT: pmullw %xmm0, %xmm4
; SLM64-NEXT: pmulhuw %xmm0, %xmm2
; SLM64-NEXT: pmullw %xmm0, %xmm3
-; SLM64-NEXT: pmulhuw %xmm0, %xmm4
-; SLM64-NEXT: movdqa %xmm1, %xmm0
-; SLM64-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SLM64-NEXT: pmulhuw %xmm0, %xmm1
+; SLM64-NEXT: movdqa %xmm4, %xmm0
+; SLM64-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SLM64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SLM64-NEXT: movdqa %xmm3, %xmm2
-; SLM64-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
-; SLM64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SLM64-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; SLM64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; SLM64-NEXT: movdqa %xmm4, %xmm1
; SLM64-NEXT: retq
;
; SLOW32-LABEL: test_mul_v16i32_v16i16:
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
index 7bcfd6b919bfd..8759dfc216559 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
@@ -563,8 +563,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pextrw $1, %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %xmm0
# CHECK-NEXT: 1 4 1.00 * pinsrw $1, (%rax), %xmm0
-# CHECK-NEXT: 1 4 1.00 pmaddwd %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmaddwd (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmaddwd %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmaddwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * pmaxsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2
@@ -574,16 +574,16 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * pminub (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmovmskb %xmm0, %ecx
-# CHECK-NEXT: 1 4 1.00 pmulhuw %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmulhuw (%rax), %xmm2
-# CHECK-NEXT: 1 4 1.00 pmulhw %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmulhw (%rax), %xmm2
-# CHECK-NEXT: 1 4 1.00 pmullw %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmullw (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmulhuw %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmulhuw (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmulhw %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmulhw (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmullw %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmullw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %mm2
-# CHECK-NEXT: 1 4 1.00 pmuludq %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmuludq (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmuludq %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmuludq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 por %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * por (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 psadbw %xmm0, %xmm2
@@ -687,7 +687,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - 412.00 12.00 174.50 96.50 3.00 3.00 134.00
+# CHECK-NEXT: - 412.00 12.00 184.50 96.50 3.00 3.00 134.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -847,8 +847,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pextrw $1, %xmm0, %ecx
# CHECK-NEXT: - - - 1.00 - - - - pinsrw $1, %eax, %xmm0
# CHECK-NEXT: - - - 1.00 - - - 1.00 pinsrw $1, (%rax), %xmm0
-# CHECK-NEXT: - - - 1.00 - - - - pmaddwd %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddwd (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmaddwd %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmaddwd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - pmaxsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 pmaxsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - pmaxub %xmm0, %xmm2
@@ -858,16 +858,16 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - pminub %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 pminub (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - pmovmskb %xmm0, %ecx
-# CHECK-NEXT: - - - 1.00 - - - - pmulhuw %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhuw (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - pmulhw %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhw (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - pmullw %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmullw (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmulhuw %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmulhuw (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmulhw %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmulhw (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmullw %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmullw (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pmuludq %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmuludq (%rax), %mm2
-# CHECK-NEXT: - - - 1.00 - - - - pmuludq %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmuludq (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmuludq %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmuludq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - por %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 por (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - psadbw %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
index 6f339ab0266f1..7fbf75b627dea 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
@@ -237,8 +237,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * pmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * pmovzxwq (%rax), %xmm2
-# CHECK-NEXT: 1 4 1.00 pmuldq %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmuldq (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmuldq %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 ptest %xmm0, %xmm1
@@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 88.00 25.00 - - 54.00
+# CHECK-NEXT: - - - 90.00 25.00 - - 54.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -350,8 +350,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmovzxwd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmovzxwq (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - pmuldq %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmuldq (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmuldq %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmuldq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pmulld %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulld (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - ptest %xmm0, %xmm1
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
index 3084baecb2ab2..f6c1bfe3bae8d 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-ssse3.s
@@ -148,12 +148,12 @@ psignw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %mm2
-# CHECK-NEXT: 1 4 1.00 pmaddubsw %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmaddubsw (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmaddubsw %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmaddubsw (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2
# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %mm2
-# CHECK-NEXT: 1 4 1.00 pmulhrsw %xmm0, %xmm2
-# CHECK-NEXT: 1 7 1.00 * pmulhrsw (%rax), %xmm2
+# CHECK-NEXT: 2 5 2.00 pmulhrsw %xmm0, %xmm2
+# CHECK-NEXT: 2 8 2.00 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2
# CHECK-NEXT: 1 4 1.00 * pshufb (%rax), %mm2
# CHECK-NEXT: 4 5 5.00 pshufb %xmm0, %xmm2
@@ -183,7 +183,7 @@ psignw (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 48.00 24.00 - - 32.00
+# CHECK-NEXT: - - - 52.00 24.00 - - 32.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -229,12 +229,12 @@ psignw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 phsubw (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddubsw (%rax), %mm2
-# CHECK-NEXT: - - - 1.00 - - - - pmaddubsw %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmaddubsw (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmaddubsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmaddubsw (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pmulhrsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhrsw (%rax), %mm2
-# CHECK-NEXT: - - - 1.00 - - - - pmulhrsw %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 pmulhrsw (%rax), %xmm2
+# CHECK-NEXT: - - - 2.00 - - - - pmulhrsw %xmm0, %xmm2
+# CHECK-NEXT: - - - 2.00 - - - 1.00 pmulhrsw (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pshufb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 pshufb (%rax), %mm2
# CHECK-NEXT: - - - 5.00 - - - - pshufb %xmm0, %xmm2
More information about the llvm-commits mailing list