[llvm] 7ac9581 - [X86] SandyBridge masked stores use 2uop from port01
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 07:10:55 PDT 2023
Author: Simon Pilgrim
Date: 2023-09-22T15:10:27+01:00
New Revision: 7ac9581dd0ec38bdfc0e4910fb61e07ac45e9ac2
URL: https://github.com/llvm/llvm-project/commit/7ac9581dd0ec38bdfc0e4910fb61e07ac45e9ac2
DIFF: https://github.com/llvm/llvm-project/commit/7ac9581dd0ec38bdfc0e4910fb61e07ac45e9ac2.diff
LOG: [X86] SandyBridge masked stores use 2uop from port01
Found while reviewing an llvm-exegesis capture (this also matches the Agner and uops.info numbers).
Added:
Modified:
llvm/lib/Target/X86/X86SchedSandyBridge.td
llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index d39d88b9f2dc8ea..821a0cd061bb516 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -224,10 +224,10 @@ defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
@@ -384,10 +384,10 @@ defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
index 045435e9774d6c7..aaa39b65b12154a 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
@@ -1222,12 +1222,12 @@ vzeroupper
# CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 3 8 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 3 8 1.00 * vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 3 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 1.00 * vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmaxpd %ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 572.00 246.50 317.00 39.00 367.50 179.50 179.50
+# CHECK-NEXT: - 572.00 248.50 319.00 39.00 367.50 179.50 179.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1930,12 +1930,12 @@ vzeroupper
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vmaxpd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
index f84d3bc1645940b..8baf6c0df646438 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
@@ -594,12 +594,12 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 12 1.00 * vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 3 8 1.00 * vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vpmaskmovd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 3 5 1.00 * * vpmaskmovd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 3 5 1.00 * * vpmaskmovd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vpmaskmovd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vpmaskmovd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 3 8 1.00 * vpmaskmovq (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vpmaskmovq (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 3 5 1.00 * * vpmaskmovq %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 3 5 1.00 * * vpmaskmovq %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vpmaskmovq %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vpmaskmovq %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 1 0.50 vpmaxsb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 0.50 * vpmaxsb (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpmaxsd %ymm0, %ymm1, %ymm2
@@ -774,7 +774,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - - 75.67 88.67 5.00 168.67 78.50 78.50
+# CHECK-NEXT: - - 77.67 90.67 5.00 168.67 78.50 78.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -912,12 +912,12 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vpmaskmovd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vpmaskmovd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vpmaskmovd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vpmaskmovd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vpmaskmovd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vpmaskmovq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vpmaskmovq (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vpmaskmovq %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vpmaskmovq %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vpmaskmovq %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vpmaskmovq %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxsb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpmaxsb (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpmaxsd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
index e22802cbdc9adc8..0512e690cc8ad39 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
@@ -1222,12 +1222,12 @@ vzeroupper
# CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 3 8 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 3 8 1.00 * vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 3 5 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 4 5 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 3 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 9 1.00 * vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vmaxpd %ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 572.00 246.50 317.00 39.00 367.50 179.50 179.50
+# CHECK-NEXT: - 572.00 248.50 319.00 39.00 367.50 179.50 179.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -1930,12 +1930,12 @@ vzeroupper
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 0.50 0.50 1.00 - 0.50 0.50 vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vmaxpd %ymm0, %ymm1, %ymm2
More information about the llvm-commits mailing list