[llvm] f6f2929 - [X86] Fix HSW/BDW masked store schedules
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 27 03:21:27 PST 2024
Author: Simon Pilgrim
Date: 2024-11-27T11:21:07Z
New Revision: f6f2929fc6fa39f62e2c3109b7a1b0f866c1c17b
URL: https://github.com/llvm/llvm-project/commit/f6f2929fc6fa39f62e2c3109b7a1b0f866c1c17b
DIFF: https://github.com/llvm/llvm-project/commit/f6f2929fc6fa39f62e2c3109b7a1b0f866c1c17b.diff
LOG: [X86] Fix HSW/BDW masked store schedules
Vector masked stores don't use Port 5 or Port 7.
Confirmed by Agner/uops.info
Added:
Modified:
llvm/lib/Target/X86/X86SchedBroadwell.td
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 5b50e1943e3db1..8cac444578d041 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -247,10 +247,10 @@ defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
@@ -420,10 +420,10 @@ defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort23,BWPort1], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index d06e8a99370976..5e8a110ac3e5b3 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -249,10 +249,10 @@ defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [HWPort5], 1, [1], 1>;
@@ -420,10 +420,10 @@ defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort23,HWPort1], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>;
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 028625013a85cc..62ad8f862ffdf5 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 421.25 3.25 12.67
+# CHECK-NEXT: - 257.00 216.25 249.25 173.83 173.83 38.00 419.25 3.25 11.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1932,12 +1932,12 @@ vzeroupper
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s
index 738f197afb284e..338fc82d7f220b 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s
@@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 96.67 60.67 99.67 99.67 21.00 266.67 4.00 1.67
+# CHECK-NEXT: - - 96.67 62.67 100.33 100.33 21.00 264.67 4.00 0.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -914,12 +914,12 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpmaxsb (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index 179393abb08d47..35f4945bf222a6 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 424.58 3.25 12.67
+# CHECK-NEXT: - 336.00 215.58 250.58 173.83 173.83 38.00 422.58 3.25 11.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1932,12 +1932,12 @@ vzeroupper
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s
index 814fadac6bbffc..095c49b19d5d87 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s
@@ -776,7 +776,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 206.67 90.67 99.67 99.67 5.00 284.67 30.00 1.67
+# CHECK-NEXT: - - 206.67 92.67 100.33 100.33 5.00 282.67 30.00 0.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -914,12 +914,12 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpmaskmovq (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - 1.00 0.50 0.33 0.33 1.00 0.50 - 0.33 vpmaskmovq %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 1.00 - - - vpmaskmovq %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsb %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 - 0.50 - - vpmaxsb (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 - - - 0.50 - - vpmaxsd %ymm0, %ymm1, %ymm2
More information about the llvm-commits
mailing list