[llvm] 7ac9581 - [X86] SandyBridge masked stores use 2uop from port01

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 22 07:10:55 PDT 2023


Author: Simon Pilgrim
Date: 2023-09-22T15:10:27+01:00
New Revision: 7ac9581dd0ec38bdfc0e4910fb61e07ac45e9ac2

URL: https://github.com/llvm/llvm-project/commit/7ac9581dd0ec38bdfc0e4910fb61e07ac45e9ac2
DIFF: https://github.com/llvm/llvm-project/commit/7ac9581dd0ec38bdfc0e4910fb61e07ac45e9ac2.diff

LOG: [X86] SandyBridge masked stores use 2uop from port01

Found while reviewing an llvm-exegesis capture (and matches Agner + uops.info numbers)

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedSandyBridge.td
    llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
    llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index d39d88b9f2dc8ea..821a0cd061bb516 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -224,10 +224,10 @@ defm : X86WriteRes<WriteFStoreNT,      [SBPort23,SBPort4], 1, [1,1], 1>;
 defm : X86WriteRes<WriteFStoreNTX,     [SBPort23,SBPort4], 1, [1,1], 1>;
 defm : X86WriteRes<WriteFStoreNTY,     [SBPort23,SBPort4], 1, [1,1], 1>;
 
-defm : X86WriteRes<WriteFMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
 
 defm : X86WriteRes<WriteFMove,         [SBPort5], 1, [1], 1>;
 defm : X86WriteRes<WriteFMoveX,        [SBPort5], 1, [1], 1>;
@@ -384,10 +384,10 @@ defm : X86WriteRes<WriteVecStoreX,       [SBPort23,SBPort4], 1, [1,1], 1>;
 defm : X86WriteRes<WriteVecStoreY,       [SBPort23,SBPort4], 1, [1,1], 1>;
 defm : X86WriteRes<WriteVecStoreNT,      [SBPort23,SBPort4], 1, [1,1], 1>;
 defm : X86WriteRes<WriteVecStoreNTY,     [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteVecMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,2,1], 4>;
 defm : X86WriteRes<WriteVecMove,         [SBPort05], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveX,        [SBPort015], 1, [1], 1>;
 defm : X86WriteRes<WriteVecMoveY,        [SBPort05], 1, [1], 1>;

diff  --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
index 045435e9774d6c7..aaa39b65b12154a 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
@@ -1222,12 +1222,12 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00    *      *      U     vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  3      8     1.00    *                   vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vmaskmovpd	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovpd	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovpd	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovpd	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovpd	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  3      8     1.00    *                   vmaskmovps	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vmaskmovps	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovps	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovps	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovps	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovps	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  1      3     1.00                        vmaxpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   vmaxpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     1.00                        vmaxpd	%ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     572.00 246.50 317.00 39.00  367.50 179.50 179.50
+# CHECK-NEXT:  -     572.00 248.50 319.00 39.00  367.50 179.50 179.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1930,12 +1930,12 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovpd	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovpd	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovpd	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovpd	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovpd	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovps	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovps	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovps	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovps	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovps	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovps	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vmaxpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vmaxpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vmaxpd	%ymm0, %ymm1, %ymm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
index f84d3bc1645940b..8baf6c0df646438 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx2.s
@@ -594,12 +594,12 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  2      12    1.00    *                   vpmaddwd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  3      8     1.00    *                   vpmaskmovd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vpmaskmovd	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  3      5     1.00    *      *            vpmaskmovd	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  3      5     1.00    *      *            vpmaskmovd	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vpmaskmovd	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vpmaskmovd	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  3      8     1.00    *                   vpmaskmovq	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vpmaskmovq	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  3      5     1.00    *      *            vpmaskmovq	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  3      5     1.00    *      *            vpmaskmovq	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vpmaskmovq	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vpmaskmovq	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  1      1     0.50                        vpmaxsb	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      8     0.50    *                   vpmaxsb	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpmaxsd	%ymm0, %ymm1, %ymm2
@@ -774,7 +774,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -     75.67  88.67  5.00   168.67 78.50  78.50
+# CHECK-NEXT:  -      -     77.67  90.67  5.00   168.67 78.50  78.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -912,12 +912,12 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vpmaddwd	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vpmaskmovd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vpmaskmovd	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vpmaskmovd	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vpmaskmovd	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vpmaskmovd	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vpmaskmovd	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vpmaskmovq	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vpmaskmovq	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vpmaskmovq	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vpmaskmovq	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vpmaskmovq	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vpmaskmovq	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -     vpmaxsb	%ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -     0.50    -     0.50   0.50   0.50   vpmaxsb	(%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -     0.50    -     0.50    -      -     vpmaxsd	%ymm0, %ymm1, %ymm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
index e22802cbdc9adc8..0512e690cc8ad39 100644
--- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s
@@ -1222,12 +1222,12 @@ vzeroupper
 # CHECK-NEXT:  1      1     1.00    *      *      U     vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  3      8     1.00    *                   vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vmaskmovpd	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovpd	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovpd	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovpd	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovpd	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  3      8     1.00    *                   vmaskmovps	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   vmaskmovps	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovps	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  3      5     1.00    *      *            vmaskmovps	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovps	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  4      5     1.00    *      *            vmaskmovps	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  1      3     1.00                        vmaxpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   vmaxpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  1      3     1.00                        vmaxpd	%ymm0, %ymm1, %ymm2
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     572.00 246.50 317.00 39.00  367.50 179.50 179.50
+# CHECK-NEXT:  -     572.00 248.50 319.00 39.00  367.50 179.50 179.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -1930,12 +1930,12 @@ vzeroupper
 # CHECK-NEXT:  -      -      -      -     1.00    -     0.50   0.50   vmaskmovdqu	%xmm0, %xmm1
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovpd	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovpd	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovpd	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovpd	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovpd	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovpd	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovps	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -     1.00   0.50   0.50   vmaskmovps	(%rax), %ymm0, %ymm2
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovps	%xmm0, %xmm1, (%rax)
-# CHECK-NEXT:  -      -     0.50   0.50   1.00    -     0.50   0.50   vmaskmovps	%ymm0, %ymm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovps	%xmm0, %xmm1, (%rax)
+# CHECK-NEXT:  -      -     1.00   1.00   1.00    -     0.50   0.50   vmaskmovps	%ymm0, %ymm1, (%rax)
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vmaxpd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vmaxpd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vmaxpd	%ymm0, %ymm1, %ymm2


        


More information about the llvm-commits mailing list