[llvm] 51107be - [X86] Haswell/Broadwell/Skylake DPPS folded instructions use an extra port06 resource

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 3 04:29:02 PDT 2024


Author: Simon Pilgrim
Date: 2024-04-03T12:28:46+01:00
New Revision: 51107be7dd7f83a107b9c35c39b16081e38f7a54

URL: https://github.com/llvm/llvm-project/commit/51107be7dd7f83a107b9c35c39b16081e38f7a54
DIFF: https://github.com/llvm/llvm-project/commit/51107be7dd7f83a107b9c35c39b16081e38f7a54.diff

LOG: [X86] Haswell/Broadwell/Skylake DPPS folded instructions use an extra port06 resource

This is an extension to 07151f0241d3f893cb36eb2dbc395d4098f74a87, which handled SandyBridge, so that we at least model the regression identified in #14640.

Confirmed by Agner + uops.info/uica (SkylakeServer also incorrectly used Port015 instead of just Port01).

I raised #86669 as a proposal for an 'x86 unfold' pass that can unfold these folded instructions (if we have the free registers), driven by the scheduler model.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86SchedBroadwell.td
    llvm/lib/Target/X86/X86SchedHaswell.td
    llvm/lib/Target/X86/X86SchedSkylakeClient.td
    llvm/lib/Target/X86/X86SchedSkylakeServer.td
    llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
    llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
    llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
    llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 0027de851df75d..b3ee7a82b91741 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -324,8 +324,10 @@ defm : BWWriteResPair<WriteFMAX,   [BWPort01], 5, [1], 1, 5>; // Fused Multiply
 defm : BWWriteResPair<WriteFMAY,   [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
 defm : X86WriteResPairUnsupported<WriteFMAZ>;
 defm : BWWriteResPair<WriteDPPD,   [BWPort0,BWPort1,BWPort5],  9, [1,1,1], 3, 5>; // Floating point double dot product.
-defm : BWWriteResPair<WriteDPPS,   [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
-defm : BWWriteResPair<WriteDPPSY,  [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
+defm : X86WriteRes<WriteDPPS,      [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSY,     [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSLd,    [BWPort0,BWPort1,BWPort5,BWPort06,BWPort23], 19, [2,1,1,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd,   [BWPort0,BWPort1,BWPort5,BWPort06,BWPort23], 20, [2,1,1,1,1], 6>;
 defm : BWWriteResPair<WriteFSign,     [BWPort5], 1>; // Floating point fabs/fchs.
 defm : X86WriteRes<WriteFRnd,            [BWPort23],  6, [1],   1>; // Floating point rounding.
 defm : X86WriteRes<WriteFRndY,           [BWPort23],  6, [1],   1>; // Floating point rounding (YMM/ZMM).

diff  --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index a11b470b1f5182..6c301a3cd3425b 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -324,8 +324,10 @@ defm : HWWriteResPair<WriteFMAX,   [HWPort01], 5, [1], 1, 6>;
 defm : HWWriteResPair<WriteFMAY,   [HWPort01], 5, [1], 1, 7>;
 defm : HWWriteResPair<WriteFMAZ,   [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
 defm : HWWriteResPair<WriteDPPD,   [HWPort0,HWPort1,HWPort5],  9, [1,1,1], 3, 6>;
-defm : HWWriteResPair<WriteDPPS,   [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
-defm : HWWriteResPair<WriteDPPSY,  [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
+defm : X86WriteRes<WriteDPPS,      [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSY,     [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSLd,    [HWPort0,HWPort1,HWPort5,HWPort06,HWPort23], 20, [2,1,1,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd,   [HWPort0,HWPort1,HWPort5,HWPort06,HWPort23], 21, [2,1,1,1,1], 6>;
 defm : HWWriteResPair<WriteFSign,  [HWPort0], 1>;
 defm : X86WriteRes<WriteFRnd,            [HWPort23],  6, [1],   1>;
 defm : X86WriteRes<WriteFRndY,           [HWPort23],  6, [1],   1>;

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 4fa138f69fb92b..3ee931fe5ed9c7 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -311,8 +311,10 @@ defm : SKLWriteResPair<WriteFMAX,   [SKLPort01], 4, [1], 1, 6>;
 defm : SKLWriteResPair<WriteFMAY,   [SKLPort01], 4, [1], 1, 7>;
 defm : X86WriteResPairUnsupported<WriteFMAZ>;
 defm : SKLWriteResPair<WriteDPPD,   [SKLPort5,SKLPort01],  9, [1,2], 3, 6>; // Floating point double dot product.
-defm : SKLWriteResPair<WriteDPPS,   [SKLPort5,SKLPort01], 13, [1,3], 4, 6>;
-defm : SKLWriteResPair<WriteDPPSY,  [SKLPort5,SKLPort01], 13, [1,3], 4, 7>;
+defm : X86WriteRes<WriteDPPS,       [SKLPort5,SKLPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSY,      [SKLPort5,SKLPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSLd,     [SKLPort5,SKLPort01,SKLPort06,SKLPort23], 19, [1,3,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd,    [SKLPort5,SKLPort01,SKLPort06,SKLPort23], 20, [1,3,1,1], 6>;
 defm : SKLWriteResPair<WriteFSign,   [SKLPort0], 1>; // Floating point fabs/fchs.
 defm : SKLWriteResPair<WriteFRnd,     [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
 defm : SKLWriteResPair<WriteFRndY,    [SKLPort01], 8, [2], 2, 7>;

diff  --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 3da688cda2c6c4..a7dff0ecbcd96f 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -311,8 +311,10 @@ defm : SKXWriteResPair<WriteFMAX, [SKXPort01],  4, [1], 1, 6>;
 defm : SKXWriteResPair<WriteFMAY, [SKXPort01],  4, [1], 1, 7>;
 defm : SKXWriteResPair<WriteFMAZ, [SKXPort05],  4, [1], 1, 7>;
 defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015],  9, [1,2], 3, 6>; // Floating point double dot product.
-defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>;
-defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
+defm : X86WriteRes<WriteDPPS,       [SKXPort5,SKXPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSY,      [SKXPort5,SKXPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSLd,     [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 19, [1,3,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd,    [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 20, [1,3,1,1], 6>;
 defm : SKXWriteResPair<WriteFSign,  [SKXPort0],  1>; // Floating point fabs/fchs.
 defm : SKXWriteResPair<WriteFRnd,   [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
 defm : SKXWriteResPair<WriteFRndY,  [SKXPort01], 8, [2], 2, 7>;

diff  --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 98b8619f2e04f9..ca1faf62aa8987 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1189,9 +1189,9 @@ vzeroupper
 # CHECK-NEXT:  3      9     1.00                        vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  4      14    1.00    *                   vdppd	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  4      14    2.00                        vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  5      19    2.00    *                   vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  6      19    2.00    *                   vdpps	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  4      14    2.00                        vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  5      20    2.00    *                   vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  6      20    2.00    *                   vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  2      1     1.00           *            vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  2      2     1.00                        vextractps	$1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     257.00 215.25 235.25 176.17 176.17 38.00  424.25 2.25   12.67
+# CHECK-NEXT:  -     257.00 216.25 235.25 176.17 176.17 38.00  424.25 3.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00    -      -     vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vdppd	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00    -      -     vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -     2.00   1.00   0.50   0.50    -     1.00    -      -     vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  -      -     2.50   1.00   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00    -      -     vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -     2.00   1.00   0.50   0.50    -     1.00    -      -     vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -     2.50   1.00   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vextractps	$1, %xmm0, %ecx

diff  --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
index a2899b43711cf0..dcc535306c85af 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
@@ -166,7 +166,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      9     1.00                        dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  4      14    1.00    *                   dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  4      14    2.00                        dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  5      19    2.00    *                   dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  6      19    2.00    *                   dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  2      2     1.00                        extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  3      2     1.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     23.33  22.33  25.67  25.67  5.00   80.33   -     1.67
+# CHECK-NEXT:  -      -     23.83  22.33  25.67  25.67  5.00   80.33  0.50   1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -281,7 +281,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00    -      -     dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00    -      -     dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  -      -     2.00   1.00   0.50   0.50    -     1.00    -      -     dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  -      -     2.50   1.00   0.50   0.50    -     1.00   0.50    -     dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     insertps	$1, %xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index 376070d7f4e0b6..cff60c9ce3ab34 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1189,9 +1189,9 @@ vzeroupper
 # CHECK-NEXT:  3      9     1.00                        vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  4      15    1.00    *                   vdppd	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  4      14    2.00                        vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  5      20    2.00    *                   vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  6      20    2.00    *                   vdpps	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  4      14    2.00                        vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  5      21    2.00    *                   vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  6      21    2.00    *                   vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  2      1     1.00           *            vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  2      2     1.00                        vextractps	$1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     336.00 214.58 236.58 176.17 176.17 38.00  427.58 2.25   12.67
+# CHECK-NEXT:  -     336.00 215.58 236.58 176.17 176.17 38.00  427.58 3.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00    -      -     vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vdppd	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00    -      -     vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -     2.00   1.00   0.50   0.50    -     1.00    -      -     vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  -      -     2.50   1.00   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00    -      -     vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -     2.00   1.00   0.50   0.50    -     1.00    -      -     vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -     2.50   1.00   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vextractps	$1, %xmm0, %ecx

diff  --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
index 70d93983864c5e..c2d07735f1cb64 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
@@ -166,7 +166,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      9     1.00                        dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  4      15    1.00    *                   dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  4      14    2.00                        dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  5      20    2.00    *                   dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  6      20    2.00    *                   dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  2      2     1.00                        extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  3      2     1.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     23.33  22.33  25.67  25.67  5.00   80.33   -     1.67
+# CHECK-NEXT:  -      -     23.83  22.33  25.67  25.67  5.00   80.33  0.50   1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -281,7 +281,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00    -      -     dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -     2.00   1.00    -      -      -     1.00    -      -     dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  -      -     2.00   1.00   0.50   0.50    -     1.00    -      -     dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  -      -     2.50   1.00   0.50   0.50    -     1.00   0.50    -     dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     insertps	$1, %xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index c2e0217ae83091..ef5a9e34a932b4 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1189,9 +1189,9 @@ vzeroupper
 # CHECK-NEXT:  3      9     1.00                        vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  4      15    1.00    *                   vdppd	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  4      13    1.50                        vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  5      19    1.50    *                   vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  6      19    1.50    *                   vdpps	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  4      13    1.50                        vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  5      20    1.50    *                   vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  6      20    1.50    *                   vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  2      1     1.00           *            vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  2      3     1.00                        vextractps	$1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     126.00 338.58 199.58 173.83 173.83 38.00  326.58 5.25   11.33
+# CHECK-NEXT:  -     126.00 339.58 199.58 173.83 173.83 38.00  326.58 6.25   11.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00    -      -     vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     vdppd	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -     1.00    -      -     vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -     1.00    -      -     vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  -      -     2.00   1.50   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -     1.00    -      -     vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -     1.00    -      -     vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -     2.00   1.50   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vextractps	$1, %xmm0, %ecx

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
index 6e11bb6df8e291..1d8d67fd323f13 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
@@ -166,7 +166,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      9     1.00                        dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  4      15    1.00    *                   dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  4      13    1.50                        dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  5      19    1.50    *                   dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  6      19    1.50    *                   dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  2      3     1.00                        extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  3      2     1.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     37.33  31.33  23.67  23.67  5.00   63.33   -     1.67
+# CHECK-NEXT:  -      -     37.83  31.33  23.67  23.67  5.00   63.33  0.50   1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -281,7 +281,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -     1.00    -      -     dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     1.00    -      -     dppd	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.50   1.50    -      -      -     1.00    -      -     dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  -      -     1.50   1.50   0.50   0.50    -     1.00    -      -     dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  -      -     2.00   1.50   0.50   0.50    -     1.00   0.50    -     dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     insertps	$1, %xmm0, %xmm2

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index de14ef7ee4432c..cabb002b824128 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1188,10 +1188,10 @@ vzeroupper
 # CHECK-NEXT:  2      16    3.00    *                   vdivss	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  3      9     1.00                        vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  4      15    1.00    *                   vdppd	$22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  4      13    1.33                        vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  5      19    1.33    *                   vdpps	$22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  4      13    1.33                        vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  5      20    1.33    *                   vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  4      13    1.50                        vdpps	$22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  6      19    1.50    *                   vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  4      13    1.50                        vdpps	$22, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  6      20    1.50    *                   vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      3     1.00                        vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  2      1     1.00           *            vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  2      3     1.00                        vextractps	$1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     126.00 322.25 200.25 173.83 173.83 38.00  330.25 6.25   11.33
+# CHECK-NEXT:  -     126.00 325.25 202.25 173.83 173.83 38.00  326.25 7.25   11.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1898,10 +1898,10 @@ vzeroupper
 # CHECK-NEXT:  -     3.00   1.00    -     0.50   0.50    -      -      -      -     vdivss	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  -      -     0.67   0.67    -      -      -     1.67    -      -     vdppd	$22, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  -      -     0.67   0.67   0.50   0.50    -     1.67    -      -     vdppd	$22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -     1.00   1.00    -      -      -     2.00    -      -     vdpps	$22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     2.00    -      -     vdpps	$22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT:  -      -     1.00   1.00    -      -      -     2.00    -      -     vdpps	$22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     2.00    -      -     vdpps	$22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT:  -      -     1.50   1.50    -      -      -     1.00    -      -     vdpps	$22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -     2.00   1.50   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT:  -      -     1.50   1.50    -      -      -     1.00    -      -     vdpps	$22, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT:  -      -     2.00   1.50   0.50   0.50    -     1.00   0.50    -     vdpps	$22, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vextractf128	$1, %ymm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vextractf128	$1, %ymm0, (%rax)
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vextractps	$1, %xmm0, %ecx

diff  --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
index 15cd09bf7e9768..e3f34fdc9430d3 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
@@ -165,8 +165,8 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      8     0.67    *                   blendvps	%xmm0, (%rax), %xmm2
 # CHECK-NEXT:  3      9     1.00                        dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  4      15    1.00    *                   dppd	$22, (%rax), %xmm2
-# CHECK-NEXT:  4      13    1.33                        dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  5      19    1.33    *                   dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  4      13    1.50                        dpps	$22, %xmm0, %xmm2
+# CHECK-NEXT:  6      19    1.50    *                   dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  2      3     1.00                        extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  3      2     1.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     36.67  28.67  23.67  23.67  5.00   66.67   -     1.67
+# CHECK-NEXT:  -      -     38.17  29.67  23.67  23.67  5.00   64.67  0.50   1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -280,8 +280,8 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.67   0.67   0.50   0.50    -     0.67    -      -     blendvps	%xmm0, (%rax), %xmm2
 # CHECK-NEXT:  -      -     0.67   0.67    -      -      -     1.67    -      -     dppd	$22, %xmm0, %xmm2
 # CHECK-NEXT:  -      -     0.67   0.67   0.50   0.50    -     1.67    -      -     dppd	$22, (%rax), %xmm2
-# CHECK-NEXT:  -      -     1.00   1.00    -      -      -     2.00    -      -     dpps	$22, %xmm0, %xmm2
-# CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -     2.00    -      -     dpps	$22, (%rax), %xmm2
+# CHECK-NEXT:  -      -     1.50   1.50    -      -      -     1.00    -      -     dpps	$22, %xmm0, %xmm2
+# CHECK-NEXT:  -      -     2.00   1.50   0.50   0.50    -     1.00   0.50    -     dpps	$22, (%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     extractps	$1, %xmm0, %ecx
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     insertps	$1, %xmm0, %xmm2


        


More information about the llvm-commits mailing list