[llvm] 51107be - [X86] Haswell/Broadwell/Skylake DPPS folded instructions use an extra port06 resource
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 3 04:29:02 PDT 2024
Author: Simon Pilgrim
Date: 2024-04-03T12:28:46+01:00
New Revision: 51107be7dd7f83a107b9c35c39b16081e38f7a54
URL: https://github.com/llvm/llvm-project/commit/51107be7dd7f83a107b9c35c39b16081e38f7a54
DIFF: https://github.com/llvm/llvm-project/commit/51107be7dd7f83a107b9c35c39b16081e38f7a54.diff
LOG: [X86] Haswell/Broadwell/Skylake DPPS folded instructions use an extra port06 resource
This is an extension to 07151f0241d3f893cb36eb2dbc395d4098f74a87, which handled SandyBridge, so that we at least model the regression identified in #14640.
Confirmed by Agner + uops.info/uiCA. (SkylakeServer also had an incorrect use of Port015 instead of just Port01, which is corrected here as well.)
I raised #86669 as a proposal for an 'x86 unfold' pass, driven by the scheduler model, that can unfold these folded loads (if we have the free registers).
Added:
Modified:
llvm/lib/Target/X86/X86SchedBroadwell.td
llvm/lib/Target/X86/X86SchedHaswell.td
llvm/lib/Target/X86/X86SchedSkylakeClient.td
llvm/lib/Target/X86/X86SchedSkylakeServer.td
llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 0027de851df75d..b3ee7a82b91741 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -324,8 +324,10 @@ defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product.
-defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product.
-defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM).
+defm : X86WriteRes<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSLd, [BWPort0,BWPort1,BWPort5,BWPort06,BWPort23], 19, [2,1,1,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd, [BWPort0,BWPort1,BWPort5,BWPort06,BWPort23], 20, [2,1,1,1,1], 6>;
defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs.
defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding.
defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM/ZMM).
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index a11b470b1f5182..6c301a3cd3425b 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -324,8 +324,10 @@ defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
-defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
-defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
+defm : X86WriteRes<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4>;
+defm : X86WriteRes<WriteDPPSLd, [HWPort0,HWPort1,HWPort5,HWPort06,HWPort23], 20, [2,1,1,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd, [HWPort0,HWPort1,HWPort5,HWPort06,HWPort23], 21, [2,1,1,1,1], 6>;
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 4fa138f69fb92b..3ee931fe5ed9c7 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -311,8 +311,10 @@ defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>;
defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // Floating point double dot product.
-defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>;
-defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>;
+defm : X86WriteRes<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSLd, [SKLPort5,SKLPort01,SKLPort06,SKLPort23], 19, [1,3,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd, [SKLPort5,SKLPort01,SKLPort06,SKLPort23], 20, [1,3,1,1], 6>;
defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs.
defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 3da688cda2c6c4..a7dff0ecbcd96f 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -311,8 +311,10 @@ defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>;
defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>;
defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
-defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>;
-defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>;
+defm : X86WriteRes<WriteDPPS, [SKXPort5,SKXPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSY, [SKXPort5,SKXPort01], 13, [1,3], 4>;
+defm : X86WriteRes<WriteDPPSLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 19, [1,3,1,1], 6>;
+defm : X86WriteRes<WriteDPPSYLd, [SKXPort5,SKXPort01,SKXPort06,SKXPort23], 20, [1,3,1,1], 6>;
defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs.
defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding.
defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>;
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 98b8619f2e04f9..ca1faf62aa8987 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1189,9 +1189,9 @@ vzeroupper
# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 14 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 4 14 2.00 vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5 19 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 6 19 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 4 14 2.00 vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 5 20 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 6 20 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 2 1.00 vextractps $1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 424.25 2.25 12.67
+# CHECK-NEXT: - 257.00 216.25 235.25 176.17 176.17 38.00 424.25 3.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
index a2899b43711cf0..dcc535306c85af 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s
@@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 4 14 1.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 4 14 2.00 dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: 5 19 2.00 * dpps $22, (%rax), %xmm2
+# CHECK-NEXT: 6 19 2.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 23.33 22.33 25.67 25.67 5.00 80.33 - 1.67
+# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2
+# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
index 376070d7f4e0b6..cff60c9ce3ab34 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s
@@ -1189,9 +1189,9 @@ vzeroupper
# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 4 14 2.00 vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5 20 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 6 20 2.00 * vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 4 14 2.00 vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 5 21 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 6 21 2.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 2 1.00 vextractps $1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 427.58 2.25 12.67
+# CHECK-NEXT: - 336.00 215.58 236.58 176.17 176.17 38.00 427.58 3.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
index 70d93983864c5e..c2d07735f1cb64 100644
--- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s
@@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 4 14 2.00 dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: 5 20 2.00 * dpps $22, (%rax), %xmm2
+# CHECK-NEXT: 6 20 2.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 23.33 22.33 25.67 25.67 5.00 80.33 - 1.67
+# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2
+# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index c2e0217ae83091..ef5a9e34a932b4 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1189,9 +1189,9 @@ vzeroupper
# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 4 13 1.50 vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 6 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 4 13 1.50 vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 5 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 6 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 3 1.00 vextractps $1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 338.58 199.58 173.83 173.83 38.00 326.58 5.25 11.33
+# CHECK-NEXT: - 126.00 339.58 199.58 173.83 173.83 38.00 326.58 6.25 11.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1899,9 +1899,9 @@ vzeroupper
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
index 6e11bb6df8e291..1d8d67fd323f13 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
@@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 4 13 1.50 dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: 5 19 1.50 * dpps $22, (%rax), %xmm2
+# CHECK-NEXT: 6 19 1.50 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 37.33 31.33 23.67 23.67 5.00 63.33 - 1.67
+# CHECK-NEXT: - - 37.83 31.33 23.67 23.67 5.00 63.33 0.50 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2
+# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index de14ef7ee4432c..cabb002b824128 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1188,10 +1188,10 @@ vzeroupper
# CHECK-NEXT: 2 16 3.00 * vdivss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 4 13 1.33 vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5 19 1.33 * vdpps $22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 4 13 1.33 vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 5 20 1.33 * vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 4 13 1.50 vdpps $22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 6 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 4 13 1.50 vdpps $22, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 6 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 3 1.00 vextractps $1, %xmm0, %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 126.00 322.25 200.25 173.83 173.83 38.00 330.25 6.25 11.33
+# CHECK-NEXT: - 126.00 325.25 202.25 173.83 173.83 38.00 326.25 7.25 11.33
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1898,10 +1898,10 @@ vzeroupper
# CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - vdivss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 1.67 - - vdppd $22, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 1.67 - - vdppd $22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 1.00 1.00 - - - 2.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 2.00 - - vdpps $22, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - 1.00 1.00 - - - 2.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 2.00 - - vdpps $22, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2
+# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2
# CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
index 15cd09bf7e9768..e3f34fdc9430d3 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
@@ -165,8 +165,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 3 8 0.67 * blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2
-# CHECK-NEXT: 4 13 1.33 dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: 5 19 1.33 * dpps $22, (%rax), %xmm2
+# CHECK-NEXT: 4 13 1.50 dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: 6 19 1.50 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - - 36.67 28.67 23.67 23.67 5.00 66.67 - 1.67
+# CHECK-NEXT: - - 38.17 29.67 23.67 23.67 5.00 64.67 0.50 1.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -280,8 +280,8 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - 0.67 0.67 - - - 1.67 - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 1.67 - - dppd $22, (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 1.00 - - - 2.00 - - dpps $22, %xmm0, %xmm2
-# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 2.00 - - dpps $22, (%rax), %xmm2
+# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - dpps $22, %xmm0, %xmm2
+# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
More information about the llvm-commits
mailing list