[llvm] [X86] Split rr/rm CVT schedules on SNB/HSW/BDW (PR #117494)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 24 06:58:58 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
The folded-load variants almost never require Port5 for length-changing conversions (only the SNB ymm cases do), and folding the load does not add an extra uop.
Confirmed with a mixture of Agner + uops.info comparisons.
---
Patch is 48.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117494.diff
15 Files Affected:
- (modified) llvm/lib/Target/X86/X86SchedBroadwell.td (+11-6)
- (modified) llvm/lib/Target/X86/X86SchedHaswell.td (+17-17)
- (modified) llvm/lib/Target/X86/X86SchedSandyBridge.td (+4-3)
- (modified) llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s (+7-7)
- (modified) llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s (+13-13)
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s (+13-13)
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s (+3-3)
- (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s (+7-7)
- (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s (+1-1)
- (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s (+5-5)
- (modified) llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s (+3-3)
``````````diff
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 699ca91cd1f8f4..e5b3cc4b6c90e6 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -367,21 +367,26 @@ defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1,BWPort5], 6, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
defm : X86WriteRes<WriteCvtI2SS, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PS, [BWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSY, [BWPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtI2SSLd, [BWPort1,BWPort23], 9, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 3, [1], 1, 6>;
+defm : X86WriteRes<WriteCvtI2PSLd, [BWPort1,BWPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSYLd, [BWPort1,BWPort23], 9, [1,1], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
defm : X86WriteRes<WriteCvtI2SD, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PD, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY, [BWPort1,BWPort5], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtI2SDLd, [BWPort1,BWPort23], 9, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtI2PD, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
-defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1,BWPort5], 6, [1,1], 2, 5>;
+defm : X86WriteRes<WriteCvtI2PDLd, [BWPort1,BWPort23], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd, [BWPort1,BWPort23],11, [1,1], 2>;
defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
defm : X86WriteRes<WriteCvtSS2SD, [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PD, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDY, [BWPort0,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtSS2SDLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDLd, [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort0,BWPort5], 4, [1,1], 2, 5>;
+defm : X86WriteRes<WriteCvtPS2PDYLd, [BWPort0,BWPort23], 9, [1,1], 2>;
defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index b820418bb55191..59874be34f5a28 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -364,22 +364,30 @@ defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3, [1], 1, 7>;
defm : HWWriteResPair<WriteCvtPS2IZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SD, [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PD, [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY, [HWPort1,HWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDZ, [HWPort1,HWPort5], 6, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SDLd, [HWPort1,HWPort23], 9, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtI2PD, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtI2PDY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtI2PDZ, [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtI2PDLd, [HWPort1,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd, [HWPort1,HWPort23],12, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDZLd, [HWPort1,HWPort23],12, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SS, [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PS, [HWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSY, [HWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSZ, [HWPort1], 3, [1], 1>; // Unsupported = 1
defm : X86WriteRes<WriteCvtI2SSLd, [HWPort1,HWPort23], 9, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtI2PS, [HWPort1], 3, [1], 1, 6>;
-defm : HWWriteResPair<WriteCvtI2PSY, [HWPort1], 3, [1], 1, 7>;
-defm : HWWriteResPair<WriteCvtI2PSZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtI2PSLd, [HWPort1,HWPort23], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSYLd, [HWPort1,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSZLd, [HWPort1,HWPort23],10, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtSS2SD, [HWPort0,HWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd, [HWPort0,HWPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PD, [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDY, [HWPort0,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZ, [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtSS2SDLd, [HWPort0,HWPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDLd, [HWPort0,HWPort23], 6, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort0,HWPort5], 4, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort0,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PDYLd, [HWPort0,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZLd, [HWPort0,HWPort23],10, [1,1], 2>; // Unsupported = 1
defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1,HWPort5], 4, [1,1], 2, 5>;
defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
@@ -983,7 +991,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
let NumMicroOps = 2;
let ReleaseAtCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>;
def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
@@ -1349,13 +1356,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
}
def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
-def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
- let Latency = 9;
- let NumMicroOps = 3;
- let ReleaseAtCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>;
-
def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
let Latency = 9;
let NumMicroOps = 3;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 7be9f51bcd46bd..6939b1227d0a61 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -348,13 +348,14 @@ defm : X86WriteRes<WriteCvtI2PDLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>
defm : X86WriteRes<WriteCvtI2PDYLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtI2PDZLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1
-defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
+defm : X86WriteRes<WriteCvtSS2SD, [SBPort0,SBPort5], 1, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDZ, [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtSS2SDLd, [SBPort0,SBPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PDLd, [SBPort0,SBPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
index df0053a1dcb9b5..25f79397fa071d 100644
--- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
@@ -448,7 +448,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdq (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: 2 1 1.00 cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2
@@ -687,7 +687,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 172.00 75.83 117.33 17.00 101.83 67.00 67.00
+# CHECK-NEXT: - 172.00 75.83 117.33 17.00 102.83 67.00 67.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -732,7 +732,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sd %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdq (%rax), %xmm2
-# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2
+# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 1b196b4355a6d4..028625013a85cc 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1115,9 +1115,9 @@ vzeroupper
# CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1
# CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm1
# CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: 3 9 1.00 * vcvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: 2 9 1.00 * vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 2 6 1.00 vcvtdq2pd %xmm0, %ymm2
-# CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %ymm2
+# CHECK-NEXT: 2 11 1.00 * vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2
@@ -1137,7 +1137,7 @@ vzeroupper
# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 6 1.00 * vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 vcvtps2pd %xmm0, %ymm2
-# CHECK-NEXT: 3 9 1.00 * vcvtps2pd (%rax), %ymm2
+# CHECK-NEXT: 2 9 1.00 * vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 4 1.00 vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * vcvtsd2si (%rax), %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 424.25 3.25 12.67
+# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 421.25 3.25 12.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -1825,9 +1825,9 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 - - - - - - vcomiss %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcomiss (%rax), %xmm1
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2pd (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - vcvtdq2ps %ymm0, %ymm2
@@ -1847,7 +1847,7 @@ vzeroupper
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtps2pd %xmm0, %ymm2
-# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtps2pd (%rax), %ymm2
+# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
index e76d90521afa9c..8851be4679a1e9 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
@@ -423,7 +423,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1
# CHECK-NEXT: 2 8 1.00 * comisd (%rax), %xmm1
# CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: 3 9 1.00 * cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2
@@ -433,7 +433,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 9 1.00 * cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT: 3 9 1.00 * cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: 2 9 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
-# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 119.25 2.25 4.67
+# CHECK-NEXT: - 78.00 70.75 95.75 63.17 63.17 14.00 117.25 2.25 4.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
@@ -709,7 +709,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - comisd %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - comisd (%rax), %xmm1
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtdq2pd %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2dq %xmm0, %xmm2
@@ -719,7 +719,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - cvtpi2pd %mm0, %xmm2
-# CHECK-NEXT: - - - 1.00 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2
+# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtps2pd %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
index 49db25cb0bdfb1..7f07fd56fe60dc 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
@@ -1137,7 +1137,7 @@ vzeroupper
# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 vcvtps2pd %xmm0, %ymm2
-# CHECK-NEXT: 2 7 1.00 * vcvtps2pd (%rax), %ymm2
+# CHECK-NEXT: 3 7 1.00 * vcvtps2pd (%rax), %ymm2
# CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 5 1.00 vcvtsd2si %xmm0, %rcx
# CHECK-NEXT: 3 10 1.00 * vcvtsd2si (%rax), %ecx
@@ -1152,7 +1152,7 @@ vzeroupper
# CHECK-NEXT: 3 5 2.00 vcvtsi2ss %rcx, %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvtsi2ssl (%rax), %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * vcvtsi2ssq (%rax), %xmm0, %xmm2
-# CHECK-NEXT: 1 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 1 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 7 1.00 * vcvtss2sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %ecx
# CHECK-NEXT: 2 5 1.00 vcvtss2si %xmm0, %rcx
@@ -1734,7 +1734,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/117494
More information about the llvm-commits mailing list