[llvm] [X86] Split rr/rm CVT schedules on SNB/HSW/BDW (PR #117494)

via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 24 06:58:58 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

The folded-load variants almost never require Port5 for length-changing conversions (only the SNB ymm cases do), and they do not incur an extra uop for the load.

Confirmed by comparing against a mixture of Agner Fog's instruction tables and uops.info measurements.

---

Patch is 48.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117494.diff


15 Files Affected:

- (modified) llvm/lib/Target/X86/X86SchedBroadwell.td (+11-6) 
- (modified) llvm/lib/Target/X86/X86SchedHaswell.td (+17-17) 
- (modified) llvm/lib/Target/X86/X86SchedSandyBridge.td (+4-3) 
- (modified) llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s (+3-3) 
- (modified) llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s (+7-7) 
- (modified) llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s (+5-5) 
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s (+5-5) 
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512.s (+13-13) 
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s (+13-13) 
- (modified) llvm/test/tools/llvm-mca/X86/Generic/resources-sse2.s (+3-3) 
- (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s (+7-7) 
- (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-sse1.s (+1-1) 
- (modified) llvm/test/tools/llvm-mca/X86/Haswell/resources-sse2.s (+5-5) 
- (modified) llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s (+5-5) 
- (modified) llvm/test/tools/llvm-mca/X86/SandyBridge/resources-sse2.s (+3-3) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 699ca91cd1f8f4..e5b3cc4b6c90e6 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -367,21 +367,26 @@ defm : BWWriteResPair<WriteCvtPD2IY,  [BWPort1,BWPort5], 6, [1,1], 2, 6>;
 defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
 
 defm : X86WriteRes<WriteCvtI2SS,      [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PS,              [BWPort1], 3,   [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSY,             [BWPort1], 3,   [1], 1>;
 defm : X86WriteRes<WriteCvtI2SSLd,   [BWPort1,BWPort23], 9, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtI2PS,   [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtI2PSY,  [BWPort1], 3, [1], 1, 6>;
+defm : X86WriteRes<WriteCvtI2PSLd,   [BWPort1,BWPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSYLd,  [BWPort1,BWPort23], 9, [1,1], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
 defm : X86WriteRes<WriteCvtI2SD,      [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PD,      [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY,     [BWPort1,BWPort5], 6, [1,1], 2>;
 defm : X86WriteRes<WriteCvtI2SDLd,   [BWPort1,BWPort23], 9, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtI2PD,   [BWPort1,BWPort5], 4, [1,1], 2, 5>;
-defm : BWWriteResPair<WriteCvtI2PDY,  [BWPort1,BWPort5], 6, [1,1], 2, 5>;
+defm : X86WriteRes<WriteCvtI2PDLd,   [BWPort1,BWPort23], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd,  [BWPort1,BWPort23],11, [1,1], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
 
 defm : X86WriteRes<WriteCvtSS2SD,     [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PD,     [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDY,    [BWPort0,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtSS2SDLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDLd,  [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort0,BWPort5], 4, [1,1], 2, 5>;
+defm : X86WriteRes<WriteCvtPS2PDYLd, [BWPort0,BWPort23], 9, [1,1], 2>;
 defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
 defm : BWWriteResPair<WriteCvtSD2SS,  [BWPort1,BWPort5], 4, [1,1], 2, 5>;
 defm : BWWriteResPair<WriteCvtPD2PS,  [BWPort1,BWPort5], 4, [1,1], 2, 5>;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index b820418bb55191..59874be34f5a28 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -364,22 +364,30 @@ defm : HWWriteResPair<WriteCvtPS2IY,  [HWPort1], 3, [1], 1, 7>;
 defm : HWWriteResPair<WriteCvtPS2IZ,  [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
 
 defm : X86WriteRes<WriteCvtI2SD,      [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PD,      [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY,     [HWPort1,HWPort5], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDZ,     [HWPort1,HWPort5], 6, [1,1], 2>; // Unsupported = 1
 defm : X86WriteRes<WriteCvtI2SDLd,   [HWPort1,HWPort23], 9, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtI2PD,   [HWPort1,HWPort5], 4, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtI2PDY,  [HWPort1,HWPort5], 6, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtI2PDZ,  [HWPort1,HWPort5], 6, [1,1], 2, 6>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtI2PDLd,   [HWPort1,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd,  [HWPort1,HWPort23],12, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PDZLd,  [HWPort1,HWPort23],12, [1,1], 2>; // Unsupported = 1
 defm : X86WriteRes<WriteCvtI2SS,      [HWPort1,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PS,              [HWPort1], 3,   [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSY,             [HWPort1], 3,   [1], 1>;
+defm : X86WriteRes<WriteCvtI2PSZ,             [HWPort1], 3,   [1], 1>; // Unsupported = 1
 defm : X86WriteRes<WriteCvtI2SSLd,   [HWPort1,HWPort23], 9, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtI2PS,   [HWPort1], 3, [1], 1, 6>;
-defm : HWWriteResPair<WriteCvtI2PSY,  [HWPort1], 3, [1], 1, 7>;
-defm : HWWriteResPair<WriteCvtI2PSZ,  [HWPort1], 3, [1], 1, 7>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtI2PSLd,   [HWPort1,HWPort23], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSYLd,  [HWPort1,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtI2PSZLd,  [HWPort1,HWPort23],10, [1,1], 2>; // Unsupported = 1
 
 defm : X86WriteRes<WriteCvtSS2SD,     [HWPort0,HWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtSS2SDLd,  [HWPort0,HWPort23], 7, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PD,     [HWPort0,HWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDY,    [HWPort0,HWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZ,    [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtSS2SDLd,  [HWPort0,HWPort23], 7, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDLd,  [HWPort0,HWPort23], 6, [1,1], 2>;
-defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort0,HWPort5], 4, [1,1], 2, 6>;
-defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort0,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PDYLd, [HWPort0,HWPort23],10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDZLd, [HWPort0,HWPort23],10, [1,1], 2>; // Unsupported = 1
 defm : HWWriteResPair<WriteCvtSD2SS,  [HWPort1,HWPort5], 4, [1,1], 2, 5>;
 defm : HWWriteResPair<WriteCvtPD2PS,  [HWPort1,HWPort5], 4, [1,1], 2, 6>;
 defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
@@ -983,7 +991,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSrm)>;
 def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>;
 
 def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> {
@@ -1349,13 +1356,6 @@ def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> {
 }
 def: InstRW<[HWWriteResGroup75], (instregex "FICOM(P?)(16|32)m")>;
 
-def HWWriteResGroup78_1 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> {
-  let Latency = 9;
-  let NumMicroOps = 3;
-  let ReleaseAtCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDrm)>;
-
 def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> {
   let Latency = 9;
   let NumMicroOps = 3;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 7be9f51bcd46bd..6939b1227d0a61 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -348,13 +348,14 @@ defm : X86WriteRes<WriteCvtI2PDLd,   [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>
 defm : X86WriteRes<WriteCvtI2PDYLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
 defm : X86WriteRes<WriteCvtI2PDZLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1
 
-defm : SBWriteResPair<WriteCvtSS2SD,  [SBPort0], 1, [1], 1, 6>;
+defm : X86WriteRes<WriteCvtSS2SD,     [SBPort0,SBPort5], 1, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PD,     [SBPort0,SBPort5], 2, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDY,    [SBPort0,SBPort5], 2, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDZ,    [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtSS2SDLd,  [SBPort0,SBPort23], 7, [1,1], 2>;
 defm : X86WriteRes<WriteCvtPS2PDLd,  [SBPort0,SBPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1
+defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort5,SBPort23], 7, [1,1,1], 3>; // Unsupported = 1
 defm : SBWriteResPair<WriteCvtSD2SS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
 defm : SBWriteResPair<WriteCvtPD2PS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
 defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
diff --git a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
index df0053a1dcb9b5..25f79397fa071d 100644
--- a/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Barcelona/resources-sse2.s
@@ -448,7 +448,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtsi2sd	%rcx, %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   cvtsi2sdl	(%rax), %xmm2
 # CHECK-NEXT:  2      9     1.00    *                   cvtsi2sdq	(%rax), %xmm2
-# CHECK-NEXT:  1      1     1.00                        cvtss2sd	%xmm0, %xmm2
+# CHECK-NEXT:  2      1     1.00                        cvtss2sd	%xmm0, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   cvtss2sd	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   cvttpd2dq	(%rax), %xmm2
@@ -687,7 +687,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -     172.00 75.83  117.33 17.00  101.83 67.00  67.00
+# CHECK-NEXT:  -     172.00 75.83  117.33 17.00  102.83 67.00  67.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
@@ -732,7 +732,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -     cvtsi2sd	%rcx, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   cvtsi2sdl	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   cvtsi2sdq	(%rax), %xmm2
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     cvtss2sd	%xmm0, %xmm2
+# CHECK-NEXT:  -      -     1.00    -      -     1.00    -      -     cvtss2sd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   cvtss2sd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -     cvttpd2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -     1.00   0.50   0.50   cvttpd2dq	(%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
index 1b196b4355a6d4..028625013a85cc 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s
@@ -1115,9 +1115,9 @@ vzeroupper
 # CHECK-NEXT:  1      3     1.00                        vcomiss	%xmm0, %xmm1
 # CHECK-NEXT:  2      8     1.00    *                   vcomiss	(%rax), %xmm1
 # CHECK-NEXT:  2      4     1.00                        vcvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  3      9     1.00    *                   vcvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      9     1.00    *                   vcvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  2      6     1.00                        vcvtdq2pd	%xmm0, %ymm2
-# CHECK-NEXT:  3      11    1.00    *                   vcvtdq2pd	(%rax), %ymm2
+# CHECK-NEXT:  2      11    1.00    *                   vcvtdq2pd	(%rax), %ymm2
 # CHECK-NEXT:  1      3     1.00                        vcvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   vcvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vcvtdq2ps	%ymm0, %ymm2
@@ -1137,7 +1137,7 @@ vzeroupper
 # CHECK-NEXT:  2      2     1.00                        vcvtps2pd	%xmm0, %xmm2
 # CHECK-NEXT:  2      6     1.00    *                   vcvtps2pd	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        vcvtps2pd	%xmm0, %ymm2
-# CHECK-NEXT:  3      9     1.00    *                   vcvtps2pd	(%rax), %ymm2
+# CHECK-NEXT:  2      9     1.00    *                   vcvtps2pd	(%rax), %ymm2
 # CHECK-NEXT:  2      4     1.00                        vcvtsd2si	%xmm0, %ecx
 # CHECK-NEXT:  2      4     1.00                        vcvtsd2si	%xmm0, %rcx
 # CHECK-NEXT:  3      9     1.00    *                   vcvtsd2si	(%rax), %ecx
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     257.00 216.25 247.25 173.17 173.17 38.00  424.25 3.25   12.67
+# CHECK-NEXT:  -     257.00 216.25 247.25 173.17 173.17 38.00  421.25 3.25   12.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -1825,9 +1825,9 @@ vzeroupper
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcomiss	%xmm0, %xmm1
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcomiss	(%rax), %xmm1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     vcvtdq2pd	%xmm0, %ymm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     vcvtdq2pd	(%rax), %ymm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtdq2pd	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     vcvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     vcvtdq2ps	%ymm0, %ymm2
@@ -1847,7 +1847,7 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtps2pd	%xmm0, %xmm2
 # CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtps2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     vcvtps2pd	%xmm0, %ymm2
-# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -     1.00    -      -     vcvtps2pd	(%rax), %ymm2
+# CHECK-NEXT:  -      -     1.00    -     0.50   0.50    -      -      -      -     vcvtps2pd	(%rax), %ymm2
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     vcvtsd2si	%xmm0, %ecx
 # CHECK-NEXT:  -      -     1.00   1.00    -      -      -      -      -      -     vcvtsd2si	%xmm0, %rcx
 # CHECK-NEXT:  -      -     1.00   1.00   0.50   0.50    -      -      -      -     vcvtsd2si	(%rax), %ecx
diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
index e76d90521afa9c..8851be4679a1e9 100644
--- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse2.s
@@ -423,7 +423,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        comisd	%xmm0, %xmm1
 # CHECK-NEXT:  2      8     1.00    *                   comisd	(%rax), %xmm1
 # CHECK-NEXT:  2      4     1.00                        cvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  3      9     1.00    *                   cvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      9     1.00    *                   cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        cvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   cvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtpd2dq	%xmm0, %xmm2
@@ -433,7 +433,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  3      9     1.00    *                   cvtpd2ps	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        cvtpi2pd	%mm0, %xmm2
-# CHECK-NEXT:  3      9     1.00    *                   cvtpi2pd	(%rax), %xmm2
+# CHECK-NEXT:  2      9     1.00    *                   cvtpi2pd	(%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        cvtps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  2      8     1.00    *                   cvtps2dq	(%rax), %xmm2
 # CHECK-NEXT:  2      2     1.00                        cvtps2pd	%xmm0, %xmm2
@@ -689,7 +689,7 @@ xorpd       (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     78.00  70.75  95.75  63.17  63.17  14.00  119.25 2.25   4.67
+# CHECK-NEXT:  -     78.00  70.75  95.75  63.17  63.17  14.00  117.25 2.25   4.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -709,7 +709,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     comisd	%xmm0, %xmm1
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     comisd	(%rax), %xmm1
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtdq2pd	%xmm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     cvtdq2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtdq2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     cvtdq2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtdq2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtpd2dq	%xmm0, %xmm2
@@ -719,7 +719,7 @@ xorpd       (%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtpd2ps	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     cvtpd2ps	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -     1.00    -      -     cvtpi2pd	%mm0, %xmm2
-# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -     1.00    -      -     cvtpi2pd	(%rax), %xmm2
+# CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtpi2pd	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     1.00    -      -      -      -      -      -     cvtps2dq	%xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     1.00   0.50   0.50    -      -      -      -     cvtps2dq	(%rax), %xmm2
 # CHECK-NEXT:  -      -     1.00    -      -      -      -     1.00    -      -     cvtps2pd	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
index 49db25cb0bdfb1..7f07fd56fe60dc 100644
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx1.s
@@ -1137,7 +1137,7 @@ vzeroupper
 # CHECK-NEXT:  2      2     1.00                        vcvtps2pd	%xmm0, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   vcvtps2pd	(%rax), %xmm2
 # CHECK-NEXT:  2      2     1.00                        vcvtps2pd	%xmm0, %ymm2
-# CHECK-NEXT:  2      7     1.00    *                   vcvtps2pd	(%rax), %ymm2
+# CHECK-NEXT:  3      7     1.00    *                   vcvtps2pd	(%rax), %ymm2
 # CHECK-NEXT:  2      5     1.00                        vcvtsd2si	%xmm0, %ecx
 # CHECK-NEXT:  2      5     1.00                        vcvtsd2si	%xmm0, %rcx
 # CHECK-NEXT:  3      10    1.00    *                   vcvtsd2si	(%rax), %ecx
@@ -1152,7 +1152,7 @@ vzeroupper
 # CHECK-NEXT:  3      5     2.00                        vcvtsi2ss	%rcx, %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtsi2ssl	(%rax), %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   vcvtsi2ssq	(%rax), %xmm0, %xmm2
-# CHECK-NEXT:  1      1     1.00                        vcvtss2sd	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  2      1     1.00                        vcvtss2sd	%xmm0, %xmm1, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   vcvtss2sd	(%rax), %xmm1, %xmm2
 # CHECK-NEXT:  2      5     1.00                        vcvtss2si	%xmm0, %ecx
 # CHECK-NEXT:  2      5     1.00                        vcvtss2si	%xmm0, %rcx
@@ -1734,7 +1734,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]   ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/117494


More information about the llvm-commits mailing list