[llvm] [X86] Fix scheduler class for EVEX VMOVNTDQA variants and cleanup Skylake/Icelake resource usage (PR #116946)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 20 01:59:04 PST 2024


https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/116946

Ensure we use the SchedWriteVecMoveLSNT class for all (V)MOVNTDQA instructions, remove unnecessary scheduler overrides, and adjust resource pipe usage to match the uops.info/Agner numbers.

From f58551719f07f4f3eb8fe7843bd7efe8e7e9d908 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 20 Nov 2024 09:57:45 +0000
Subject: [PATCH] [X86] Fix scheduler class for EVEX VMOVNTDQA variants and
 cleanup Skylake/Icelake resource usage

Ensure we use SchedWriteVecMoveLSNT for (V)MOVNTDQA instructions, remove unnecessary scheduler overrides, and adjust resource pipe usage to match the uops.info/Agner numbers.
---
 llvm/lib/Target/X86/X86InstrAVX512.td                  |  6 +++---
 llvm/lib/Target/X86/X86SchedIceLake.td                 | 10 ++++------
 llvm/lib/Target/X86/X86SchedSkylakeClient.td           |  4 ++--
 llvm/lib/Target/X86/X86SchedSkylakeServer.td           | 10 ++++------
 .../tools/llvm-mca/X86/IceLakeServer/resources-avx1.s  | 10 +++++-----
 .../tools/llvm-mca/X86/IceLakeServer/resources-avx2.s  |  6 +++---
 .../llvm-mca/X86/IceLakeServer/resources-avx512.s      |  6 +++---
 .../tools/llvm-mca/X86/IceLakeServer/resources-sse41.s |  6 +++---
 .../tools/llvm-mca/X86/SkylakeClient/resources-avx1.s  | 10 +++++-----
 .../tools/llvm-mca/X86/SkylakeClient/resources-avx2.s  |  6 +++---
 .../tools/llvm-mca/X86/SkylakeClient/resources-sse41.s |  6 +++---
 .../tools/llvm-mca/X86/SkylakeServer/resources-avx1.s  | 10 +++++-----
 .../tools/llvm-mca/X86/SkylakeServer/resources-avx2.s  |  6 +++---
 .../llvm-mca/X86/SkylakeServer/resources-avx512.s      |  6 +++---
 .../tools/llvm-mca/X86/SkylakeServer/resources-sse41.s |  6 +++---
 15 files changed, 52 insertions(+), 56 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 7f854309eeed04..a05a3063cac559 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4549,20 +4549,20 @@ let Predicates = [HasAVX512] in {
 
 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
-                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
+                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.ZMM.RM]>,
                       EVEX, T8, PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
 
 let Predicates = [HasVLX] in {
   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
                        (ins i256mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}",
-                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
+                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.YMM.RM]>,
                        EVEX, T8, PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
 
   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
                       (ins i128mem:$src),
                       "vmovntdqa\t{$src, $dst|$dst, $src}",
-                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
+                      [], SSEPackedInt>, Sched<[SchedWriteVecMoveLSNT.XMM.RM]>,
                       EVEX, T8, PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
 }
 
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 48be485176c3fa..4bc9f76e6f2c4c 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -350,8 +350,8 @@ defm : ICXWriteResPair<WriteFVarBlendZ,[ICXPort015], 2, [2], 2, 7>;
 defm : X86WriteRes<WriteVecLoad,         [ICXPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteVecLoadX,        [ICXPort23], 6, [1], 1>;
 defm : X86WriteRes<WriteVecLoadY,        [ICXPort23], 7, [1], 1>;
-defm : X86WriteRes<WriteVecLoadNT,       [ICXPort23], 6, [1], 1>;
-defm : X86WriteRes<WriteVecLoadNTY,      [ICXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT,       [ICXPort23,ICXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecLoadNTY,      [ICXPort23,ICXPort015], 8, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoad,   [ICXPort23,ICXPort015], 7, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoadY,  [ICXPort23,ICXPort015], 8, [1,1], 2>;
 defm : X86WriteRes<WriteVecStore,        [ICXPort78,ICXPort49], 1, [1,1], 2>;
@@ -1361,8 +1361,7 @@ def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23,ICXPort015]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[ICXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
-                                          VPBLENDDrmi)>;
+def: InstRW<[ICXWriteResGroup95], (instrs VPBLENDDrmi)>;
 def: InstRW<[ICXWriteResGroup95, ReadAfterVecXLd],
                                   (instregex "VBLENDMPDZ128rm(b?)",
                                              "VBLENDMPSZ128rm(b?)",
@@ -1568,8 +1567,7 @@ def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23,ICXPort015]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[ICXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
-                                           VPBLENDDYrmi)>;
+def: InstRW<[ICXWriteResGroup121], (instrs VPBLENDDYrmi)>;
 def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
                                    (instregex "VBLENDMPD(Z|Z256)rm(b?)",
                                               "VBLENDMPS(Z|Z256)rm(b?)",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 116aa3555a065c..7652eb7325ac1c 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -345,8 +345,8 @@ defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
 defm : X86WriteRes<WriteVecLoad,         [SKLPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteVecLoadX,        [SKLPort23], 6, [1], 1>;
 defm : X86WriteRes<WriteVecLoadY,        [SKLPort23], 7, [1], 1>;
-defm : X86WriteRes<WriteVecLoadNT,       [SKLPort23], 6, [1], 1>;
-defm : X86WriteRes<WriteVecLoadNTY,      [SKLPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT,       [SKLPort23,SKLPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecLoadNTY,      [SKLPort23,SKLPort015], 8, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoad,   [SKLPort23,SKLPort015], 7, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoadY,  [SKLPort23,SKLPort015], 8, [1,1], 2>;
 defm : X86WriteRes<WriteVecStore,        [SKLPort237,SKLPort4], 1, [1,1], 2>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 550edaa05f7baa..038650c6fd74ac 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -345,8 +345,8 @@ defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>;
 defm : X86WriteRes<WriteVecLoad,         [SKXPort23], 5, [1], 1>;
 defm : X86WriteRes<WriteVecLoadX,        [SKXPort23], 6, [1], 1>;
 defm : X86WriteRes<WriteVecLoadY,        [SKXPort23], 7, [1], 1>;
-defm : X86WriteRes<WriteVecLoadNT,       [SKXPort23], 6, [1], 1>;
-defm : X86WriteRes<WriteVecLoadNTY,      [SKXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT,       [SKXPort23,SKXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecLoadNTY,      [SKXPort23,SKXPort015], 8, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoad,   [SKXPort23,SKXPort015], 7, [1,1], 2>;
 defm : X86WriteRes<WriteVecMaskedLoadY,  [SKXPort23,SKXPort015], 8, [1,1], 2>;
 defm : X86WriteRes<WriteVecStore,        [SKXPort237,SKXPort4], 1, [1,1], 2>;
@@ -1336,8 +1336,7 @@ def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[SKXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
-                                          VPBLENDDrmi)>;
+def: InstRW<[SKXWriteResGroup95], (instrs VPBLENDDrmi)>;
 def: InstRW<[SKXWriteResGroup95, ReadAfterVecXLd],
                                   (instregex "VBLENDMPDZ128rm(b?)",
                                              "VBLENDMPSZ128rm(b?)",
@@ -1539,8 +1538,7 @@ def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> {
   let NumMicroOps = 2;
   let ReleaseAtCycles = [1,1];
 }
-def: InstRW<[SKXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
-                                           VPBLENDDYrmi)>;
+def: InstRW<[SKXWriteResGroup121], (instrs VPBLENDDYrmi)>;
 def: InstRW<[SKXWriteResGroup121, ReadAfterVecYLd],
                                    (instregex "VBLENDMPD(Z|Z256)rm(b?)",
                                               "VBLENDMPS(Z|Z256)rm(b?)",
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
index edd3e7cd8c0e79..0e7c5751e5c084 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s
@@ -1300,8 +1300,8 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vmovmskps	%ymm0, %ecx
 # CHECK-NEXT:  2      1     0.50           *            vmovntdq	%xmm0, (%rax)
 # CHECK-NEXT:  2      1     0.50           *            vmovntdq	%ymm0, (%rax)
-# CHECK-NEXT:  1      6     0.50    *                   vmovntdqa	(%rax), %xmm2
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm2
+# CHECK-NEXT:  2      7     0.50    *                   vmovntdqa	(%rax), %xmm2
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %ymm2
 # CHECK-NEXT:  2      1     0.50           *            vmovntpd	%xmm0, (%rax)
 # CHECK-NEXT:  2      1     0.50           *            vmovntpd	%ymm0, (%rax)
 # CHECK-NEXT:  2      1     0.50           *            vmovntps	%xmm0, (%rax)
@@ -1738,7 +1738,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     126.00 325.58 256.58 160.50 160.50 19.00  270.58 6.25   19.00  19.00  19.00
+# CHECK-NEXT:  -     126.00 326.25 257.25 160.50 160.50 19.00  271.25 6.25   19.00  19.00  19.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2012,8 +2012,8 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -      -      -     vmovmskps	%ymm0, %ecx
 # CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vmovntdq	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vmovntdq	%ymm0, (%rax)
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -      -      -     vmovntdqa	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -      -      -     vmovntdqa	(%rax), %ymm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovntdqa	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovntdqa	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vmovntpd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vmovntpd	%ymm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vmovntps	%xmm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
index 81b178d12d2d33..c7a0be0cf9cde6 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s
@@ -475,7 +475,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  5      20    2.00    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      7     0.50    *                   vinserti128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm0
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  2      4     1.00                        vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  3      11    1.00    *                   vmpsadbw	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpabsb	%ymm0, %ymm2
@@ -778,7 +778,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -      -     110.33 116.33 98.00  98.00  2.50   137.33  -     2.50   2.50   2.50
+# CHECK-NEXT:  -      -     110.67 116.67 98.00  98.00  2.50   137.67  -     2.50   2.50   2.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -797,7 +797,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -      -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vinserti128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -      -      -     vmovntdqa	(%rax), %ymm0
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -     0.50    -      -      -     1.50    -      -      -      -     vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     1.50    -      -      -      -     vmpsadbw	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -      -      -     vpabsb	%ymm0, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
index 28abdd5de1ce97..8b495d6ee268e5 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512.s
@@ -1499,7 +1499,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      1     0.50           *            vmovdqu64	%zmm16, (%rax) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vmovdqu64	%zmm16, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vmovdqu64	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %zmm0
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %zmm0
 # CHECK-NEXT:  1      1     1.00                        vmovshdup	%zmm16, %zmm19
 # CHECK-NEXT:  2      8     0.50    *                   vmovshdup	(%rax), %zmm19
 # CHECK-NEXT:  1      1     1.00                        vmovshdup	%zmm16, %zmm19 {%k1}
@@ -2057,7 +2057,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -     612.00 411.17 103.67 328.00 328.00 48.50  593.17 6.00   48.50  48.50  48.50
+# CHECK-NEXT:  -     612.00 411.50 104.00 328.00 328.00 48.50  593.50 6.00   48.50  48.50  48.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -2470,7 +2470,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -      -      -     0.50    -      -     0.50   0.50   0.50   vmovdqu64	%zmm16, (%rax) {%k1}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -      -      -     vmovdqu64	%zmm16, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovdqu64	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -      -      -     vmovntdqa	(%rax), %zmm0
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovntdqa	(%rax), %zmm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup	%zmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     vmovshdup	(%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     vmovshdup	%zmm16, %zmm19 {%k1}
diff --git a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse41.s
index 6ff620b0779f3c..9748a278487704 100644
--- a/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-sse41.s
@@ -171,7 +171,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      2     1.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   insertps	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      6     0.50    *                   movntdqa	(%rax), %xmm2
+# CHECK-NEXT:  2      7     0.50    *                   movntdqa	(%rax), %xmm2
 # CHECK-NEXT:  2      4     1.00                        mpsadbw	$1, %xmm0, %xmm2
 # CHECK-NEXT:  3      10    1.00    *                   mpsadbw	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      3     1.00                        packusdw	%xmm0, %xmm2
@@ -268,7 +268,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -      -     36.67  46.17  22.00  22.00  2.50   49.17   -     2.50   2.50   2.50
+# CHECK-NEXT:  -      -     37.00  46.50  22.00  22.00  2.50   49.50   -     2.50   2.50   2.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   Instructions:
@@ -288,7 +288,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -     0.50   1.00    -     0.50   0.50   0.50   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     insertps	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -      -      -     insertps	$1, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -      -      -     movntdqa	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -      -      -     movntdqa	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -     0.50    -      -      -     1.50    -      -      -      -     mpsadbw	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -     0.50   0.50   0.50    -     1.50    -      -      -      -     mpsadbw	$1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -     packusdw	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
index e05911e4709dc1..1c08bb82d40061 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s
@@ -1300,8 +1300,8 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vmovmskps	%ymm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovntdq	%xmm0, (%rax)
 # CHECK-NEXT:  2      1     1.00           *            vmovntdq	%ymm0, (%rax)
-# CHECK-NEXT:  1      6     0.50    *                   vmovntdqa	(%rax), %xmm2
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm2
+# CHECK-NEXT:  2      7     0.50    *                   vmovntdqa	(%rax), %xmm2
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %ymm2
 # CHECK-NEXT:  2      1     1.00           *            vmovntpd	%xmm0, (%rax)
 # CHECK-NEXT:  2      1     1.00           *            vmovntpd	%ymm0, (%rax)
 # CHECK-NEXT:  2      1     1.00           *            vmovntps	%xmm0, (%rax)
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     126.00 338.58 200.58 173.83 173.83 38.00  324.58 6.25   11.33
+# CHECK-NEXT:  -     126.00 339.25 201.25 173.83 173.83 38.00  325.25 6.25   11.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2010,8 +2010,8 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vmovmskps	%ymm0, %ecx
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntdq	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntdq	%ymm0, (%rax)
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %ymm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovntdqa	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovntdqa	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntpd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntpd	%ymm0, (%rax)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntps	%xmm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
index d5f347b15548d1..e07b60a9853508 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s
@@ -475,7 +475,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  5      20    2.00    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      7     0.50    *                   vinserti128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm0
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  2      4     2.00                        vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  3      11    2.00    *                   vmpsadbw	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpabsb	%ymm0, %ymm2
@@ -776,7 +776,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     110.33 89.33  100.33 100.33 5.00   164.33  -     0.33
+# CHECK-NEXT:  -      -     110.67 89.67  100.33 100.33 5.00   164.67  -     0.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -795,7 +795,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vinserti128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %ymm0
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     2.00    -      -     vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     vmpsadbw	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vpabsb	%ymm0, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
index 0eec7e4cf58e8a..307c858051e4a8 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s
@@ -171,7 +171,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      2     1.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   insertps	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      6     0.50    *                   movntdqa	(%rax), %xmm2
+# CHECK-NEXT:  2      7     0.50    *                   movntdqa	(%rax), %xmm2
 # CHECK-NEXT:  2      4     2.00                        mpsadbw	$1, %xmm0, %xmm2
 # CHECK-NEXT:  3      10    2.00    *                   mpsadbw	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        packusdw	%xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     38.83  30.33  23.67  23.67  5.00   63.33  0.50   1.67
+# CHECK-NEXT:  -      -     39.17  30.67  23.67  23.67  5.00   63.67  0.50   1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -286,7 +286,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     insertps	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     insertps	$1, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     movntdqa	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     movntdqa	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     2.00    -      -     mpsadbw	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     mpsadbw	$1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     packusdw	%xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
index 1f352c119f5234..6079e177ce61d1 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s
@@ -1300,8 +1300,8 @@ vzeroupper
 # CHECK-NEXT:  1      2     1.00                        vmovmskps	%ymm0, %ecx
 # CHECK-NEXT:  2      1     1.00           *            vmovntdq	%xmm0, (%rax)
 # CHECK-NEXT:  2      1     1.00           *            vmovntdq	%ymm0, (%rax)
-# CHECK-NEXT:  1      6     0.50    *                   vmovntdqa	(%rax), %xmm2
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm2
+# CHECK-NEXT:  2      7     0.50    *                   vmovntdqa	(%rax), %xmm2
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %ymm2
 # CHECK-NEXT:  2      1     1.00           *            vmovntpd	%xmm0, (%rax)
 # CHECK-NEXT:  2      1     1.00           *            vmovntpd	%ymm0, (%rax)
 # CHECK-NEXT:  2      1     1.00           *            vmovntps	%xmm0, (%rax)
@@ -1736,7 +1736,7 @@ vzeroupper
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     126.00 327.92 204.92 173.83 173.83 38.00  320.92 7.25   11.33
+# CHECK-NEXT:  -     126.00 328.58 205.58 173.83 173.83 38.00  321.58 7.25   11.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2010,8 +2010,8 @@ vzeroupper
 # CHECK-NEXT:  -      -     1.00    -      -      -      -      -      -      -     vmovmskps	%ymm0, %ecx
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntdq	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntdq	%ymm0, (%rax)
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %ymm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovntdqa	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovntdqa	(%rax), %ymm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntpd	%xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntpd	%ymm0, (%rax)
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovntps	%xmm0, (%rax)
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
index 0df51fab0bb0b3..6e75196b345849 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s
@@ -475,7 +475,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  5      20    2.00    *                   vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  1      3     1.00                        vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  2      7     0.50    *                   vinserti128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %ymm0
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  2      4     2.00                        vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  3      11    2.00    *                   vmpsadbw	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  1      1     0.50                        vpabsb	%ymm0, %ymm2
@@ -776,7 +776,7 @@ vpxor           (%rax), %ymm1, %ymm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     110.33 89.33  100.33 100.33 5.00   164.33  -     0.33
+# CHECK-NEXT:  -      -     110.67 89.67  100.33 100.33 5.00   164.67  -     0.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -795,7 +795,7 @@ vpxor           (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     1.33   0.33   2.00   2.00    -     1.33    -      -     vgatherqps	%xmm0, (%rax,%ymm1,2), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vinserti128	$1, %xmm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vinserti128	$1, (%rax), %ymm1, %ymm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %ymm0
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovntdqa	(%rax), %ymm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     2.00    -      -     vmpsadbw	$1, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     vmpsadbw	$1, (%rax), %ymm1, %ymm2
 # CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vpabsb	%ymm0, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
index e4f9be2fce1113..4f384dcf35c833 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s
@@ -1499,7 +1499,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  2      1     1.00           *            vmovdqu64	%zmm16, (%rax) {%k1}
 # CHECK-NEXT:  1      1     0.50                        vmovdqu64	%zmm16, %zmm19 {%k1} {z}
 # CHECK-NEXT:  2      8     0.50    *                   vmovdqu64	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      7     0.50    *                   vmovntdqa	(%rax), %zmm0
+# CHECK-NEXT:  2      8     0.50    *                   vmovntdqa	(%rax), %zmm0
 # CHECK-NEXT:  1      1     1.00                        vmovshdup	%zmm16, %zmm19
 # CHECK-NEXT:  2      8     0.50    *                   vmovshdup	(%rax), %zmm19
 # CHECK-NEXT:  1      1     1.00                        vmovshdup	%zmm16, %zmm19 {%k1}
@@ -2055,7 +2055,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -     612.00 352.67 103.67 360.33 360.33 97.00  651.67 6.00   32.33
+# CHECK-NEXT:  -     612.00 353.00 104.00 360.33 360.33 97.00  652.00 6.00   32.33
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -2468,7 +2468,7 @@ vunpcklps         (%rax){1to16}, %zmm17, %zmm19 {z}{k1}
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00    -      -     0.33   vmovdqu64	%zmm16, (%rax) {%k1}
 # CHECK-NEXT:  -      -     0.50    -      -      -      -     0.50    -      -     vmovdqu64	%zmm16, %zmm19 {%k1} {z}
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovdqu64	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     vmovntdqa	(%rax), %zmm0
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovntdqa	(%rax), %zmm0
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup	%zmm16, %zmm19
 # CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     vmovshdup	(%rax), %zmm19
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vmovshdup	%zmm16, %zmm19 {%k1}
diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
index e3f34fdc9430d3..c9d3a8e40b652e 100644
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s
@@ -171,7 +171,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  3      2     1.00           *            extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  1      1     1.00                        insertps	$1, %xmm0, %xmm2
 # CHECK-NEXT:  2      7     1.00    *                   insertps	$1, (%rax), %xmm2
-# CHECK-NEXT:  1      6     0.50    *                   movntdqa	(%rax), %xmm2
+# CHECK-NEXT:  2      7     0.50    *                   movntdqa	(%rax), %xmm2
 # CHECK-NEXT:  2      4     2.00                        mpsadbw	$1, %xmm0, %xmm2
 # CHECK-NEXT:  3      10    2.00    *                   mpsadbw	$1, (%rax), %xmm2
 # CHECK-NEXT:  1      1     1.00                        packusdw	%xmm0, %xmm2
@@ -266,7 +266,7 @@ roundss     $1, (%rax), %xmm2
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
-# CHECK-NEXT:  -      -     38.17  29.67  23.67  23.67  5.00   64.67  0.50   1.67
+# CHECK-NEXT:  -      -     38.50  30.00  23.67  23.67  5.00   65.00  0.50   1.67
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
@@ -286,7 +286,7 @@ roundss     $1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   1.00   1.00    -     0.33   extractps	$1, %xmm0, (%rax)
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     insertps	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     1.00    -      -     insertps	$1, (%rax), %xmm2
-# CHECK-NEXT:  -      -      -      -     0.50   0.50    -      -      -      -     movntdqa	(%rax), %xmm2
+# CHECK-NEXT:  -      -     0.33   0.33   0.50   0.50    -     0.33    -      -     movntdqa	(%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     2.00    -      -     mpsadbw	$1, %xmm0, %xmm2
 # CHECK-NEXT:  -      -      -      -     0.50   0.50    -     2.00    -      -     mpsadbw	$1, (%rax), %xmm2
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     packusdw	%xmm0, %xmm2



More information about the llvm-commits mailing list