[llvm] [AArch64] Update zero latency instructions in Neoverse scheduling tables (PR #165690)

Simon Wallis via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 03:13:13 PST 2025


https://github.com/simonwallis2 updated https://github.com/llvm/llvm-project/pull/165690

>From 90fdc605033f2142f647d4aa2273210f52393d2c Mon Sep 17 00:00:00 2001
From: Simon Wallis <simon.wallis2 at arm.com>
Date: Thu, 30 Oct 2025 10:43:12 +0000
Subject: [PATCH 1/7] [AArch64] Update zero latency instructions in scheduling
 tables for Neoverse cores

NeoverseZeroMove was introduced for Neoverse-V2 and was added to V3 and V3AE.
Use NeoverseZeroMove for Neoverse-V1, N2 and N3 in the same way, covering these instructions:
MOV Xd|Wd, #0|XZR|WZR

For all Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine:
MOV Wd,Wn
MOV Xd,Xn

For Neoverse-N3 only, these instructions also have zero latency:
FMOV Dd, Dn
FMOV Sd, Sn

Change-Id: I1a5f86e049798582d33d96ba99389e4b2ffb210e
---
 .../Target/AArch64/AArch64SchedNeoverseN2.td  | 30 ++++++++++++-
 .../Target/AArch64/AArch64SchedNeoverseN3.td  | 26 ++++++++++--
 .../Target/AArch64/AArch64SchedNeoverseV1.td  | 22 +++++++++-
 .../Target/AArch64/AArch64SchedNeoverseV2.td  |  5 ++-
 .../Target/AArch64/AArch64SchedNeoverseV3.td  |  5 ++-
 .../AArch64/AArch64SchedNeoverseV3AE.td       |  5 ++-
 .../AArch64/Neoverse/N2-basic-instructions.s  | 22 +++++-----
 .../AArch64/Neoverse/N3-basic-instructions.s  | 30 ++++++-------
 .../AArch64/Neoverse/V1-basic-instructions.s  | 22 +++++-----
 .../AArch64/Neoverse/V1-zero-dependency.s     | 42 +++++++++----------
 .../AArch64/Neoverse/V2-basic-instructions.s  | 10 ++---
 .../AArch64/Neoverse/V2-zero-lat-movs.s       | 24 +++++------
 .../AArch64/Neoverse/V3-basic-instructions.s  | 10 ++---
 .../AArch64/Neoverse/V3-zero-lat-movs.s       | 24 +++++------
 .../Neoverse/V3AE-basic-instructions.s        | 10 ++---
 .../AArch64/Neoverse/V3AE-zero-lat-movs.s     | 24 +++++------
 16 files changed, 192 insertions(+), 119 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 50f10114989d0..d1ce5a13d0510 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -72,6 +72,13 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 // Define customized scheduler read/write types specific to the Neoverse N2.
 
 //===----------------------------------------------------------------------===//
+
+// Define generic 0 micro-op types
+def N2Write_0c : SchedWriteRes<[]> {
+    let Latency = 0;
+    let NumMicroOps = 0;
+}
+
 // Define generic 1 micro-op types
 
 def N2Write_1c_1B   : SchedWriteRes<[N2UnitB]>   { let Latency = 1; }
@@ -645,6 +652,21 @@ def N2Write_11c_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
   let NumMicroOps = 27;
 }
 
+//===----------------------------------------------------------------------===//
+// Define predicate-controlled types
+
+def N2Write_0or1c_1I : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [N2Write_0c]>,
+                      SchedVar<NoSchedPred,      [N2Write_1c_1I]>]>;
+
+def N2Write_0or2c_1V : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [N2Write_0c]>,
+                      SchedVar<NoSchedPred,      [N2Write_2c_1V]>]>;
+
+def N2Write_0or3c_1M0 : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [N2Write_0c]>,
+                      SchedVar<NoSchedPred,      [N2Write_3c_1M0]>]>;
+
 //===----------------------------------------------------------------------===//
 // Define types for arithmetic and logical ops with short shifts
 def N2Write_Arith : SchedWriteVariant<[
@@ -680,6 +702,7 @@ def : InstRW<[N2Write_1c_1B_1S], (instrs BL, BLR)>;
 // ALU, basic
 // ALU, basic, flagset
 def : SchedAlias<WriteI,     N2Write_1c_1I>;
+def : InstRW<[N2Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
 
 // ALU, extend and shift
 def : SchedAlias<WriteIEReg, N2Write_2c_1M>;
@@ -691,7 +714,8 @@ def : SchedAlias<WriteISReg, N2Write_Arith>;
 
 // Logical, shift, no flagset
 def : InstRW<[N2Write_1c_1I],
-             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+             (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[N2Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;
 
 // Logical, shift, flagset
 def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -882,7 +906,7 @@ def : SchedAlias<WriteFImm, N2Write_2c_1V>;
 def : InstRW<[N2Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
 
 // FP transfer, from gen to low half of vec reg
-def : InstRW<[N2Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
+def : InstRW<[N2Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                                         FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
 
 // FP transfer, from gen to high half of vec reg
@@ -1225,6 +1249,8 @@ def : InstRW<[N2Write_3c_1V0], (instrs BFCVT)>;
 // ASIMD unzip/zip
 // Handled by SchedAlias<WriteV[dq], ...>
 
+def : InstRW<[N2Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
+
 // ASIMD duplicate, gen reg
 def : InstRW<[N2Write_3c_1M0], (instregex "^DUPv.+gpr")>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
index 411b372a3f533..32d48ca66ee2d 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
@@ -553,6 +553,22 @@ def N3Write_16c_16V0 : SchedWriteRes<[N3UnitV0, N3UnitV0, N3UnitV0, N3UnitV0,
     let NumMicroOps = 16;
 }
 
+
+//===----------------------------------------------------------------------===//
+// Define predicate-controlled types
+
+def N3Write_0or1c_1I : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [N3Write_0c]>,
+                      SchedVar<NoSchedPred,      [N3Write_1c_1I]>]>;
+
+def N3Write_0or2c_1V : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [N3Write_0c]>,
+                      SchedVar<NoSchedPred,      [N3Write_2c_1V]>]>;
+
+def N3Write_0or3c_1M0 : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [N3Write_0c]>,
+                      SchedVar<NoSchedPred,      [N3Write_3c_1M0]>]>;
+
 // Miscellaneous
 // -----------------------------------------------------------------------------
 
@@ -581,6 +597,7 @@ def : InstRW<[N3Write_1c_1B_1S], (instrs BL, BLR)>;
 // Conditional compare
 // Conditional select
 def : SchedAlias<WriteI, N3Write_1c_1I>;
+def : InstRW<[N3Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
 
 // ALU, extend and shift
 def : SchedAlias<WriteIEReg, N3Write_2c_1M>;
@@ -610,7 +627,8 @@ def : InstRW<[N3Write_1c_1I], (instrs GMI, SUBP, SUBPS)>;
 
 // Logical, shift, no flagset
 def : InstRW<[N3Write_1c_1I],
-             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+             (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[N3Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;
 
 // Logical, shift, flagset
 def : InstRW<[N3Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -855,10 +873,11 @@ def : SchedAlias<WriteFCvt, N3Write_3c_1V0>;
 def : SchedAlias<WriteFImm, N3Write_2c_1V>;
 
 // FP move, register
-def : InstRW<[N3Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
+def : InstRW<[N3Write_2c_1V], (instrs FMOVHr)>;
+def : InstRW<[N3Write_0c], (instrs FMOVSr, FMOVDr)>;
 
 // FP transfer, from gen to low half of vec reg
-def : InstRW<[N3Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+def : InstRW<[N3Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
 
 // FP transfer, from gen to high half of vec reg
 def : InstRW<[N3Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1186,6 +1205,7 @@ def : InstRW<[N3Write_3c_1V0], (instrs BFCVT)>;
 // ASIMD transpose
 // ASIMD unzip/zip
 // Covered by WriteV[dq]
+def : InstRW<[N3Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
 
 // ASIMD duplicate, gen reg
 def : InstRW<[N3Write_3c_1M0], (instregex "^DUPv.+gpr")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 3cbfc59423c9a..8d33ca22616c2 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -472,6 +472,21 @@ def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                             V1UnitV, V1UnitV, V1UnitV,
                                             V1UnitV, V1UnitV, V1UnitV]>;
 
+//===----------------------------------------------------------------------===//
+// Define predicate-controlled types
+
+def V1Write_0or1c_1I : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
+                      SchedVar<NoSchedPred,      [V1Write_1c_1I]>]>;
+
+def V1Write_0or2c_1V : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
+                      SchedVar<NoSchedPred,      [V1Write_2c_1V]>]>;
+
+def V1Write_0or3c_1M0 : SchedWriteVariant<[
+                      SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
+                      SchedVar<NoSchedPred,      [V1Write_3c_1M0]>]>;
+
 //===----------------------------------------------------------------------===//
 // Define forwarded types
 
@@ -603,6 +618,7 @@ def : InstRW<[V1Write_1c_1I_1Flg],
                         "^(ADC|SBC)S[WX]r$",
                         "^ANDS[WX]ri$",
                         "^(AND|BIC)S[WX]rr$")>;
+def : InstRW<[V1Write_0or1c_1I], (instregex "^MOVZ[WX]i$")>;
 
 // ALU, extend and shift
 def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
@@ -623,7 +639,8 @@ def               : InstRW<[V1WriteISRegS],
                            (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
 
 // Logical, shift, no flagset
-def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[V1Write_0or1c_1I], (instregex "^ORR[WX]rs$")>;
 
 // Logical, shift, flagset
 def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -805,7 +822,7 @@ def : SchedAlias<WriteFImm, V1Write_2c_1V>;
 def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
 
 // FP transfer, from gen to low half of vec reg
-def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+def : InstRW<[V1Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
 
 // FP transfer, from gen to high half of vec reg
 def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1122,6 +1139,7 @@ def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
 // ASIMD transpose
 // ASIMD unzip/zip
 // Covered by "SchedAlias (WriteV[dq]...)" above
+def : InstRW<[V1Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
 
 // ASIMD duplicate, gen reg
 def : InstRW<[V1Write_3c_1M0],
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 2387f176f3051..1ef087f07022d 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -94,7 +94,10 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 //===----------------------------------------------------------------------===//
 
 // Define generic 0 micro-op types
-def V2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
+def V2Write_0c : SchedWriteRes<[]> {
+    let Latency = 0;
+    let NumMicroOps = 0;
+}
 
 // Define generic 1 micro-op types
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
index e23576a20d277..3dd2988088f0b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
@@ -94,7 +94,10 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 //===----------------------------------------------------------------------===//
 
 // Define generic 0 micro-op types
-def V3Write_0c : SchedWriteRes<[]> { let Latency = 0; }
+def V3Write_0c : SchedWriteRes<[]> {
+    let Latency = 0;
+    let NumMicroOps = 0;
+}
 
 // Define generic 1 micro-op types
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
index 0f1ec669a4e5e..19b56260387e1 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
@@ -89,7 +89,10 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 //===----------------------------------------------------------------------===//
 
 // Define generic 0 micro-op types
-def V3AEWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
+def V3AEWrite_0c : SchedWriteRes<[]> {
+    let Latency = 0;
+    let NumMicroOps = 0;
+}
 
 // Define generic 1 micro-op types
 
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
index cf1cf0e98c801..d3343ab055887 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
@@ -2508,14 +2508,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  1      1     0.25                        mov	x3, x6
-# CHECK-NEXT:  1      1     0.25                        mov	x3, xzr
-# CHECK-NEXT:  1      1     0.25                        mov	wzr, w2
-# CHECK-NEXT:  1      1     0.25                        mov	w3, w5
+# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
+# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
+# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.25                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.25                        mov	x2, #5299989643264
-# CHECK-NEXT:  1      1     0.25                        mov	x2, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.25                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.25                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        movk	w5, #0, lsl #16
@@ -2557,7 +2557,7 @@ drps
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]
-# CHECK-NEXT: 11.00  11.00  33.00  33.00  87.33  151.33 151.33 517.00 251.00 162.50 162.50 215.50 85.50
+# CHECK-NEXT: 11.00  11.00  33.00  33.00  87.33  151.33 151.33 515.75 249.75 161.25 161.25 215.50 85.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    Instructions:
@@ -3692,14 +3692,14 @@ drps
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     tst	w3, w7, lsl #31
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     tst	x2, x20, asr #2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x3, x6
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x3, xzr
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	wzr, w2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	w3, w5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, x6
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, xzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	wzr, w2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	w3, w5
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movz	w2, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	w2, #-1235
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x2, #5299989643264
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x2, #0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x2, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movk	w3, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movz	x4, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s
index b9758280e2491..f7311b5e41b2e 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s
@@ -1888,7 +1888,7 @@ drps
 # CHECK-NEXT:  1      2     0.50                        fccmpe	d31, d5, #7, ne
 # CHECK-NEXT:  1      2     0.50                        fcsel	s3, s20, s9, pl
 # CHECK-NEXT:  1      2     0.50                        fcsel	d9, d10, d11, mi
-# CHECK-NEXT:  1      2     0.50                        fmov	s0, s1
+# CHECK-NEXT:  0      0     0.00                        fmov	s0, s1
 # CHECK-NEXT:  1      2     0.50                        fabs	s2, s3
 # CHECK-NEXT:  1      2     0.50                        fneg	s4, s5
 # CHECK-NEXT:  1      7     1.00                        fsqrt	s6, s7
@@ -1901,7 +1901,7 @@ drps
 # CHECK-NEXT:  1      3     1.00                        frinta	s20, s21
 # CHECK-NEXT:  1      3     1.00                        frintx	s22, s23
 # CHECK-NEXT:  1      3     1.00                        frinti	s24, s25
-# CHECK-NEXT:  1      2     0.50                        fmov	d0, d1
+# CHECK-NEXT:  0      0     0.00                        fmov	d0, d1
 # CHECK-NEXT:  1      2     0.50                        fabs	d2, d3
 # CHECK-NEXT:  1      2     0.50                        fneg	d4, d5
 # CHECK-NEXT:  1      12    1.00                        fsqrt	d6, d7
@@ -2508,14 +2508,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  1      1     0.25                        mov	x3, x6
-# CHECK-NEXT:  1      1     0.25                        mov	x3, xzr
-# CHECK-NEXT:  1      1     0.25                        mov	wzr, w2
-# CHECK-NEXT:  1      1     0.25                        mov	w3, w5
+# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
+# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
+# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.25                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.25                        mov	x2, #5299989643264
-# CHECK-NEXT:  1      1     0.25                        mov	x2, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.25                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.25                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        movk	w5, #0, lsl #16
@@ -2557,7 +2557,7 @@ drps
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]
-# CHECK-NEXT: 11.00  11.00  33.00  33.00  99.33  163.33 163.33 357.75 212.75 156.25 156.25 184.50 64.50
+# CHECK-NEXT: 11.00  11.00  33.00  33.00  99.33  163.33 163.33 356.50 211.50 155.00 155.00 183.50 63.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    Instructions:
@@ -3072,7 +3072,7 @@ drps
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fccmpe	d31, d5, #7, ne
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fcsel	s3, s20, s9, pl
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fcsel	d9, d10, d11, mi
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fmov	s0, s1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     fmov	s0, s1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fabs	s2, s3
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fneg	s4, s5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     fsqrt	s6, s7
@@ -3085,7 +3085,7 @@ drps
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     frinta	s20, s21
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     frintx	s22, s23
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     frinti	s24, s25
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fmov	d0, d1
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     fmov	d0, d1
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fabs	d2, d3
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fneg	d4, d5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     fsqrt	d6, d7
@@ -3692,14 +3692,14 @@ drps
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     tst	w3, w7, lsl #31
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     tst	x2, x20, asr #2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x3, x6
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x3, xzr
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	wzr, w2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	w3, w5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, x6
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, xzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	wzr, w2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	w3, w5
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movz	w2, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	w2, #-1235
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x2, #5299989643264
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     mov	x2, #0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x2, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movk	w3, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movz	x4, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -     movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
index eddc3e565c353..f75222f27a94a 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
@@ -2673,14 +2673,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  1      1     0.25                        mov	x3, x6
-# CHECK-NEXT:  1      1     0.25                        mov	x3, xzr
-# CHECK-NEXT:  1      1     0.25                        mov	wzr, w2
-# CHECK-NEXT:  1      1     0.25                        mov	w3, w5
+# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
+# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
+# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.25                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.25                        mov	x2, #5299989643264
-# CHECK-NEXT:  1      1     0.25                        mov	x2, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.25                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.25                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        movk	w5, #0, lsl #16
@@ -2731,7 +2731,7 @@ drps
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2.0]  [2.1]  [2.2]  [3]    [4.0]  [4.1]  [5]    [6]    [7.0]  [7.1]  [8]    [9]    [10]   [11]
-# CHECK-NEXT: 13.00  13.00  40.50  40.50  48.00  48.00  48.00  96.67  175.17 175.17 322.50 209.50 142.00 142.00 189.00 55.50  65.50  13.00
+# CHECK-NEXT: 13.00  13.00  40.50  40.50  48.00  48.00  48.00  96.67  175.17 175.17 321.25 208.25 140.75 140.75 189.00 55.50  65.50  13.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2.0]  [2.1]  [2.2]  [3]    [4.0]  [4.1]  [5]    [6]    [7.0]  [7.1]  [8]    [9]    [10]   [11]   Instructions:
@@ -3944,14 +3944,14 @@ drps
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -     0.50   0.50    -      -      -      -      -      -     bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -     0.50   0.50    -      -      -      -      -      -     tst	w3, w7, lsl #31
 # CHECK-NEXT:  -      -      -      -     0.33   0.33   0.33    -      -      -     0.50   0.50    -      -      -      -      -      -     tst	x2, x20, asr #2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     mov	x3, x6
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     mov	x3, xzr
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     mov	wzr, w2
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     mov	w3, w5
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, x6
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x3, xzr
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	wzr, w2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	w3, w5
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     movz	w2, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     mov	w2, #-1235
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     mov	x2, #5299989643264
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     mov	x2, #0
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x2, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     movk	w3, #0
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     movz	x4, #0, lsl #16
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.25   0.25   0.25   0.25    -      -      -      -     movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
index 3954cbd8c5490..7767b95ff98ea 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -6,13 +6,13 @@ cmp x0, #4
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      200
-# CHECK-NEXT: Total Cycles:      54
-# CHECK-NEXT: Total uOps:        200
+# CHECK-NEXT: Total Cycles:      37
+# CHECK-NEXT: Total uOps:        100
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    3.70
-# CHECK-NEXT: IPC:               3.70
-# CHECK-NEXT: Block RThroughput: 0.5
+# CHECK-NEXT: uOps Per Cycle:    2.70
+# CHECK-NEXT: IPC:               5.41
+# CHECK-NEXT: Block RThroughput: 0.3
 
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
@@ -23,7 +23,7 @@ cmp x0, #4
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      1     0.25                        mov	x0, x1
+# CHECK-NEXT:  0      0     0.00                        mov	x0, x1
 # CHECK-NEXT:  1      1     0.33                        cmp	x0, #4
 
 # CHECK:      Resources:
@@ -48,24 +48,24 @@ cmp x0, #4
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2.0]  [2.1]  [2.2]  [3]    [4.0]  [4.1]  [5]    [6]    [7.0]  [7.1]  [8]    [9]    [10]   [11]
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.34    -      -      -     0.50   0.50   0.50   0.50    -      -      -      -
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.34    -      -      -     0.22   0.22   0.28   0.28    -      -      -      -
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2.0]  [2.1]  [2.2]  [3]    [4.0]  [4.1]  [5]    [6]    [7.0]  [7.1]  [8]    [9]    [10]   [11]   Instructions:
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -     0.49   0.49   0.01   0.01    -      -      -      -     mov	x0, x1
-# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.34    -      -      -     0.01   0.01   0.49   0.49    -      -      -      -     cmp	x0, #4
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -      -     mov	x0, x1
+# CHECK-NEXT:  -      -      -      -     0.33   0.33   0.34    -      -      -     0.22   0.22   0.28   0.28    -      -      -      -     cmp	x0, #4
 
 # CHECK:      Timeline view:
-# CHECK-NEXT: Index     012345
+# CHECK-NEXT: Index     01234
 
-# CHECK:      [0,0]     DeER .   mov	x0, x1
-# CHECK-NEXT: [0,1]     D=eER.   cmp	x0, #4
-# CHECK-NEXT: [1,0]     DeE-R.   mov	x0, x1
-# CHECK-NEXT: [1,1]     D=eER.   cmp	x0, #4
-# CHECK-NEXT: [2,0]     DeE-R.   mov	x0, x1
-# CHECK-NEXT: [2,1]     D=eER.   cmp	x0, #4
-# CHECK-NEXT: [3,0]     DeE-R.   mov	x0, x1
-# CHECK-NEXT: [3,1]     D==eER   cmp	x0, #4
+# CHECK:      [0,0]     DR  .   mov	x0, x1
+# CHECK-NEXT: [0,1]     DeER.   cmp	x0, #4
+# CHECK-NEXT: [1,0]     D--R.   mov	x0, x1
+# CHECK-NEXT: [1,1]     DeER.   cmp	x0, #4
+# CHECK-NEXT: [2,0]     D--R.   mov	x0, x1
+# CHECK-NEXT: [2,1]     DeER.   cmp	x0, #4
+# CHECK-NEXT: [3,0]     D--R.   mov	x0, x1
+# CHECK-NEXT: [3,1]     D=eER   cmp	x0, #4
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -74,6 +74,6 @@ cmp x0, #4
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     4     1.0    1.0    0.8       mov	x0, x1
-# CHECK-NEXT: 1.     4     2.3    0.3    0.0       cmp	x0, #4
-# CHECK-NEXT:        4     1.6    0.6    0.4       <total>
+# CHECK-NEXT: 0.     4     0.0    0.0    1.5       mov	x0, x1
+# CHECK-NEXT: 1.     4     1.3    1.3    0.0       cmp	x0, #4
+# CHECK-NEXT:        4     0.6    0.6    0.8       <total>
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
index 54b5f1644be48..9c987d54d2350 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
@@ -2536,14 +2536,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  1      0     0.17                        mov	x3, x6
-# CHECK-NEXT:  1      0     0.17                        mov	x3, xzr
-# CHECK-NEXT:  1      0     0.17                        mov	wzr, w2
-# CHECK-NEXT:  1      0     0.17                        mov	w3, w5
+# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
+# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
+# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.17                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.17                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.17                        mov	x2, #5299989643264
-# CHECK-NEXT:  1      0     0.17                        mov	x2, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.17                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.17                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.17                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
index 3ddb525327015..1cec5897db425 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
@@ -23,18 +23,18 @@ mov  x1, x2
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      0     0.17                        mov	x1, #0
-# CHECK-NEXT:  1      0     0.17                        mov	x1, xzr
-# CHECK-NEXT:  1      0     0.17                        mov	w1, #0
-# CHECK-NEXT:  1      0     0.17                        mov	w1, wzr
-# CHECK-NEXT:  1      0     0.17                        fmov	h1, wzr
-# CHECK-NEXT:  1      0     0.17                        fmov	h1, xzr
-# CHECK-NEXT:  1      0     0.17                        fmov	s1, wzr
-# CHECK-NEXT:  1      0     0.17                        fmov	d1, xzr
-# CHECK-NEXT:  1      0     0.17                        movi	d1, #0000000000000000
-# CHECK-NEXT:  1      0     0.17                        movi	v1.2d, #0000000000000000
-# CHECK-NEXT:  1      0     0.17                        mov	w1, w2
-# CHECK-NEXT:  1      0     0.17                        mov	x1, x2
+# CHECK-NEXT:  0      0     0.00                        mov	x1, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x1, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	w1, #0
+# CHECK-NEXT:  0      0     0.00                        mov	w1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	h1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	h1, xzr
+# CHECK-NEXT:  0      0     0.00                        fmov	s1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	d1, xzr
+# CHECK-NEXT:  0      0     0.00                        movi	d1, #0000000000000000
+# CHECK-NEXT:  0      0     0.00                        movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  0      0     0.00                        mov	w1, w2
+# CHECK-NEXT:  0      0     0.00                        mov	x1, x2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - V2UnitB
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
index 73fd95d6e4a5b..67af391e52863 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
@@ -2536,14 +2536,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  1      0     0.10                        mov	x3, x6
-# CHECK-NEXT:  1      0     0.10                        mov	x3, xzr
-# CHECK-NEXT:  1      0     0.10                        mov	wzr, w2
-# CHECK-NEXT:  1      0     0.10                        mov	w3, w5
+# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
+# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
+# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.13                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.13                        mov	x2, #5299989643264
-# CHECK-NEXT:  1      0     0.10                        mov	x2, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.13                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.13                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
index 1eef230b8174e..9b4834b12a79b 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
@@ -23,18 +23,18 @@ mov  x1, x2
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      0     0.10                        mov	x1, #0
-# CHECK-NEXT:  1      0     0.10                        mov	x1, xzr
-# CHECK-NEXT:  1      0     0.10                        mov	w1, #0
-# CHECK-NEXT:  1      0     0.10                        mov	w1, wzr
-# CHECK-NEXT:  1      0     0.10                        fmov	h1, wzr
-# CHECK-NEXT:  1      0     0.10                        fmov	h1, xzr
-# CHECK-NEXT:  1      0     0.10                        fmov	s1, wzr
-# CHECK-NEXT:  1      0     0.10                        fmov	d1, xzr
-# CHECK-NEXT:  1      0     0.10                        movi	d1, #0000000000000000
-# CHECK-NEXT:  1      0     0.10                        movi	v1.2d, #0000000000000000
-# CHECK-NEXT:  1      0     0.10                        mov	w1, w2
-# CHECK-NEXT:  1      0     0.10                        mov	x1, x2
+# CHECK-NEXT:  0      0     0.00                        mov	x1, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x1, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	w1, #0
+# CHECK-NEXT:  0      0     0.00                        mov	w1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	h1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	h1, xzr
+# CHECK-NEXT:  0      0     0.00                        fmov	s1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	d1, xzr
+# CHECK-NEXT:  0      0     0.00                        movi	d1, #0000000000000000
+# CHECK-NEXT:  0      0     0.00                        movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  0      0     0.00                        mov	w1, w2
+# CHECK-NEXT:  0      0     0.00                        mov	x1, x2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - V3UnitB
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
index 7ab2be5eaa365..5009ce1d54a86 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
@@ -2536,14 +2536,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  1      0     0.10                        mov	x3, x6
-# CHECK-NEXT:  1      0     0.10                        mov	x3, xzr
-# CHECK-NEXT:  1      0     0.10                        mov	wzr, w2
-# CHECK-NEXT:  1      0     0.10                        mov	w3, w5
+# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
+# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
+# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.13                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.13                        mov	x2, #5299989643264
-# CHECK-NEXT:  1      0     0.10                        mov	x2, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.13                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.13                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
index a0840dcddcbab..783bea288b121 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
@@ -23,18 +23,18 @@ mov  x1, x2
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      0     0.10                        mov	x1, #0
-# CHECK-NEXT:  1      0     0.10                        mov	x1, xzr
-# CHECK-NEXT:  1      0     0.10                        mov	w1, #0
-# CHECK-NEXT:  1      0     0.10                        mov	w1, wzr
-# CHECK-NEXT:  1      0     0.10                        fmov	h1, wzr
-# CHECK-NEXT:  1      0     0.10                        fmov	h1, xzr
-# CHECK-NEXT:  1      0     0.10                        fmov	s1, wzr
-# CHECK-NEXT:  1      0     0.10                        fmov	d1, xzr
-# CHECK-NEXT:  1      0     0.10                        movi	d1, #0000000000000000
-# CHECK-NEXT:  1      0     0.10                        movi	v1.2d, #0000000000000000
-# CHECK-NEXT:  1      0     0.10                        mov	w1, w2
-# CHECK-NEXT:  1      0     0.10                        mov	x1, x2
+# CHECK-NEXT:  0      0     0.00                        mov	x1, #0
+# CHECK-NEXT:  0      0     0.00                        mov	x1, xzr
+# CHECK-NEXT:  0      0     0.00                        mov	w1, #0
+# CHECK-NEXT:  0      0     0.00                        mov	w1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	h1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	h1, xzr
+# CHECK-NEXT:  0      0     0.00                        fmov	s1, wzr
+# CHECK-NEXT:  0      0     0.00                        fmov	d1, xzr
+# CHECK-NEXT:  0      0     0.00                        movi	d1, #0000000000000000
+# CHECK-NEXT:  0      0     0.00                        movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  0      0     0.00                        mov	w1, w2
+# CHECK-NEXT:  0      0     0.00                        mov	x1, x2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - V3AEUnitB

>From 695b27d6540de00f9feb943744715758607380ff Mon Sep 17 00:00:00 2001
From: Simon Wallis <simon.wallis2 at arm.com>
Date: Thu, 30 Oct 2025 11:29:34 +0000
Subject: [PATCH 2/7] [AArch64] Update zero latency instructions in scheduling
 tables for Neoverse cores

NeoverseZeroMove was introduced for Neoverse-V2 and was added to V3 and V3AE.
Use NeoverseZeroMove for Neoverse-V1, N2, N3 in the same way, including these instructions:
MOV Xd|Wd, #0|XZR|WZR

For all Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine:
MOV Wd,Wn
MOV Xd,Xn

For Neoverse-N3 only, these instructions also have zero latency:
FMOV Dd, Dn
FMOV Sd, Sn

Change-Id: I955cfe3efc689bea305a708eb6d7259dced6fe04
---
 llvm/test/CodeGen/AArch64/pr164181.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
index 4ec63ecb2eeb4..987c92b084001 100644
--- a/llvm/test/CodeGen/AArch64/pr164181.ll
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -52,11 +52,11 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    mov x10, xzr
 ; CHECK-NEXT:    mov w23, wzr
 ; CHECK-NEXT:    mov w30, wzr
-; CHECK-NEXT:    ldrb w19, [sp, #240]
 ; CHECK-NEXT:    mov w25, wzr
 ; CHECK-NEXT:    mov x24, xzr
-; CHECK-NEXT:    str w8, [sp, #108] // 4-byte Folded Spill
 ; CHECK-NEXT:    mov x3, x26
+; CHECK-NEXT:    str w8, [sp, #108] // 4-byte Folded Spill
+; CHECK-NEXT:    ldrb w19, [sp, #240]
 ; CHECK-NEXT:    ldp x9, x8, [sp, #344]
 ; CHECK-NEXT:    str w12, [sp, #92] // 4-byte Folded Spill
 ; CHECK-NEXT:    mov w12, #1 // =0x1
@@ -123,8 +123,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    mov x12, #-30 // =0xffffffffffffffe2
 ; CHECK-NEXT:    add x19, x4, w8, sxtw #2
 ; CHECK-NEXT:    mov x9, xzr
-; CHECK-NEXT:    csel x12, x24, x12, lo
 ; CHECK-NEXT:    mov w4, w30
+; CHECK-NEXT:    csel x12, x24, x12, lo
 ; CHECK-NEXT:    str x12, [sp, #56] // 8-byte Folded Spill
 ; CHECK-NEXT:    b .LBB0_8
 ; CHECK-NEXT:    .p2align 5, , 16
@@ -341,8 +341,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    mov x24, x27
 ; CHECK-NEXT:    lsl x23, x14, #1
 ; CHECK-NEXT:    mov x27, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    madd x14, x14, x3, x11
 ; CHECK-NEXT:    mov w28, w30
+; CHECK-NEXT:    madd x14, x14, x3, x11
 ; CHECK-NEXT:    mov w3, #-7680 // =0xffffe200
 ; CHECK-NEXT:    b .LBB0_39
 ; CHECK-NEXT:    .p2align 5, , 16

>From 9cebca2344bd9e41b51a1b6ad8ed44c0682d1fe4 Mon Sep 17 00:00:00 2001
From: Simon Wallis <simon.wallis2 at arm.com>
Date: Fri, 7 Nov 2025 09:30:55 +0000
Subject: [PATCH 3/7] [AArch64] Update zero latency instructions in Neoverse
 scheduling tables

NeoverseZeroMove was introduced for Neoverse-V2 and was added to V3 and V3AE.
Use NeoverseZeroMove for Neoverse-V1, N2, N3 in the same way, including these instructions:
MOV Xd|Wd, #0|XZR|WZR

For all Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine:
MOV Wd,Wn
MOV Xd,Xn

For Neoverse-N3 only, these instructions also have zero latency:
FMOV Dd, Dn
FMOV Sd, Sn

Change-Id: Ie6b1f5c3f4d74f26bdd4c67c5e6c5acf6a8e00cc
---
 llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td      |  3 +--
 .../llvm-mca/AArch64/Neoverse/N2-basic-instructions.s  | 10 +++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index d1ce5a13d0510..b4690e6045f85 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -906,8 +906,7 @@ def : SchedAlias<WriteFImm, N2Write_2c_1V>;
 def : InstRW<[N2Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
 
 // FP transfer, from gen to low half of vec reg
-def : InstRW<[N2Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
-                                        FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
+def : InstRW<[N2Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
 
 // FP transfer, from gen to high half of vec reg
 def : InstRW<[N2Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
index d3343ab055887..b140e9a9927ff 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
@@ -2086,9 +2086,9 @@ drps
 # CHECK-NEXT:  1      3     0.50                        fcvtas	x27, d28
 # CHECK-NEXT:  1      3     0.50                        fcvtau	w29, d30
 # CHECK-NEXT:  1      3     0.50                        fcvtau	xzr, d0
-# CHECK-NEXT:  1      3     3.00                        fmov	w3, s9
+# CHECK-NEXT:  1      2     0.50                        fmov	w3, s9
 # CHECK-NEXT:  1      3     3.00                        fmov	s9, w3
-# CHECK-NEXT:  1      3     3.00                        fmov	x20, d31
+# CHECK-NEXT:  1      2     0.50                        fmov	x20, d31
 # CHECK-NEXT:  1      3     3.00                        fmov	d1, x15
 # CHECK-NEXT:  1      2     0.50                        fmov	x3, v12.d[1]
 # CHECK-NEXT:  2      5     1.00                        fmov	v1.d[1], x19
@@ -2557,7 +2557,7 @@ drps
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]
-# CHECK-NEXT: 11.00  11.00  33.00  33.00  87.33  151.33 151.33 515.75 249.75 161.25 161.25 215.50 85.50
+# CHECK-NEXT: 11.00  11.00  33.00  33.00  87.33  151.33 151.33 509.75 249.75 161.25 161.25 216.50 86.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    Instructions:
@@ -3270,9 +3270,9 @@ drps
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fcvtas	x27, d28
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fcvtau	w29, d30
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fcvtau	xzr, d0
-# CHECK-NEXT:  -      -      -      -      -      -      -     3.00    -      -      -      -      -     fmov	w3, s9
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fmov	w3, s9
 # CHECK-NEXT:  -      -      -      -      -      -      -     3.00    -      -      -      -      -     fmov	s9, w3
-# CHECK-NEXT:  -      -      -      -      -      -      -     3.00    -      -      -      -      -     fmov	x20, d31
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fmov	x20, d31
 # CHECK-NEXT:  -      -      -      -      -      -      -     3.00    -      -      -      -      -     fmov	d1, x15
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   fmov	x3, v12.d[1]
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   fmov	v1.d[1], x19

>From 824b64079174a30813ef86bd672171994d80f17e Mon Sep 17 00:00:00 2001
From: Simon Wallis <simon.wallis2 at arm.com>
Date: Thu, 13 Nov 2025 10:45:23 +0000
Subject: [PATCH 4/7] [AArch64] Update zero latency instructions in Neoverse
 scheduling tables

NeoverseZeroMove was introduced for Neoverse-V2 and was added to V3 and V3AE.
Use NeoverseZeroMove for Neoverse-V1, N2, N3 in the same way, including these instructions:
MOV Xd|Wd, #0|XZR|WZR

For all Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine:
MOV Wd,Wn
MOV Xd,Xn

For Neoverse-N3 only, these instructions also have zero latency:
FMOV Dd, Dn
FMOV Sd, Sn

Change-Id: I7a6a971cf75c60d8f75b210f0529c4ad813775a3
---
 .../Target/AArch64/AArch64SchedNeoverseN2.td  |  5 +-
 .../Target/AArch64/AArch64SchedNeoverseN3.td  |  2 +-
 .../Target/AArch64/AArch64SchedNeoverseV1.td  |  2 +-
 .../Target/AArch64/AArch64SchedNeoverseV2.td  |  5 +-
 .../Target/AArch64/AArch64SchedNeoverseV3.td  |  5 +-
 .../AArch64/AArch64SchedNeoverseV3AE.td       |  5 +-
 .../AArch64/Neoverse/N2-basic-instructions.s  | 10 +--
 .../AArch64/Neoverse/N3-basic-instructions.s  | 14 ++--
 .../AArch64/Neoverse/N3-sve-instructions.s    |  2 +-
 .../AArch64/Neoverse/V1-basic-instructions.s  | 76 +++++++++----------
 .../llvm-mca/AArch64/Neoverse/V1-writeback.s  | 31 ++++----
 .../AArch64/Neoverse/V1-zero-dependency.s     |  6 +-
 .../AArch64/Neoverse/V2-basic-instructions.s  | 10 +--
 .../AArch64/Neoverse/V2-zero-lat-movs.s       | 24 +++---
 .../AArch64/Neoverse/V3-basic-instructions.s  | 10 +--
 .../AArch64/Neoverse/V3-zero-lat-movs.s       | 24 +++---
 .../Neoverse/V3AE-basic-instructions.s        | 10 +--
 .../AArch64/Neoverse/V3AE-zero-lat-movs.s     | 24 +++---
 18 files changed, 126 insertions(+), 139 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index b4690e6045f85..a02130f8390a7 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -74,10 +74,7 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 //===----------------------------------------------------------------------===//
 
 // Define generic 0 micro-op types
-def N2Write_0c : SchedWriteRes<[]> {
-    let Latency = 0;
-    let NumMicroOps = 0;
-}
+def N2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
 
 // Define generic 1 micro-op types
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
index 32d48ca66ee2d..ae24fc1e35a89 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
@@ -75,7 +75,7 @@ def : WriteRes<WriteHint,    []> { let Latency = 1; }
 
 def N3Write_0c : SchedWriteRes<[]> {
     let Latency = 0;
-    let NumMicroOps = 0;
+    let NumMicroOps = 1;
 }
 
 def N3Write_4c : SchedWriteRes<[]> {
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index 8d33ca22616c2..ac3b9a3d40192 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -91,7 +91,7 @@ def : WriteRes<WriteHint,    []> { let Latency = 1; }
 //===----------------------------------------------------------------------===//
 // Define generic 0 micro-op types
 
-let Latency = 0, NumMicroOps = 0 in
+let Latency = 0, NumMicroOps = 1 in
 def V1Write_0c_0Z : SchedWriteRes<[]>;
 
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index 1ef087f07022d..2387f176f3051 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -94,10 +94,7 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 //===----------------------------------------------------------------------===//
 
 // Define generic 0 micro-op types
-def V2Write_0c : SchedWriteRes<[]> {
-    let Latency = 0;
-    let NumMicroOps = 0;
-}
+def V2Write_0c : SchedWriteRes<[]> { let Latency = 0; }
 
 // Define generic 1 micro-op types
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
index 3dd2988088f0b..e23576a20d277 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3.td
@@ -94,10 +94,7 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 //===----------------------------------------------------------------------===//
 
 // Define generic 0 micro-op types
-def V3Write_0c : SchedWriteRes<[]> {
-    let Latency = 0;
-    let NumMicroOps = 0;
-}
+def V3Write_0c : SchedWriteRes<[]> { let Latency = 0; }
 
 // Define generic 1 micro-op types
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
index 19b56260387e1..0f1ec669a4e5e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV3AE.td
@@ -89,10 +89,7 @@ def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
 //===----------------------------------------------------------------------===//
 
 // Define generic 0 micro-op types
-def V3AEWrite_0c : SchedWriteRes<[]> {
-    let Latency = 0;
-    let NumMicroOps = 0;
-}
+def V3AEWrite_0c : SchedWriteRes<[]> { let Latency = 0; }
 
 // Define generic 1 micro-op types
 
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
index b140e9a9927ff..f6ea4c4769c0a 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-basic-instructions.s
@@ -2508,14 +2508,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
-# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
-# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
+# CHECK-NEXT:  1      0     0.20                        mov	x3, x6
+# CHECK-NEXT:  1      0     0.20                        mov	x3, xzr
+# CHECK-NEXT:  1      0     0.20                        mov	wzr, w2
+# CHECK-NEXT:  1      0     0.20                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.25                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.25                        mov	x2, #5299989643264
-# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
+# CHECK-NEXT:  1      0     0.20                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.25                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.25                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s
index f7311b5e41b2e..5f48217f8fab9 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-basic-instructions.s
@@ -1888,7 +1888,7 @@ drps
 # CHECK-NEXT:  1      2     0.50                        fccmpe	d31, d5, #7, ne
 # CHECK-NEXT:  1      2     0.50                        fcsel	s3, s20, s9, pl
 # CHECK-NEXT:  1      2     0.50                        fcsel	d9, d10, d11, mi
-# CHECK-NEXT:  0      0     0.00                        fmov	s0, s1
+# CHECK-NEXT:  1      0     0.20                        fmov	s0, s1
 # CHECK-NEXT:  1      2     0.50                        fabs	s2, s3
 # CHECK-NEXT:  1      2     0.50                        fneg	s4, s5
 # CHECK-NEXT:  1      7     1.00                        fsqrt	s6, s7
@@ -1901,7 +1901,7 @@ drps
 # CHECK-NEXT:  1      3     1.00                        frinta	s20, s21
 # CHECK-NEXT:  1      3     1.00                        frintx	s22, s23
 # CHECK-NEXT:  1      3     1.00                        frinti	s24, s25
-# CHECK-NEXT:  0      0     0.00                        fmov	d0, d1
+# CHECK-NEXT:  1      0     0.20                        fmov	d0, d1
 # CHECK-NEXT:  1      2     0.50                        fabs	d2, d3
 # CHECK-NEXT:  1      2     0.50                        fneg	d4, d5
 # CHECK-NEXT:  1      12    1.00                        fsqrt	d6, d7
@@ -2508,14 +2508,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
-# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
-# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
+# CHECK-NEXT:  1      0     0.20                        mov	x3, x6
+# CHECK-NEXT:  1      0     0.20                        mov	x3, xzr
+# CHECK-NEXT:  1      0     0.20                        mov	wzr, w2
+# CHECK-NEXT:  1      0     0.20                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.25                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.25                        mov	x2, #5299989643264
-# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
+# CHECK-NEXT:  1      0     0.20                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.25                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.25                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s
index 395aa1141abb5..8977802be6bfc 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s
@@ -5471,7 +5471,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        sel	z23.d, p11, z13.d, z8.d
 # CHECK-NEXT:  1      2     0.50                        sel	z23.h, p11, z13.h, z8.h
 # CHECK-NEXT:  1      2     0.50                        sel	z23.s, p11, z13.s, z8.s
-# CHECK-NEXT:  0      0     0.00           *      U     setffr
+# CHECK-NEXT:  1      0     0.20           *      U     setffr
 # CHECK-NEXT:  1      2     0.50                        shadd	z0.b, p0/m, z0.b, z1.b
 # CHECK-NEXT:  1      2     0.50                        shadd	z0.h, p0/m, z0.h, z1.h
 # CHECK-NEXT:  1      2     0.50                        shadd	z29.s, p7/m, z29.s, z30.s
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
index f75222f27a94a..eaf128acc0195 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
@@ -2562,78 +2562,78 @@ drps
 # CHECK-NEXT:  2      7     0.33    *                   ldr	q17, [x23, w9, sxtw]
 # CHECK-NEXT:  3      2     0.50           *            str	q18, [x22, w10, sxtw]
 # CHECK-NEXT:  2      7     0.33    *                   ldr	q19, [x21, wzr, sxtw #4]
-# CHECK-NEXT:  1      4     0.33    *                   ldp	w3, w5, [sp]
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w3, w5, [sp]
 # CHECK-NEXT:  2      1     0.50           *            stp	wzr, w9, [sp, #252]
-# CHECK-NEXT:  1      4     0.33    *                   ldp	w2, wzr, [sp, #-256]
-# CHECK-NEXT:  1      4     0.33    *                   ldp	w9, w10, [sp, #4]
-# CHECK-NEXT:  2      5     0.33    *                   ldpsw	x9, x10, [sp, #4]
-# CHECK-NEXT:  2      5     0.33    *                   ldpsw	x9, x10, [x2, #-256]
-# CHECK-NEXT:  2      5     0.33    *                   ldpsw	x20, x30, [sp, #252]
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w2, wzr, [sp, #-256]
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w9, w10, [sp, #4]
+# CHECK-NEXT:  3      5     0.33    *                   ldpsw	x9, x10, [sp, #4]
+# CHECK-NEXT:  3      5     0.33    *                   ldpsw	x9, x10, [x2, #-256]
+# CHECK-NEXT:  3      5     0.33    *                   ldpsw	x20, x30, [sp, #252]
 # CHECK-NEXT:  2      4     0.67    *                   ldp	x21, x29, [x2, #504]
 # CHECK-NEXT:  2      4     0.67    *                   ldp	x22, x23, [x3, #-512]
 # CHECK-NEXT:  2      4     0.67    *                   ldp	x24, x25, [x4, #8]
-# CHECK-NEXT:  1      6     0.33    *                   ldp	s29, s28, [sp, #252]
+# CHECK-NEXT:  2      6     0.33    *                   ldp	s29, s28, [sp, #252]
 # CHECK-NEXT:  2      2     0.50           *            stp	s27, s26, [sp, #-256]
-# CHECK-NEXT:  1      6     0.33    *                   ldp	s1, s2, [x3, #44]
+# CHECK-NEXT:  2      6     0.33    *                   ldp	s1, s2, [x3, #44]
 # CHECK-NEXT:  2      2     0.50           *            stp	d3, d5, [x9, #504]
 # CHECK-NEXT:  2      2     0.50           *            stp	d7, d11, [x10, #-512]
 # CHECK-NEXT:  2      1     0.50           *            stnp	x20, x16, [x8]
 # CHECK-NEXT:  2      1     0.50           *            stp	x3, x6, [x16]
-# CHECK-NEXT:  1      6     0.33    *                   ldp	d2, d3, [x30, #-8]
+# CHECK-NEXT:  2      6     0.33    *                   ldp	d2, d3, [x30, #-8]
 # CHECK-NEXT:  2      2     0.50           *            stp	q3, q5, [sp]
 # CHECK-NEXT:  2      2     0.50           *            stp	q17, q19, [sp, #1008]
 # CHECK-NEXT:  2      6     0.67    *                   ldp	q23, q29, [x1, #-1024]
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w3, w5, [sp], #0
+# CHECK-NEXT:  3      4     0.33    *                   ldp	w3, w5, [sp], #0
 # CHECK-NEXT:  3      1     0.50           *            stp	wzr, w9, [sp], #252
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w2, wzr, [sp], #-256
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w9, w10, [sp], #4
-# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [sp], #4
-# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [x2], #-256
-# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x20, x30, [sp], #252
+# CHECK-NEXT:  3      4     0.33    *                   ldp	w2, wzr, [sp], #-256
+# CHECK-NEXT:  3      4     0.33    *                   ldp	w9, w10, [sp], #4
+# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [sp], #4
+# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [x2], #-256
+# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x20, x30, [sp], #252
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x21, x29, [x2], #504
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x22, x23, [x3], #-512
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x24, x25, [x4], #8
-# CHECK-NEXT:  2      6     0.33    *                   ldp	s29, s28, [sp], #252
+# CHECK-NEXT:  3      6     0.33    *                   ldp	s29, s28, [sp], #252
 # CHECK-NEXT:  3      2     0.50           *            stp	s27, s26, [sp], #-256
-# CHECK-NEXT:  2      6     0.33    *                   ldp	s1, s2, [x3], #44
+# CHECK-NEXT:  3      6     0.33    *                   ldp	s1, s2, [x3], #44
 # CHECK-NEXT:  3      2     0.50           *            stp	d3, d5, [x9], #504
 # CHECK-NEXT:  3      2     0.50           *            stp	d7, d11, [x10], #-512
-# CHECK-NEXT:  2      6     0.33    *                   ldp	d2, d3, [x30], #-8
+# CHECK-NEXT:  3      6     0.33    *                   ldp	d2, d3, [x30], #-8
 # CHECK-NEXT:  4      2     1.00           *            stp	q3, q5, [sp], #0
 # CHECK-NEXT:  4      2     1.00           *            stp	q17, q19, [sp], #1008
 # CHECK-NEXT:  3      6     0.67    *                   ldp	q23, q29, [x1], #-1024
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w3, w5, [sp, #0]!
+# CHECK-NEXT:  3      4     0.33    *                   ldp	w3, w5, [sp, #0]!
 # CHECK-NEXT:  3      1     0.50           *            stp	wzr, w9, [sp, #252]!
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w2, wzr, [sp, #-256]!
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w9, w10, [sp, #4]!
-# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [sp, #4]!
-# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [x2, #-256]!
-# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x20, x30, [sp, #252]!
+# CHECK-NEXT:  3      4     0.33    *                   ldp	w2, wzr, [sp, #-256]!
+# CHECK-NEXT:  3      4     0.33    *                   ldp	w9, w10, [sp, #4]!
+# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [sp, #4]!
+# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [x2, #-256]!
+# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x20, x30, [sp, #252]!
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x21, x29, [x2, #504]!
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x22, x23, [x3, #-512]!
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x24, x25, [x4, #8]!
-# CHECK-NEXT:  2      6     0.33    *                   ldp	s29, s28, [sp, #252]!
+# CHECK-NEXT:  3      6     0.33    *                   ldp	s29, s28, [sp, #252]!
 # CHECK-NEXT:  3      2     0.50           *            stp	s27, s26, [sp, #-256]!
-# CHECK-NEXT:  2      6     0.33    *                   ldp	s1, s2, [x3, #44]!
+# CHECK-NEXT:  3      6     0.33    *                   ldp	s1, s2, [x3, #44]!
 # CHECK-NEXT:  3      2     0.50           *            stp	d3, d5, [x9, #504]!
 # CHECK-NEXT:  3      2     0.50           *            stp	d7, d11, [x10, #-512]!
-# CHECK-NEXT:  2      6     0.33    *                   ldp	d2, d3, [x30, #-8]!
+# CHECK-NEXT:  3      6     0.33    *                   ldp	d2, d3, [x30, #-8]!
 # CHECK-NEXT:  4      2     1.00           *            stp	q3, q5, [sp, #0]!
 # CHECK-NEXT:  4      2     1.00           *            stp	q17, q19, [sp, #1008]!
 # CHECK-NEXT:  3      6     0.67    *                   ldp	q23, q29, [x1, #-1024]!
-# CHECK-NEXT:  1      4     0.33    *                   ldnp	w3, w5, [sp]
+# CHECK-NEXT:  2      4     0.33    *                   ldnp	w3, w5, [sp]
 # CHECK-NEXT:  2      1     0.50           *            stnp	wzr, w9, [sp, #252]
-# CHECK-NEXT:  1      4     0.33    *                   ldnp	w2, wzr, [sp, #-256]
-# CHECK-NEXT:  1      4     0.33    *                   ldnp	w9, w10, [sp, #4]
+# CHECK-NEXT:  2      4     0.33    *                   ldnp	w2, wzr, [sp, #-256]
+# CHECK-NEXT:  2      4     0.33    *                   ldnp	w9, w10, [sp, #4]
 # CHECK-NEXT:  2      4     0.67    *                   ldnp	x21, x29, [x2, #504]
 # CHECK-NEXT:  2      4     0.67    *                   ldnp	x22, x23, [x3, #-512]
 # CHECK-NEXT:  2      4     0.67    *                   ldnp	x24, x25, [x4, #8]
-# CHECK-NEXT:  1      6     0.33    *                   ldnp	s29, s28, [sp, #252]
+# CHECK-NEXT:  2      6     0.33    *                   ldnp	s29, s28, [sp, #252]
 # CHECK-NEXT:  2      2     0.50           *            stnp	s27, s26, [sp, #-256]
-# CHECK-NEXT:  1      6     0.33    *                   ldnp	s1, s2, [x3, #44]
+# CHECK-NEXT:  2      6     0.33    *                   ldnp	s1, s2, [x3, #44]
 # CHECK-NEXT:  2      2     0.50           *            stnp	d3, d5, [x9, #504]
 # CHECK-NEXT:  2      2     0.50           *            stnp	d7, d11, [x10, #-512]
-# CHECK-NEXT:  1      6     0.33    *                   ldnp	d2, d3, [x30, #-8]
+# CHECK-NEXT:  2      6     0.33    *                   ldnp	d2, d3, [x30, #-8]
 # CHECK-NEXT:  2      2     0.50           *            stnp	q3, q5, [sp]
 # CHECK-NEXT:  2      2     0.50           *            stnp	q17, q19, [sp, #1008]
 # CHECK-NEXT:  2      6     0.67    *                   ldnp	q23, q29, [x1, #-1024]
@@ -2673,14 +2673,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
-# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
-# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
+# CHECK-NEXT:  1      0     0.13                        mov	x3, x6
+# CHECK-NEXT:  1      0     0.13                        mov	x3, xzr
+# CHECK-NEXT:  1      0     0.13                        mov	wzr, w2
+# CHECK-NEXT:  1      0     0.13                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.25                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.25                        mov	x2, #5299989643264
-# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
+# CHECK-NEXT:  1      0     0.13                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.25                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.25                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.25                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
index 1961b24ae6aac..fee62ce565b43 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
@@ -3140,13 +3140,13 @@ add x0, x27, 1
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      609
-# CHECK-NEXT: Total uOps:        3300
+# CHECK-NEXT: Total Cycles:      708
+# CHECK-NEXT: Total uOps:        3500
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    5.42
-# CHECK-NEXT: IPC:               1.64
-# CHECK-NEXT: Block RThroughput: 4.1
+# CHECK-NEXT: uOps Per Cycle:    4.94
+# CHECK-NEXT: IPC:               1.41
+# CHECK-NEXT: Block RThroughput: 4.4
 
 # CHECK:      Timeline view:
 # CHECK-NEXT:                     01234
@@ -3161,7 +3161,7 @@ add x0, x27, 1
 # CHECK-NEXT: [0,6]     .    DeeeeeeE-R   ldp	s1, s2, [x27], #248
 # CHECK-NEXT: [0,7]     .    D=eE-----R   add	x0, x27, #1
 # CHECK-NEXT: [0,8]     .    D=eeeeeeER   ldp	d1, d2, [x27], #496
-# CHECK-NEXT: [0,9]     .    D==eE----R   add	x0, x27, #1
+# CHECK-NEXT: [0,9]     .    .D=eE----R   add	x0, x27, #1
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -3179,18 +3179,18 @@ add x0, x27, 1
 # CHECK-NEXT: 6.     1     1.0    0.0    1.0       ldp	s1, s2, [x27], #248
 # CHECK-NEXT: 7.     1     2.0    0.0    5.0       add	x0, x27, #1
 # CHECK-NEXT: 8.     1     2.0    0.0    0.0       ldp	d1, d2, [x27], #496
-# CHECK-NEXT: 9.     1     3.0    0.0    4.0       add	x0, x27, #1
-# CHECK-NEXT:        1     1.4    0.3    2.8       <total>
+# CHECK-NEXT: 9.     1     2.0    0.0    4.0       add	x0, x27, #1
+# CHECK-NEXT:        1     1.3    0.3    2.8       <total>
 
 # CHECK:      [44] Code Region - G45
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      1000
 # CHECK-NEXT: Total Cycles:      507
-# CHECK-NEXT: Total uOps:        1700
+# CHECK-NEXT: Total uOps:        2000
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    3.35
+# CHECK-NEXT: uOps Per Cycle:    3.94
 # CHECK-NEXT: IPC:               1.97
 # CHECK-NEXT: Block RThroughput: 2.5
 
@@ -3233,10 +3233,10 @@ add x0, x27, 1
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      1000
 # CHECK-NEXT: Total Cycles:      507
-# CHECK-NEXT: Total uOps:        1900
+# CHECK-NEXT: Total uOps:        2200
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    3.75
+# CHECK-NEXT: uOps Per Cycle:    4.34
 # CHECK-NEXT: IPC:               1.97
 # CHECK-NEXT: Block RThroughput: 3.0
 
@@ -3251,7 +3251,7 @@ add x0, x27, 1
 # CHECK-NEXT: [0,4]     .D=eeeeER ..   ldp	x1, x2, [x27, #496]!
 # CHECK-NEXT: [0,5]     .D==eE--R ..   add	x0, x27, #1
 # CHECK-NEXT: [0,6]     .D==eeeeeER.   ldpsw	x1, x2, [x27], #248
-# CHECK-NEXT: [0,7]     .D===eE---R.   add	x0, x27, #1
+# CHECK-NEXT: [0,7]     . D==eE---R.   add	x0, x27, #1
 # CHECK-NEXT: [0,8]     . D==eeeeeER   ldpsw	x1, x2, [x27, #248]!
 # CHECK-NEXT: [0,9]     . D===eE---R   add	x0, x27, #1
 
@@ -3269,10 +3269,10 @@ add x0, x27, 1
 # CHECK-NEXT: 4.     1     2.0    0.0    0.0       ldp	x1, x2, [x27, #496]!
 # CHECK-NEXT: 5.     1     3.0    0.0    2.0       add	x0, x27, #1
 # CHECK-NEXT: 6.     1     3.0    0.0    0.0       ldpsw	x1, x2, [x27], #248
-# CHECK-NEXT: 7.     1     4.0    0.0    3.0       add	x0, x27, #1
+# CHECK-NEXT: 7.     1     3.0    0.0    3.0       add	x0, x27, #1
 # CHECK-NEXT: 8.     1     3.0    0.0    0.0       ldpsw	x1, x2, [x27, #248]!
 # CHECK-NEXT: 9.     1     4.0    0.0    3.0       add	x0, x27, #1
-# CHECK-NEXT:        1     2.7    0.1    1.2       <total>
+# CHECK-NEXT:        1     2.6    0.1    1.2       <total>
 
 # CHECK:      [46] Code Region - G47
 
@@ -5272,4 +5272,3 @@ add x0, x27, 1
 # CHECK-NEXT: 2.     1     5.0    0.0    0.0       ldr	x2, [x1], #254
 # CHECK-NEXT: 3.     1     2.0    0.0    6.0       add	x0, x27, #1
 # CHECK-NEXT:        1     2.5    0.3    2.0       <total>
-
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
index 7767b95ff98ea..47c5b7cd513b3 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-zero-dependency.s
@@ -7,10 +7,10 @@ cmp x0, #4
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      200
 # CHECK-NEXT: Total Cycles:      37
-# CHECK-NEXT: Total uOps:        100
+# CHECK-NEXT: Total uOps:        200
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    2.70
+# CHECK-NEXT: uOps Per Cycle:    5.41
 # CHECK-NEXT: IPC:               5.41
 # CHECK-NEXT: Block RThroughput: 0.3
 
@@ -23,7 +23,7 @@ cmp x0, #4
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  0      0     0.00                        mov	x0, x1
+# CHECK-NEXT:  1      0     0.13                        mov	x0, x1
 # CHECK-NEXT:  1      1     0.33                        cmp	x0, #4
 
 # CHECK:      Resources:
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
index 9c987d54d2350..54b5f1644be48 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
@@ -2536,14 +2536,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
-# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
-# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
+# CHECK-NEXT:  1      0     0.17                        mov	x3, x6
+# CHECK-NEXT:  1      0     0.17                        mov	x3, xzr
+# CHECK-NEXT:  1      0     0.17                        mov	wzr, w2
+# CHECK-NEXT:  1      0     0.17                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.17                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.17                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.17                        mov	x2, #5299989643264
-# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
+# CHECK-NEXT:  1      0     0.17                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.17                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.17                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.17                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
index 1cec5897db425..3ddb525327015 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
@@ -23,18 +23,18 @@ mov  x1, x2
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  0      0     0.00                        mov	x1, #0
-# CHECK-NEXT:  0      0     0.00                        mov	x1, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	w1, #0
-# CHECK-NEXT:  0      0     0.00                        mov	w1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	h1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	h1, xzr
-# CHECK-NEXT:  0      0     0.00                        fmov	s1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	d1, xzr
-# CHECK-NEXT:  0      0     0.00                        movi	d1, #0000000000000000
-# CHECK-NEXT:  0      0     0.00                        movi	v1.2d, #0000000000000000
-# CHECK-NEXT:  0      0     0.00                        mov	w1, w2
-# CHECK-NEXT:  0      0     0.00                        mov	x1, x2
+# CHECK-NEXT:  1      0     0.17                        mov	x1, #0
+# CHECK-NEXT:  1      0     0.17                        mov	x1, xzr
+# CHECK-NEXT:  1      0     0.17                        mov	w1, #0
+# CHECK-NEXT:  1      0     0.17                        mov	w1, wzr
+# CHECK-NEXT:  1      0     0.17                        fmov	h1, wzr
+# CHECK-NEXT:  1      0     0.17                        fmov	h1, xzr
+# CHECK-NEXT:  1      0     0.17                        fmov	s1, wzr
+# CHECK-NEXT:  1      0     0.17                        fmov	d1, xzr
+# CHECK-NEXT:  1      0     0.17                        movi	d1, #0000000000000000
+# CHECK-NEXT:  1      0     0.17                        movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  1      0     0.17                        mov	w1, w2
+# CHECK-NEXT:  1      0     0.17                        mov	x1, x2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - V2UnitB
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
index 67af391e52863..73fd95d6e4a5b 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-basic-instructions.s
@@ -2536,14 +2536,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
-# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
-# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
+# CHECK-NEXT:  1      0     0.10                        mov	x3, x6
+# CHECK-NEXT:  1      0     0.10                        mov	x3, xzr
+# CHECK-NEXT:  1      0     0.10                        mov	wzr, w2
+# CHECK-NEXT:  1      0     0.10                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.13                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.13                        mov	x2, #5299989643264
-# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
+# CHECK-NEXT:  1      0     0.10                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.13                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.13                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
index 9b4834b12a79b..1eef230b8174e 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3-zero-lat-movs.s
@@ -23,18 +23,18 @@ mov  x1, x2
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  0      0     0.00                        mov	x1, #0
-# CHECK-NEXT:  0      0     0.00                        mov	x1, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	w1, #0
-# CHECK-NEXT:  0      0     0.00                        mov	w1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	h1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	h1, xzr
-# CHECK-NEXT:  0      0     0.00                        fmov	s1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	d1, xzr
-# CHECK-NEXT:  0      0     0.00                        movi	d1, #0000000000000000
-# CHECK-NEXT:  0      0     0.00                        movi	v1.2d, #0000000000000000
-# CHECK-NEXT:  0      0     0.00                        mov	w1, w2
-# CHECK-NEXT:  0      0     0.00                        mov	x1, x2
+# CHECK-NEXT:  1      0     0.10                        mov	x1, #0
+# CHECK-NEXT:  1      0     0.10                        mov	x1, xzr
+# CHECK-NEXT:  1      0     0.10                        mov	w1, #0
+# CHECK-NEXT:  1      0     0.10                        mov	w1, wzr
+# CHECK-NEXT:  1      0     0.10                        fmov	h1, wzr
+# CHECK-NEXT:  1      0     0.10                        fmov	h1, xzr
+# CHECK-NEXT:  1      0     0.10                        fmov	s1, wzr
+# CHECK-NEXT:  1      0     0.10                        fmov	d1, xzr
+# CHECK-NEXT:  1      0     0.10                        movi	d1, #0000000000000000
+# CHECK-NEXT:  1      0     0.10                        movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  1      0     0.10                        mov	w1, w2
+# CHECK-NEXT:  1      0     0.10                        mov	x1, x2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - V3UnitB
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
index 5009ce1d54a86..7ab2be5eaa365 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-basic-instructions.s
@@ -2536,14 +2536,14 @@ drps
 # CHECK-NEXT:  1      2     0.50                        bics	x3, xzr, x3, lsl #1
 # CHECK-NEXT:  1      2     0.50                        tst	w3, w7, lsl #31
 # CHECK-NEXT:  1      2     0.50                        tst	x2, x20, asr #2
-# CHECK-NEXT:  0      0     0.00                        mov	x3, x6
-# CHECK-NEXT:  0      0     0.00                        mov	x3, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	wzr, w2
-# CHECK-NEXT:  0      0     0.00                        mov	w3, w5
+# CHECK-NEXT:  1      0     0.10                        mov	x3, x6
+# CHECK-NEXT:  1      0     0.10                        mov	x3, xzr
+# CHECK-NEXT:  1      0     0.10                        mov	wzr, w2
+# CHECK-NEXT:  1      0     0.10                        mov	w3, w5
 # CHECK-NEXT:  1      1     0.13                        movz	w2, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        mov	w2, #-1235
 # CHECK-NEXT:  1      1     0.13                        mov	x2, #5299989643264
-# CHECK-NEXT:  0      0     0.00                        mov	x2, #0
+# CHECK-NEXT:  1      0     0.10                        mov	x2, #0
 # CHECK-NEXT:  1      1     0.13                        movk	w3, #0
 # CHECK-NEXT:  1      1     0.13                        movz	x4, #0, lsl #16
 # CHECK-NEXT:  1      1     0.13                        movk	w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
index 783bea288b121..a0840dcddcbab 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V3AE-zero-lat-movs.s
@@ -23,18 +23,18 @@ mov  x1, x2
 # CHECK-NEXT: [6]: HasSideEffects (U)
 
 # CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  0      0     0.00                        mov	x1, #0
-# CHECK-NEXT:  0      0     0.00                        mov	x1, xzr
-# CHECK-NEXT:  0      0     0.00                        mov	w1, #0
-# CHECK-NEXT:  0      0     0.00                        mov	w1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	h1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	h1, xzr
-# CHECK-NEXT:  0      0     0.00                        fmov	s1, wzr
-# CHECK-NEXT:  0      0     0.00                        fmov	d1, xzr
-# CHECK-NEXT:  0      0     0.00                        movi	d1, #0000000000000000
-# CHECK-NEXT:  0      0     0.00                        movi	v1.2d, #0000000000000000
-# CHECK-NEXT:  0      0     0.00                        mov	w1, w2
-# CHECK-NEXT:  0      0     0.00                        mov	x1, x2
+# CHECK-NEXT:  1      0     0.10                        mov	x1, #0
+# CHECK-NEXT:  1      0     0.10                        mov	x1, xzr
+# CHECK-NEXT:  1      0     0.10                        mov	w1, #0
+# CHECK-NEXT:  1      0     0.10                        mov	w1, wzr
+# CHECK-NEXT:  1      0     0.10                        fmov	h1, wzr
+# CHECK-NEXT:  1      0     0.10                        fmov	h1, xzr
+# CHECK-NEXT:  1      0     0.10                        fmov	s1, wzr
+# CHECK-NEXT:  1      0     0.10                        fmov	d1, xzr
+# CHECK-NEXT:  1      0     0.10                        movi	d1, #0000000000000000
+# CHECK-NEXT:  1      0     0.10                        movi	v1.2d, #0000000000000000
+# CHECK-NEXT:  1      0     0.10                        mov	w1, w2
+# CHECK-NEXT:  1      0     0.10                        mov	x1, x2
 
 # CHECK:      Resources:
 # CHECK-NEXT: [0.0] - V3AEUnitB

>From f2d0a292068262aef005694980443f6528765f67 Mon Sep 17 00:00:00 2001
From: Simon Wallis <simon.wallis2 at arm.com>
Date: Thu, 13 Nov 2025 10:50:20 +0000
Subject: [PATCH 5/7] [AArch64] Update zero latency instructions in Neoverse
 scheduling tables

NeoverseZeroMove was introduced for Neoverse-V2 and was added to V3 and V3AE.
Use NeoverseZeroMove for Neoverse-V1, N2, N3 in the same way, including these instructions:
MOV Xd|Wd, #0|XZR|WZR

For all Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine:
MOV Wd, Wn
MOV Xd, Xn

For Neoverse-N3 only, these instructions also have zero latency:
FMOV Dd, Dn
FMOV Sd, Sn

Change-Id: I95c53d373f35bb0bea5174a16c7ab3ac25acf684
---
 llvm/test/CodeGen/AArch64/pr164181.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/pr164181.ll b/llvm/test/CodeGen/AArch64/pr164181.ll
index 987c92b084001..4ec63ecb2eeb4 100644
--- a/llvm/test/CodeGen/AArch64/pr164181.ll
+++ b/llvm/test/CodeGen/AArch64/pr164181.ll
@@ -52,11 +52,11 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    mov x10, xzr
 ; CHECK-NEXT:    mov w23, wzr
 ; CHECK-NEXT:    mov w30, wzr
+; CHECK-NEXT:    ldrb w19, [sp, #240]
 ; CHECK-NEXT:    mov w25, wzr
 ; CHECK-NEXT:    mov x24, xzr
-; CHECK-NEXT:    mov x3, x26
 ; CHECK-NEXT:    str w8, [sp, #108] // 4-byte Folded Spill
-; CHECK-NEXT:    ldrb w19, [sp, #240]
+; CHECK-NEXT:    mov x3, x26
 ; CHECK-NEXT:    ldp x9, x8, [sp, #344]
 ; CHECK-NEXT:    str w12, [sp, #92] // 4-byte Folded Spill
 ; CHECK-NEXT:    mov w12, #1 // =0x1
@@ -123,8 +123,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    mov x12, #-30 // =0xffffffffffffffe2
 ; CHECK-NEXT:    add x19, x4, w8, sxtw #2
 ; CHECK-NEXT:    mov x9, xzr
-; CHECK-NEXT:    mov w4, w30
 ; CHECK-NEXT:    csel x12, x24, x12, lo
+; CHECK-NEXT:    mov w4, w30
 ; CHECK-NEXT:    str x12, [sp, #56] // 8-byte Folded Spill
 ; CHECK-NEXT:    b .LBB0_8
 ; CHECK-NEXT:    .p2align 5, , 16
@@ -341,8 +341,8 @@ define void @f(i1 %var_0, i16 %var_1, i64 %var_2, i8 %var_3, i16 %var_4, i1 %var
 ; CHECK-NEXT:    mov x24, x27
 ; CHECK-NEXT:    lsl x23, x14, #1
 ; CHECK-NEXT:    mov x27, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov w28, w30
 ; CHECK-NEXT:    madd x14, x14, x3, x11
+; CHECK-NEXT:    mov w28, w30
 ; CHECK-NEXT:    mov w3, #-7680 // =0xffffe200
 ; CHECK-NEXT:    b .LBB0_39
 ; CHECK-NEXT:    .p2align 5, , 16

>From 684ad3e5e2724ec3de3071cfb30fe26273a4cc91 Mon Sep 17 00:00:00 2001
From: Simon Wallis <simon.wallis2 at arm.com>
Date: Fri, 14 Nov 2025 13:21:45 +0000
Subject: [PATCH 6/7] [AArch64] Update zero latency instructions in Neoverse
 scheduling tables

NeoverseZeroMove was introduced for Neoverse-V2 and was added to V3 and V3AE.
Use NeoverseZeroMove for Neoverse-V1, N2, N3 in the same way, including these instructions:
MOV Xd|Wd, #0|XZR|WZR

For all Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine:
MOV Wd, Wn
MOV Xd, Xn

For Neoverse-N3 only, these instructions also have zero latency:
FMOV Dd, Dn
FMOV Sd, Sn

Change-Id: Ibbc0ba1da02dd4bf5ca28b33164d8fa4e93958d6
---
 .../Target/AArch64/AArch64SchedNeoverseN3.td  | 14 +++--
 .../Target/AArch64/AArch64SchedNeoverseV1.td  |  7 +--
 .../AArch64/AArch64SchedPredNeoverse.td       | 19 ++++++-
 .../AArch64/Neoverse/N3-neon-instructions.s   | 14 ++---
 .../AArch64/Neoverse/N3-sve-instructions.s    | 54 +++++++++----------
 5 files changed, 64 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
index ae24fc1e35a89..1f78ebb57da5e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
@@ -565,6 +565,10 @@ def N3Write_0or2c_1V : SchedWriteVariant<[
                       SchedVar<NeoverseZeroMove, [N3Write_0c]>,
                       SchedVar<NoSchedPred,      [N3Write_2c_1V]>]>;
 
+def N3Write_0or2c_1M : SchedWriteVariant<[
+                      SchedVar<NeoverseAllElement, [N3Write_0c]>,
+                      SchedVar<NoSchedPred,        [N3Write_2c_1M]>]>;
+
 def N3Write_0or3c_1M0 : SchedWriteVariant<[
                       SchedVar<NeoverseZeroMove, [N3Write_0c]>,
                       SchedVar<NoSchedPred,      [N3Write_3c_1M0]>]>;
@@ -981,6 +985,8 @@ def : InstRW<[WriteAdr, N3Write_2c_1L01_1V_1I], (instregex "^STP[SDQ](post|pre)$
 // ASIMD compare
 // ASIMD logical
 // ASIMD max/min, basic and pair-wise
+def : InstRW<[N3Write_0or2c_1V], (instregex "^ORRv16i8", "^ORRv8i8")>;
+
 def : SchedAlias<WriteVd, N3Write_2c_1V>;
 def : SchedAlias<WriteVq, N3Write_2c_1V>;
 
@@ -1584,10 +1590,11 @@ def : InstRW<[N3Write_2c_1M], (instregex "^REV_PP_[BHSD]")>;
 def : InstRW<[N3Write_1c_1M], (instrs SEL_PPPP)>;
 
 // Predicate set
-def : InstRW<[N3Write_2c_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
+def : InstRW<[N3Write_0c], (instregex "^PFALSE")>;
+def : InstRW<[N3Write_0or2c_1M], (instregex "^PTRUE_[BHSD]")>;
 
 // Predicate set/initialize, set flags
-def : InstRW<[N3Write_2c_1M], (instregex "^PTRUES_[BHSD]")>;
+def : InstRW<[N3Write_0or2c_1M], (instregex "^PTRUES_[BHSD]")>;
 
 // Predicate find first/next
 def : InstRW<[N3Write_2c_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
@@ -1810,10 +1817,11 @@ def : InstRW<[N3Write_5c_1M0_1V], (instregex "^INDEX_(IR|RI|RR)_D$")>;
 // Logical
 def : InstRW<[N3Write_2c_1V],
              (instregex "^(AND|EOR|ORR)_ZI",
-                        "^(AND|BIC|EOR|ORR)_ZZZ",
+                        "^(AND|BIC|EOR)_ZZZ",
                         "^EOR(BT|TB)_ZZZ_[BHSD]",
                         "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
                         "^NOT_ZPmZ_[BHSD]")>;
+def : InstRW<[N3Write_0or2c_1V], (instregex "^ORR_ZZZ")>;
 
 // Max/min, basic and pairwise
 def : InstRW<[N3Write_2c_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index ac3b9a3d40192..a880c1a034598 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -479,10 +479,6 @@ def V1Write_0or1c_1I : SchedWriteVariant<[
                       SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
                       SchedVar<NoSchedPred,      [V1Write_1c_1I]>]>;
 
-def V1Write_0or2c_1V : SchedWriteVariant<[
-                      SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
-                      SchedVar<NoSchedPred,      [V1Write_2c_1V]>]>;
-
 def V1Write_0or3c_1M0 : SchedWriteVariant<[
                       SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
                       SchedVar<NoSchedPred,      [V1Write_3c_1M0]>]>;
@@ -822,7 +818,7 @@ def : SchedAlias<WriteFImm, V1Write_2c_1V>;
 def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
 
 // FP transfer, from gen to low half of vec reg
-def : InstRW<[V1Write_0or3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
 
 // FP transfer, from gen to high half of vec reg
 def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1139,7 +1135,6 @@ def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
 // ASIMD transpose
 // ASIMD unzip/zip
 // Covered by "SchedAlias (WriteV[dq]...)" above
-def : InstRW<[V1Write_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>;
 
 // ASIMD duplicate, gen reg
 def : InstRW<[V1Write_3c_1M0],
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
index 33b76a4f65f05..2d330de16280b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -80,5 +80,22 @@ def NeoverseZeroMove : MCSchedPredicate<
                            // MOVI Dd, #0
                            // MOVI Vd.2D, #0
                            CheckAll<[CheckOpcode<[MOVID, MOVIv2d_ns]>,
-                                     CheckImmOperand<1, 0>]>
+                                     CheckImmOperand<1, 0>]>,
+                           // MOV Zd, Zn
+                           CheckAll<[CheckOpcode<[ORR_ZZZ]>,
+                                     CheckSameRegOperand<1, 2>]>,
+                           // MOV Vd, Vn
+                           CheckAll<[CheckOpcode<[ORRv16i8, ORRv8i8]>,
+                                     CheckSameRegOperand<1, 2>]>,
+                         ]>>;
+
+def NeoverseAllElement : MCSchedPredicate<
+                         CheckAny<[
+                           // PTRUE Pd, ALL
+                           // PTRUES Pd, ALL
+                           CheckAll<[CheckOpcode<[
+                                        PTRUE_B, PTRUE_H, PTRUE_S, PTRUE_D,
+                                        PTRUES_B, PTRUES_H, PTRUES_S, PTRUES_D]>,
+                                     CheckIsImmOperand<1>,
+                                     CheckImmOperand<1, 31>]>,
                          ]>>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s
index dddaca34f68dd..9ef4dd9877a14 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s
@@ -1445,8 +1445,8 @@ zip2	v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  2      5     1.00                        mov	v0.h[1], w8
 # CHECK-NEXT:  2      5     1.00                        mov	v0.s[2], w8
 # CHECK-NEXT:  2      5     1.00                        mov	v0.d[1], x8
-# CHECK-NEXT:  1      2     0.50                        mov	v0.16b, v0.16b
-# CHECK-NEXT:  1      2     0.50                        mov	v0.8b, v0.8b
+# CHECK-NEXT:  1      0     0.20                        mov	v0.16b, v0.16b
+# CHECK-NEXT:  1      0     0.20                        mov	v0.8b, v0.8b
 # CHECK-NEXT:  1      2     0.50                        movi	d15, #0xff00ff00ff00ff
 # CHECK-NEXT:  1      2     0.50                        movi	v0.16b, #31
 # CHECK-NEXT:  1      2     0.50                        movi	v0.2d, #0xff0000ff0000ffff
@@ -1467,7 +1467,7 @@ zip2	v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  1      2     0.50                        mvn	v0.16b, v0.16b
 # CHECK-NEXT:  1      2     0.50                        mvn	v0.8b, v0.8b
 # CHECK-NEXT:  1      2     0.50                        orn	v0.16b, v0.16b, v0.16b
-# CHECK-NEXT:  1      2     0.50                        mov	v0.16b, v0.16b
+# CHECK-NEXT:  1      0     0.20                        mov	v0.16b, v0.16b
 # CHECK-NEXT:  1      2     0.50                        orr	v0.8h, #31
 # CHECK-NEXT:  1      2     1.00                        pmul	v0.16b, v0.16b, v0.16b
 # CHECK-NEXT:  1      2     1.00                        pmul	v0.8b, v0.8b, v0.8b
@@ -2163,7 +2163,7 @@ zip2	v0.8h, v0.8h, v0.8h
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]
-# CHECK-NEXT:  -      -      -      -     33.00  51.50  51.50  18.75  7.75   7.75   7.75   649.00 584.00
+# CHECK-NEXT:  -      -      -      -     33.00  51.50  51.50  18.75  7.75   7.75   7.75   647.50 582.50
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    Instructions:
@@ -2534,8 +2534,8 @@ zip2	v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.h[1], w8
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.s[2], w8
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	v0.d[1], x8
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v0.16b, v0.16b
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v0.8b, v0.8b
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	v0.16b, v0.16b
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	v0.8b, v0.8b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   movi	d15, #0xff00ff00ff00ff
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   movi	v0.16b, #31
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   movi	v0.2d, #0xff0000ff0000ffff
@@ -2556,7 +2556,7 @@ zip2	v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mvn	v0.16b, v0.16b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mvn	v0.8b, v0.8b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   orn	v0.16b, v0.16b, v0.16b
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	v0.16b, v0.16b
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	v0.16b, v0.16b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   orr	v0.8h, #31
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     pmul	v0.16b, v0.16b, v0.16b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     pmul	v0.8b, v0.8b, v0.8b
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s
index 8977802be6bfc..3bc16e693e33f 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-sve-instructions.s
@@ -4979,7 +4979,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        mov	z0.d, p0/m, d0
 # CHECK-NEXT:  2      5     1.00                        mov	z0.d, p0/m, x0
 # CHECK-NEXT:  1      3     1.00                        mov	z0.d, x0
-# CHECK-NEXT:  1      2     0.50                        mov	z0.d, z0.d
+# CHECK-NEXT:  1      0     0.20                        mov	z0.d, z0.d
 # CHECK-NEXT:  1      2     0.50                        mov	z0.h, #-256
 # CHECK-NEXT:  1      2     0.50                        mov	z0.h, #-32768
 # CHECK-NEXT:  1      2     0.50                        mov	z0.h, #0
@@ -5039,7 +5039,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        movprfx	z31.d, p7/z, z6.d
 # CHECK-NEXT:  2      5     1.00                        mov	z31.d, p7/m, sp
 # CHECK-NEXT:  1      3     1.00                        mov	z31.d, sp
-# CHECK-NEXT:  1      2     0.50                        mov	z31.d, z0.d
+# CHECK-NEXT:  1      0     0.20                        mov	z31.d, z0.d
 # CHECK-NEXT:  1      2     0.50                        mov	z31.d, z31.d[7]
 # CHECK-NEXT:  1      2     0.50                        mov	z31.h, p15/m, z31.h
 # CHECK-NEXT:  1      2     0.50                        mov	z31.h, p7/m, h31
@@ -5150,7 +5150,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  2      5     1.00                        orv	d0, p7, z31.d
 # CHECK-NEXT:  2      5     1.00                        orv	h0, p7, z31.h
 # CHECK-NEXT:  2      5     1.00                        orv	s0, p7, z31.s
-# CHECK-NEXT:  1      2     0.50                        pfalse	p15.b
+# CHECK-NEXT:  1      0     0.20                        pfalse	p15.b
 # CHECK-NEXT:  1      2     0.50                        pfirst	p0.b, p15, p0.b
 # CHECK-NEXT:  1      2     0.50                        pfirst	p15.b, p15, p15.b
 # CHECK-NEXT:  1      2     1.00                        pmul	z0.b, z1.b, z2.b
@@ -5280,11 +5280,11 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        ptrue	p0.d, pow2
 # CHECK-NEXT:  1      2     0.50                        ptrue	p0.h, pow2
 # CHECK-NEXT:  1      2     0.50                        ptrue	p0.s, pow2
-# CHECK-NEXT:  1      2     0.50                        ptrue	p15.b
-# CHECK-NEXT:  1      2     0.50                        ptrue	p15.d
-# CHECK-NEXT:  1      2     0.50                        ptrue	p15.h
-# CHECK-NEXT:  1      2     0.50                        ptrue	p15.s
-# CHECK-NEXT:  1      2     0.50                        ptrue	p7.s
+# CHECK-NEXT:  1      0     0.20                        ptrue	p15.b
+# CHECK-NEXT:  1      0     0.20                        ptrue	p15.d
+# CHECK-NEXT:  1      0     0.20                        ptrue	p15.h
+# CHECK-NEXT:  1      0     0.20                        ptrue	p15.s
+# CHECK-NEXT:  1      0     0.20                        ptrue	p7.s
 # CHECK-NEXT:  1      2     0.50                        ptrue	p7.s, #14
 # CHECK-NEXT:  1      2     0.50                        ptrue	p7.s, #15
 # CHECK-NEXT:  1      2     0.50                        ptrue	p7.s, #16
@@ -5319,11 +5319,11 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  1      2     0.50                        ptrues	p0.d, pow2
 # CHECK-NEXT:  1      2     0.50                        ptrues	p0.h, pow2
 # CHECK-NEXT:  1      2     0.50                        ptrues	p0.s, pow2
-# CHECK-NEXT:  1      2     0.50                        ptrues	p15.b
-# CHECK-NEXT:  1      2     0.50                        ptrues	p15.d
-# CHECK-NEXT:  1      2     0.50                        ptrues	p15.h
-# CHECK-NEXT:  1      2     0.50                        ptrues	p15.s
-# CHECK-NEXT:  1      2     0.50                        ptrues	p7.s
+# CHECK-NEXT:  1      0     0.20                        ptrues	p15.b
+# CHECK-NEXT:  1      0     0.20                        ptrues	p15.d
+# CHECK-NEXT:  1      0     0.20                        ptrues	p15.h
+# CHECK-NEXT:  1      0     0.20                        ptrues	p15.s
+# CHECK-NEXT:  1      0     0.20                        ptrues	p7.s
 # CHECK-NEXT:  1      2     0.50                        ptrues	p7.s, #14
 # CHECK-NEXT:  1      2     0.50                        ptrues	p7.s, #15
 # CHECK-NEXT:  1      2     0.50                        ptrues	p7.s, #16
@@ -6847,7 +6847,7 @@ zip2	z31.s, z31.s, z31.s
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]
-# CHECK-NEXT:  -      -      -      -     332.67 481.67 481.67 298.00 230.00 88.50  88.50  1558.00 1401.00
+# CHECK-NEXT:  -      -      -      -     332.67 481.67 481.67 292.50 224.50 88.50  88.50  1557.00 1400.00
 
 # CHECK:      Resource pressure by instruction:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    Instructions:
@@ -8410,7 +8410,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z0.d, p0/m, d0
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	z0.d, p0/m, x0
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -      -     mov	z0.d, x0
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z0.d, z0.d
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	z0.d, z0.d
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z0.h, #-256
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z0.h, #-32768
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z0.h, #0
@@ -8470,7 +8470,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   movprfx	z31.d, p7/z, z6.d
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -     0.50   0.50   mov	z31.d, p7/m, sp
 # CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -      -      -      -     mov	z31.d, sp
-# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z31.d, z0.d
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     mov	z31.d, z0.d
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z31.d, z31.d[7]
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z31.h, p15/m, z31.h
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   0.50   mov	z31.h, p7/m, h31
@@ -8581,7 +8581,7 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   1.50   orv	d0, p7, z31.d
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   1.50   orv	h0, p7, z31.h
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     0.50   1.50   orv	s0, p7, z31.s
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     pfalse	p15.b
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     pfalse	p15.b
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     pfirst	p0.b, p15, p0.b
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     pfirst	p15.b, p15, p15.b
 # CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -     1.00    -     pmul	z0.b, z1.b, z2.b
@@ -8711,11 +8711,11 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p0.d, pow2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p0.h, pow2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p0.s, pow2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p15.b
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p15.d
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p15.h
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p15.s
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p7.s
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrue	p15.b
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrue	p15.d
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrue	p15.h
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrue	p15.s
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrue	p7.s
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p7.s, #14
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p7.s, #15
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrue	p7.s, #16
@@ -8750,11 +8750,11 @@ zip2	z31.s, z31.s, z31.s
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p0.d, pow2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p0.h, pow2
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p0.s, pow2
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p15.b
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p15.d
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p15.h
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p15.s
-# CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p7.s
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrues	p15.b
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrues	p15.d
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrues	p15.h
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrues	p15.s
+# CHECK-NEXT:  -      -      -      -      -      -      -      -      -      -      -      -      -     ptrues	p7.s
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p7.s, #14
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p7.s, #15
 # CHECK-NEXT:  -      -      -      -      -      -      -     0.50   0.50    -      -      -      -     ptrues	p7.s, #16

>From 54fc6b557dfe7d1a3bdd77682af0641787dac9ce Mon Sep 17 00:00:00 2001
From: Simon Wallis <simon.wallis2 at arm.com>
Date: Tue, 18 Nov 2025 11:12:32 +0000
Subject: [PATCH 7/7] [AArch64] Update zero latency instructions in Neoverse
 scheduling tables

NeoverseZeroMove was introduced for Neoverse-V2 and was added to V3 and V3AE.
Use NeoverseZeroMove for Neoverse-V1, N2, N3 in the same way, including these instructions:
MOV Xd|Wd, #0|XZR|WZR

For all Neoverse targets, the following instructions are also decoded as not utilizing the scheduling and execution resources of the machine:
MOV Wd,Wn
MOV Xd,Xn

For Neoverse-N3 only, these instructions also have zero latency:
FMOV Dd, Dn
FMOV Sd, Sn

Change-Id: I2d51b0ee6736d14f8212583f234431c555cc2574
---
 .../Target/AArch64/AArch64SchedNeoverseN3.td  | 10 +--
 .../Target/AArch64/AArch64SchedNeoverseV1.td  |  7 +-
 .../AArch64/AArch64SchedPredNeoverse.td       | 21 +++---
 .../AArch64/Neoverse/V1-basic-instructions.s  | 66 +++++++++----------
 .../llvm-mca/AArch64/Neoverse/V1-writeback.s  | 30 ++++-----
 5 files changed, 68 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
index 1f78ebb57da5e..f195c83344c6a 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
@@ -566,8 +566,8 @@ def N3Write_0or2c_1V : SchedWriteVariant<[
                       SchedVar<NoSchedPred,      [N3Write_2c_1V]>]>;
 
 def N3Write_0or2c_1M : SchedWriteVariant<[
-                      SchedVar<NeoverseAllElement, [N3Write_0c]>,
-                      SchedVar<NoSchedPred,        [N3Write_2c_1M]>]>;
+                      SchedVar<NeoverseAllActivePredicate, [N3Write_0c]>,
+                      SchedVar<NoSchedPred,                [N3Write_2c_1M]>]>;
 
 def N3Write_0or3c_1M0 : SchedWriteVariant<[
                       SchedVar<NeoverseZeroMove, [N3Write_0c]>,
@@ -985,7 +985,7 @@ def : InstRW<[WriteAdr, N3Write_2c_1L01_1V_1I], (instregex "^STP[SDQ](post|pre)$
 // ASIMD compare
 // ASIMD logical
 // ASIMD max/min, basic and pair-wise
-def : InstRW<[N3Write_0or2c_1V], (instregex "^ORRv16i8", "^ORRv8i8")>;
+def : InstRW<[N3Write_0or2c_1V], (instrs ORRv16i8, ORRv8i8)>;
 
 def : SchedAlias<WriteVd, N3Write_2c_1V>;
 def : SchedAlias<WriteVq, N3Write_2c_1V>;
@@ -1590,7 +1590,7 @@ def : InstRW<[N3Write_2c_1M], (instregex "^REV_PP_[BHSD]")>;
 def : InstRW<[N3Write_1c_1M], (instrs SEL_PPPP)>;
 
 // Predicate set
-def : InstRW<[N3Write_0c], (instregex "^PFALSE")>;
+def : InstRW<[N3Write_0c], (instrs PFALSE)>;
 def : InstRW<[N3Write_0or2c_1M], (instregex "^PTRUE_[BHSD]")>;
 
 // Predicate set/initialize, set flags
@@ -1821,7 +1821,7 @@ def : InstRW<[N3Write_2c_1V],
                         "^EOR(BT|TB)_ZZZ_[BHSD]",
                         "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
                         "^NOT_ZPmZ_[BHSD]")>;
-def : InstRW<[N3Write_0or2c_1V], (instregex "^ORR_ZZZ")>;
+def : InstRW<[N3Write_0or2c_1V], (instrs ORR_ZZZ)>;
 
 // Max/min, basic and pairwise
 def : InstRW<[N3Write_2c_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index a880c1a034598..bf65b31f88037 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -91,9 +91,10 @@ def : WriteRes<WriteHint,    []> { let Latency = 1; }
 //===----------------------------------------------------------------------===//
 // Define generic 0 micro-op types
 
-let Latency = 0, NumMicroOps = 1 in
+let Latency = 0, NumMicroOps = 0 in
 def V1Write_0c_0Z : SchedWriteRes<[]>;
 
+def V1Write_0c : SchedWriteRes<[]> { let Latency = 0; }
 
 //===----------------------------------------------------------------------===//
 // Define generic 1 micro-op types
@@ -476,11 +477,11 @@ def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
 // Define predicate-controlled types
 
 def V1Write_0or1c_1I : SchedWriteVariant<[
-                      SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
+                      SchedVar<NeoverseZeroMove, [V1Write_0c]>,
                       SchedVar<NoSchedPred,      [V1Write_1c_1I]>]>;
 
 def V1Write_0or3c_1M0 : SchedWriteVariant<[
-                      SchedVar<NeoverseZeroMove, [V1Write_0c_0Z]>,
+                      SchedVar<NeoverseZeroMove, [V1Write_0c]>,
                       SchedVar<NoSchedPred,      [V1Write_3c_1M0]>]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
index 2d330de16280b..f841e6072d2b4 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -89,13 +89,14 @@ def NeoverseZeroMove : MCSchedPredicate<
                                      CheckSameRegOperand<1, 2>]>,
                          ]>>;
 
-def NeoverseAllElement : MCSchedPredicate<
-                         CheckAny<[
-                           // PTRUE Pd, ALL
-                           // PTRUES Pd, ALL
-                           CheckAll<[CheckOpcode<[
-                                        PTRUE_B, PTRUE_H, PTRUE_S, PTRUE_D,
-                                        PTRUES_B, PTRUES_H, PTRUES_S, PTRUES_D]>,
-                                     CheckIsImmOperand<1>,
-                                     CheckImmOperand<1, 31>]>,
-                         ]>>;
+def NeoverseAllActivePredicate : MCSchedPredicate<
+                                   CheckAny<[
+                                     // PTRUE Pd, ALL
+                                     // PTRUES Pd, ALL
+                                     CheckAll<[
+                                       CheckOpcode<[
+                                         PTRUE_B, PTRUE_H, PTRUE_S, PTRUE_D,
+                                         PTRUES_B, PTRUES_H, PTRUES_S, PTRUES_D]>,
+                                       CheckIsImmOperand<1>,
+                                       CheckImmOperand<1, 31>]>,
+                                   ]>>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
index eaf128acc0195..787acbe91c057 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-basic-instructions.s
@@ -2562,78 +2562,78 @@ drps
 # CHECK-NEXT:  2      7     0.33    *                   ldr	q17, [x23, w9, sxtw]
 # CHECK-NEXT:  3      2     0.50           *            str	q18, [x22, w10, sxtw]
 # CHECK-NEXT:  2      7     0.33    *                   ldr	q19, [x21, wzr, sxtw #4]
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w3, w5, [sp]
+# CHECK-NEXT:  1      4     0.33    *                   ldp	w3, w5, [sp]
 # CHECK-NEXT:  2      1     0.50           *            stp	wzr, w9, [sp, #252]
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w2, wzr, [sp, #-256]
-# CHECK-NEXT:  2      4     0.33    *                   ldp	w9, w10, [sp, #4]
-# CHECK-NEXT:  3      5     0.33    *                   ldpsw	x9, x10, [sp, #4]
-# CHECK-NEXT:  3      5     0.33    *                   ldpsw	x9, x10, [x2, #-256]
-# CHECK-NEXT:  3      5     0.33    *                   ldpsw	x20, x30, [sp, #252]
+# CHECK-NEXT:  1      4     0.33    *                   ldp	w2, wzr, [sp, #-256]
+# CHECK-NEXT:  1      4     0.33    *                   ldp	w9, w10, [sp, #4]
+# CHECK-NEXT:  2      5     0.33    *                   ldpsw	x9, x10, [sp, #4]
+# CHECK-NEXT:  2      5     0.33    *                   ldpsw	x9, x10, [x2, #-256]
+# CHECK-NEXT:  2      5     0.33    *                   ldpsw	x20, x30, [sp, #252]
 # CHECK-NEXT:  2      4     0.67    *                   ldp	x21, x29, [x2, #504]
 # CHECK-NEXT:  2      4     0.67    *                   ldp	x22, x23, [x3, #-512]
 # CHECK-NEXT:  2      4     0.67    *                   ldp	x24, x25, [x4, #8]
-# CHECK-NEXT:  2      6     0.33    *                   ldp	s29, s28, [sp, #252]
+# CHECK-NEXT:  1      6     0.33    *                   ldp	s29, s28, [sp, #252]
 # CHECK-NEXT:  2      2     0.50           *            stp	s27, s26, [sp, #-256]
-# CHECK-NEXT:  2      6     0.33    *                   ldp	s1, s2, [x3, #44]
+# CHECK-NEXT:  1      6     0.33    *                   ldp	s1, s2, [x3, #44]
 # CHECK-NEXT:  2      2     0.50           *            stp	d3, d5, [x9, #504]
 # CHECK-NEXT:  2      2     0.50           *            stp	d7, d11, [x10, #-512]
 # CHECK-NEXT:  2      1     0.50           *            stnp	x20, x16, [x8]
 # CHECK-NEXT:  2      1     0.50           *            stp	x3, x6, [x16]
-# CHECK-NEXT:  2      6     0.33    *                   ldp	d2, d3, [x30, #-8]
+# CHECK-NEXT:  1      6     0.33    *                   ldp	d2, d3, [x30, #-8]
 # CHECK-NEXT:  2      2     0.50           *            stp	q3, q5, [sp]
 # CHECK-NEXT:  2      2     0.50           *            stp	q17, q19, [sp, #1008]
 # CHECK-NEXT:  2      6     0.67    *                   ldp	q23, q29, [x1, #-1024]
-# CHECK-NEXT:  3      4     0.33    *                   ldp	w3, w5, [sp], #0
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w3, w5, [sp], #0
 # CHECK-NEXT:  3      1     0.50           *            stp	wzr, w9, [sp], #252
-# CHECK-NEXT:  3      4     0.33    *                   ldp	w2, wzr, [sp], #-256
-# CHECK-NEXT:  3      4     0.33    *                   ldp	w9, w10, [sp], #4
-# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [sp], #4
-# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [x2], #-256
-# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x20, x30, [sp], #252
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w2, wzr, [sp], #-256
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w9, w10, [sp], #4
+# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [sp], #4
+# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [x2], #-256
+# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x20, x30, [sp], #252
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x21, x29, [x2], #504
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x22, x23, [x3], #-512
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x24, x25, [x4], #8
-# CHECK-NEXT:  3      6     0.33    *                   ldp	s29, s28, [sp], #252
+# CHECK-NEXT:  2      6     0.33    *                   ldp	s29, s28, [sp], #252
 # CHECK-NEXT:  3      2     0.50           *            stp	s27, s26, [sp], #-256
-# CHECK-NEXT:  3      6     0.33    *                   ldp	s1, s2, [x3], #44
+# CHECK-NEXT:  2      6     0.33    *                   ldp	s1, s2, [x3], #44
 # CHECK-NEXT:  3      2     0.50           *            stp	d3, d5, [x9], #504
 # CHECK-NEXT:  3      2     0.50           *            stp	d7, d11, [x10], #-512
-# CHECK-NEXT:  3      6     0.33    *                   ldp	d2, d3, [x30], #-8
+# CHECK-NEXT:  2      6     0.33    *                   ldp	d2, d3, [x30], #-8
 # CHECK-NEXT:  4      2     1.00           *            stp	q3, q5, [sp], #0
 # CHECK-NEXT:  4      2     1.00           *            stp	q17, q19, [sp], #1008
 # CHECK-NEXT:  3      6     0.67    *                   ldp	q23, q29, [x1], #-1024
-# CHECK-NEXT:  3      4     0.33    *                   ldp	w3, w5, [sp, #0]!
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w3, w5, [sp, #0]!
 # CHECK-NEXT:  3      1     0.50           *            stp	wzr, w9, [sp, #252]!
-# CHECK-NEXT:  3      4     0.33    *                   ldp	w2, wzr, [sp, #-256]!
-# CHECK-NEXT:  3      4     0.33    *                   ldp	w9, w10, [sp, #4]!
-# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [sp, #4]!
-# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x9, x10, [x2, #-256]!
-# CHECK-NEXT:  4      5     0.50    *                   ldpsw	x20, x30, [sp, #252]!
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w2, wzr, [sp, #-256]!
+# CHECK-NEXT:  2      4     0.33    *                   ldp	w9, w10, [sp, #4]!
+# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [sp, #4]!
+# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x9, x10, [x2, #-256]!
+# CHECK-NEXT:  3      5     0.50    *                   ldpsw	x20, x30, [sp, #252]!
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x21, x29, [x2, #504]!
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x22, x23, [x3, #-512]!
 # CHECK-NEXT:  3      4     0.67    *                   ldp	x24, x25, [x4, #8]!
-# CHECK-NEXT:  3      6     0.33    *                   ldp	s29, s28, [sp, #252]!
+# CHECK-NEXT:  2      6     0.33    *                   ldp	s29, s28, [sp, #252]!
 # CHECK-NEXT:  3      2     0.50           *            stp	s27, s26, [sp, #-256]!
-# CHECK-NEXT:  3      6     0.33    *                   ldp	s1, s2, [x3, #44]!
+# CHECK-NEXT:  2      6     0.33    *                   ldp	s1, s2, [x3, #44]!
 # CHECK-NEXT:  3      2     0.50           *            stp	d3, d5, [x9, #504]!
 # CHECK-NEXT:  3      2     0.50           *            stp	d7, d11, [x10, #-512]!
-# CHECK-NEXT:  3      6     0.33    *                   ldp	d2, d3, [x30, #-8]!
+# CHECK-NEXT:  2      6     0.33    *                   ldp	d2, d3, [x30, #-8]!
 # CHECK-NEXT:  4      2     1.00           *            stp	q3, q5, [sp, #0]!
 # CHECK-NEXT:  4      2     1.00           *            stp	q17, q19, [sp, #1008]!
 # CHECK-NEXT:  3      6     0.67    *                   ldp	q23, q29, [x1, #-1024]!
-# CHECK-NEXT:  2      4     0.33    *                   ldnp	w3, w5, [sp]
+# CHECK-NEXT:  1      4     0.33    *                   ldnp	w3, w5, [sp]
 # CHECK-NEXT:  2      1     0.50           *            stnp	wzr, w9, [sp, #252]
-# CHECK-NEXT:  2      4     0.33    *                   ldnp	w2, wzr, [sp, #-256]
-# CHECK-NEXT:  2      4     0.33    *                   ldnp	w9, w10, [sp, #4]
+# CHECK-NEXT:  1      4     0.33    *                   ldnp	w2, wzr, [sp, #-256]
+# CHECK-NEXT:  1      4     0.33    *                   ldnp	w9, w10, [sp, #4]
 # CHECK-NEXT:  2      4     0.67    *                   ldnp	x21, x29, [x2, #504]
 # CHECK-NEXT:  2      4     0.67    *                   ldnp	x22, x23, [x3, #-512]
 # CHECK-NEXT:  2      4     0.67    *                   ldnp	x24, x25, [x4, #8]
-# CHECK-NEXT:  2      6     0.33    *                   ldnp	s29, s28, [sp, #252]
+# CHECK-NEXT:  1      6     0.33    *                   ldnp	s29, s28, [sp, #252]
 # CHECK-NEXT:  2      2     0.50           *            stnp	s27, s26, [sp, #-256]
-# CHECK-NEXT:  2      6     0.33    *                   ldnp	s1, s2, [x3, #44]
+# CHECK-NEXT:  1      6     0.33    *                   ldnp	s1, s2, [x3, #44]
 # CHECK-NEXT:  2      2     0.50           *            stnp	d3, d5, [x9, #504]
 # CHECK-NEXT:  2      2     0.50           *            stnp	d7, d11, [x10, #-512]
-# CHECK-NEXT:  2      6     0.33    *                   ldnp	d2, d3, [x30, #-8]
+# CHECK-NEXT:  1      6     0.33    *                   ldnp	d2, d3, [x30, #-8]
 # CHECK-NEXT:  2      2     0.50           *            stnp	q3, q5, [sp]
 # CHECK-NEXT:  2      2     0.50           *            stnp	q17, q19, [sp, #1008]
 # CHECK-NEXT:  2      6     0.67    *                   ldnp	q23, q29, [x1, #-1024]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
index fee62ce565b43..5efe3d0bbf14f 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-writeback.s
@@ -3140,13 +3140,13 @@ add x0, x27, 1
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      1000
-# CHECK-NEXT: Total Cycles:      708
-# CHECK-NEXT: Total uOps:        3500
+# CHECK-NEXT: Total Cycles:      609
+# CHECK-NEXT: Total uOps:        3300
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    4.94
-# CHECK-NEXT: IPC:               1.41
-# CHECK-NEXT: Block RThroughput: 4.4
+# CHECK-NEXT: uOps Per Cycle:    5.42
+# CHECK-NEXT: IPC:               1.64
+# CHECK-NEXT: Block RThroughput: 4.1
 
 # CHECK:      Timeline view:
 # CHECK-NEXT:                     01234
@@ -3161,7 +3161,7 @@ add x0, x27, 1
 # CHECK-NEXT: [0,6]     .    DeeeeeeE-R   ldp	s1, s2, [x27], #248
 # CHECK-NEXT: [0,7]     .    D=eE-----R   add	x0, x27, #1
 # CHECK-NEXT: [0,8]     .    D=eeeeeeER   ldp	d1, d2, [x27], #496
-# CHECK-NEXT: [0,9]     .    .D=eE----R   add	x0, x27, #1
+# CHECK-NEXT: [0,9]     .    D==eE----R   add	x0, x27, #1
 
 # CHECK:      Average Wait times (based on the timeline view):
 # CHECK-NEXT: [0]: Executions
@@ -3179,18 +3179,18 @@ add x0, x27, 1
 # CHECK-NEXT: 6.     1     1.0    0.0    1.0       ldp	s1, s2, [x27], #248
 # CHECK-NEXT: 7.     1     2.0    0.0    5.0       add	x0, x27, #1
 # CHECK-NEXT: 8.     1     2.0    0.0    0.0       ldp	d1, d2, [x27], #496
-# CHECK-NEXT: 9.     1     2.0    0.0    4.0       add	x0, x27, #1
-# CHECK-NEXT:        1     1.3    0.3    2.8       <total>
+# CHECK-NEXT: 9.     1     3.0    0.0    4.0       add	x0, x27, #1
+# CHECK-NEXT:        1     1.4    0.3    2.8       <total>
 
 # CHECK:      [44] Code Region - G45
 
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      1000
 # CHECK-NEXT: Total Cycles:      507
-# CHECK-NEXT: Total uOps:        2000
+# CHECK-NEXT: Total uOps:        1700
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    3.94
+# CHECK-NEXT: uOps Per Cycle:    3.35
 # CHECK-NEXT: IPC:               1.97
 # CHECK-NEXT: Block RThroughput: 2.5
 
@@ -3233,10 +3233,10 @@ add x0, x27, 1
 # CHECK:      Iterations:        100
 # CHECK-NEXT: Instructions:      1000
 # CHECK-NEXT: Total Cycles:      507
-# CHECK-NEXT: Total uOps:        2200
+# CHECK-NEXT: Total uOps:        1900
 
 # CHECK:      Dispatch Width:    8
-# CHECK-NEXT: uOps Per Cycle:    4.34
+# CHECK-NEXT: uOps Per Cycle:    3.75
 # CHECK-NEXT: IPC:               1.97
 # CHECK-NEXT: Block RThroughput: 3.0
 
@@ -3251,7 +3251,7 @@ add x0, x27, 1
 # CHECK-NEXT: [0,4]     .D=eeeeER ..   ldp	x1, x2, [x27, #496]!
 # CHECK-NEXT: [0,5]     .D==eE--R ..   add	x0, x27, #1
 # CHECK-NEXT: [0,6]     .D==eeeeeER.   ldpsw	x1, x2, [x27], #248
-# CHECK-NEXT: [0,7]     . D==eE---R.   add	x0, x27, #1
+# CHECK-NEXT: [0,7]     .D===eE---R.   add	x0, x27, #1
 # CHECK-NEXT: [0,8]     . D==eeeeeER   ldpsw	x1, x2, [x27, #248]!
 # CHECK-NEXT: [0,9]     . D===eE---R   add	x0, x27, #1
 
@@ -3269,10 +3269,10 @@ add x0, x27, 1
 # CHECK-NEXT: 4.     1     2.0    0.0    0.0       ldp	x1, x2, [x27, #496]!
 # CHECK-NEXT: 5.     1     3.0    0.0    2.0       add	x0, x27, #1
 # CHECK-NEXT: 6.     1     3.0    0.0    0.0       ldpsw	x1, x2, [x27], #248
-# CHECK-NEXT: 7.     1     3.0    0.0    3.0       add	x0, x27, #1
+# CHECK-NEXT: 7.     1     4.0    0.0    3.0       add	x0, x27, #1
 # CHECK-NEXT: 8.     1     3.0    0.0    0.0       ldpsw	x1, x2, [x27, #248]!
 # CHECK-NEXT: 9.     1     4.0    0.0    3.0       add	x0, x27, #1
-# CHECK-NEXT:        1     2.6    0.1    1.2       <total>
+# CHECK-NEXT:        1     2.7    0.1    1.2       <total>
 
 # CHECK:      [46] Code Region - G47
 



More information about the llvm-commits mailing list