[llvm] b5d8a03 - [AArch64] Add missing ASIMD FP convert instructions to scheduling model (#115146)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 07:41:49 PST 2024
Author: Rin Dobrescu
Date: 2024-11-06T15:41:45Z
New Revision: b5d8a03de453b79ca3c0bf841931bcaacf2fc830
URL: https://github.com/llvm/llvm-project/commit/b5d8a03de453b79ca3c0bf841931bcaacf2fc830
DIFF: https://github.com/llvm/llvm-project/commit/b5d8a03de453b79ca3c0bf841931bcaacf2fc830.diff
LOG: [AArch64] Add missing ASIMD FP convert instructions to scheduling model (#115146)
Some ASIMD FP convert instructions have incorrect scheduling
information. These instructions currently have latency 2, throughput 4
and utilise pipeline V. This patch corrects the scheduling models to
match the relevant Software Optimization Guide.
The V1 and V2 Software Optimization Guides show that ASIMD FP convert
instructions should all utilise pipelines V02. Their execution latency
and throughput should also differ depending on form. See section 3.17
"ASIMD floating-point instructions" in the Neoverse-V1 and Neoverse-V2
Software Optimization Guides for the characteristics of instruction
performance.
Reference:
- V1 SOG: https://developer.arm.com/documentation/109897/latest/
- V2 SOG: https://developer.arm.com/documentation/109898/latest/
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index fb4d2f3d7bcd3a..3fd9dd3f2fff92 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1015,16 +1015,34 @@ def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
"^FCVTXN(v[24]f32|v1i64)$")>;
// ASIMD FP convert, other, D-form F32 and Q-form F64
-def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
- "^[SU]CVTFv2f(32|64)$")>;
+def : InstRW<[V1Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
+ "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
+ "^FCVT[AMNPZ][SU]v1i64$",
+ "^FCVTZ[SU]d$",
+ "^[SU]CVTFv2f(32|64)$",
+ "^[SU]CVTFv2i(32|64)_shift$",
+ "^[SU]CVTFv1i64$",
+ "^[SU]CVTFd$")>;
// ASIMD FP convert, other, D-form F16 and Q-form F32
-def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
- "^[SU]CVTFv4f(16|32)$")>;
+def : InstRW<[V1Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+ "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
+ "^FCVT[AMNPZ][SU]v1i32$",
+ "^FCVTZ[SU]s$",
+ "^[SU]CVTFv4f(16|32)$",
+ "^[SU]CVTFv4i(16|32)_shift$",
+ "^[SU]CVTFv1i32$",
+ "^[SU]CVTFs$")>;
// ASIMD FP convert, other, Q-form F16
-def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
- "^[SU]CVTFv8f16$")>;
+def : InstRW<[V1Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+ "^FCVT[AMNPZ][SU]v8i16_shift$",
+ "^FCVT[AMNPZ][SU]v1f16$",
+ "^FCVTZ[SU]h$",
+ "^[SU]CVTFv8f16$",
+ "^[SU]CVTFv8i16_shift$",
+ "^[SU]CVTFv1i16$",
+ "^[SU]CVTFh$")>;
// ASIMD FP divide, D-form, F16
// ASIMD FP square root, D-form, F16
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index f884d20ab7eda9..2de5f59834d787 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -1567,25 +1567,31 @@ def : InstRW<[V2Write_3c_1V02], (instregex "^FCVTN(v2|v4)i32",
// ASIMD FP convert, other, D-form F32 and Q-form F64
def : InstRW<[V2Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
+ "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
"^FCVT[AMNPZ][SU]v1i64$",
"^FCVTZ[SU]d$",
"^[SU]CVTFv2f(32|64)$",
+ "^[SU]CVTFv2i(32|64)_shift$",
"^[SU]CVTFv1i64$",
"^[SU]CVTFd$")>;
// ASIMD FP convert, other, D-form F16 and Q-form F32
def : InstRW<[V2Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+ "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
"^FCVT[AMNPZ][SU]v1i32$",
"^FCVTZ[SU]s$",
"^[SU]CVTFv4f(16|32)$",
+ "^[SU]CVTFv4i(16|32)_shift$",
"^[SU]CVTFv1i32$",
"^[SU]CVTFs$")>;
// ASIMD FP convert, other, Q-form F16
def : InstRW<[V2Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+ "^FCVT[AMNPZ][SU]v8i16_shift$",
"^FCVT[AMNPZ][SU]v1f16$",
"^FCVTZ[SU]h$",
"^[SU]CVTFv8f16$",
+ "^[SU]CVTFv8i16_shift$",
"^[SU]CVTFv1i16$",
"^[SU]CVTFh$")>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
index 65b73177c7b70a..5ad42d5ff0a131 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
@@ -1373,17 +1373,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 fcmlt d20, d21, #0.0
# CHECK-NEXT: 1 2 0.25 fcmlt s10, s11, #0.0
# CHECK-NEXT: 1 2 0.25 fcmlt v0.4s, v0.4s, #0.0
-# CHECK-NEXT: 1 2 0.25 fcvtas d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtas s12, s13
-# CHECK-NEXT: 1 2 0.25 fcvtas h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtas d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtas s12, s13
+# CHECK-NEXT: 4 6 1.00 fcvtas h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtas v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtas v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtas v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtas v0.4s, v0.4s
# CHECK-NEXT: 4 6 1.00 fcvtas v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 fcvtau d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtau s12, s13
-# CHECK-NEXT: 1 2 0.25 fcvtau h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtau d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtau s12, s13
+# CHECK-NEXT: 4 6 1.00 fcvtau h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtau v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtau v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtau v0.4h, v0.4h
@@ -1393,17 +1393,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 2 4 1.00 fcvtl v0.4s, v0.4h
# CHECK-NEXT: 1 3 0.50 fcvtl2 v0.2d, v0.4s
# CHECK-NEXT: 2 4 1.00 fcvtl2 v0.4s, v0.8h
-# CHECK-NEXT: 1 2 0.25 fcvtms d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtms s22, s13
-# CHECK-NEXT: 1 2 0.25 fcvtms h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtms d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtms s22, s13
+# CHECK-NEXT: 4 6 1.00 fcvtms h22, h13
# CHECK-NEXT: 1 3 0.50 fcvtms v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtms v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtms v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtms v0.4s, v0.4s
# CHECK-NEXT: 4 6 1.00 fcvtms v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 fcvtmu d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtmu s12, s13
-# CHECK-NEXT: 1 2 0.25 fcvtmu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtmu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtmu s12, s13
+# CHECK-NEXT: 4 6 1.00 fcvtmu h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtmu v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtmu v0.4h, v0.4h
@@ -1413,33 +1413,33 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 2 4 1.00 fcvtn v0.4h, v0.4s
# CHECK-NEXT: 1 3 0.50 fcvtn2 v0.4s, v0.2d
# CHECK-NEXT: 2 4 1.00 fcvtn2 v0.8h, v0.4s
-# CHECK-NEXT: 1 2 0.25 fcvtns d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtns s22, s13
-# CHECK-NEXT: 1 2 0.25 fcvtns h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtns d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtns s22, s13
+# CHECK-NEXT: 4 6 1.00 fcvtns h22, h13
# CHECK-NEXT: 1 3 0.50 fcvtns v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtns v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtns v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtns v0.4s, v0.4s
# CHECK-NEXT: 4 6 1.00 fcvtns v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 fcvtnu d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtnu s12, s13
-# CHECK-NEXT: 1 2 0.25 fcvtnu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtnu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtnu s12, s13
+# CHECK-NEXT: 4 6 1.00 fcvtnu h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtnu v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtnu v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtnu v0.4s, v0.4s
# CHECK-NEXT: 4 6 1.00 fcvtnu v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 fcvtps d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtps s22, s13
-# CHECK-NEXT: 1 2 0.25 fcvtps h22, h13
+# CHECK-NEXT: 1 3 0.50 fcvtps d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtps s22, s13
+# CHECK-NEXT: 4 6 1.00 fcvtps h22, h13
# CHECK-NEXT: 1 3 0.50 fcvtps v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtps v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtps v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtps v0.4s, v0.4s
# CHECK-NEXT: 4 6 1.00 fcvtps v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 fcvtpu d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtpu s12, s13
-# CHECK-NEXT: 1 2 0.25 fcvtpu h12, h13
+# CHECK-NEXT: 1 3 0.50 fcvtpu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtpu s12, s13
+# CHECK-NEXT: 4 6 1.00 fcvtpu h12, h13
# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtpu v0.2s, v0.2s
# CHECK-NEXT: 2 4 1.00 fcvtpu v0.4h, v0.4h
@@ -1448,33 +1448,33 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtxn s22, d13
# CHECK-NEXT: 1 3 0.50 fcvtxn v0.2s, v0.2d
# CHECK-NEXT: 1 3 0.50 fcvtxn2 v0.4s, v0.2d
-# CHECK-NEXT: 1 2 0.25 fcvtzs d21, d12, #1
-# CHECK-NEXT: 1 2 0.25 fcvtzs d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtzs s12, s13
-# CHECK-NEXT: 1 2 0.25 fcvtzs s21, s12, #1
-# CHECK-NEXT: 1 2 0.25 fcvtzs h21, h14
-# CHECK-NEXT: 1 2 0.25 fcvtzs h21, h12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzs d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtzs s12, s13
+# CHECK-NEXT: 2 4 1.00 fcvtzs s21, s12, #1
+# CHECK-NEXT: 4 6 1.00 fcvtzs h21, h14
+# CHECK-NEXT: 4 6 1.00 fcvtzs h21, h12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 1.00 fcvtzs v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.25 fcvtzu d21, d12, #1
-# CHECK-NEXT: 1 2 0.25 fcvtzu d21, d14
-# CHECK-NEXT: 1 2 0.25 fcvtzu s12, s13
-# CHECK-NEXT: 1 2 0.25 fcvtzu s21, s12, #1
-# CHECK-NEXT: 1 2 0.25 fcvtzu h12, h13
-# CHECK-NEXT: 1 2 0.25 fcvtzu h21, h12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d12, #1
+# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d14
+# CHECK-NEXT: 2 4 1.00 fcvtzu s12, s13
+# CHECK-NEXT: 2 4 1.00 fcvtzu s21, s12, #1
+# CHECK-NEXT: 4 6 1.00 fcvtzu h12, h13
+# CHECK-NEXT: 4 6 1.00 fcvtzu h21, h12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 1.00 fcvtzu v0.8h, v0.8h
# CHECK-NEXT: 1 15 3.50 fdiv v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 10 3.50 fdiv v0.2s, v0.2s, v0.2s
@@ -1768,17 +1768,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 saddw2 v0.2d, v0.2d, v0.4s
# CHECK-NEXT: 1 2 0.25 saddw2 v0.4s, v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.25 saddw2 v0.8h, v0.8h, v0.16b
-# CHECK-NEXT: 1 2 0.25 scvtf d21, d12
-# CHECK-NEXT: 1 2 0.25 scvtf d21, d12, #64
-# CHECK-NEXT: 1 2 0.25 scvtf s22, s13
-# CHECK-NEXT: 1 2 0.25 scvtf s22, s13, #32
+# CHECK-NEXT: 1 3 0.50 scvtf d21, d12
+# CHECK-NEXT: 1 3 0.50 scvtf d21, d12, #64
+# CHECK-NEXT: 2 4 1.00 scvtf s22, s13
+# CHECK-NEXT: 2 4 1.00 scvtf s22, s13, #32
# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 scvtf v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 1.00 scvtf v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.8b
@@ -2210,17 +2210,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddw2 v0.2d, v0.2d, v0.4s
# CHECK-NEXT: 1 2 0.25 uaddw2 v0.4s, v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.25 uaddw2 v0.8h, v0.8h, v0.16b
-# CHECK-NEXT: 1 2 0.25 ucvtf d21, d14
-# CHECK-NEXT: 1 2 0.25 ucvtf d21, d14, #64
-# CHECK-NEXT: 1 2 0.25 ucvtf s22, s13
-# CHECK-NEXT: 1 2 0.25 ucvtf s22, s13, #32
+# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14
+# CHECK-NEXT: 1 3 0.50 ucvtf d21, d14, #64
+# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13
+# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13, #32
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 ucvtf v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 1.00 ucvtf v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.8b
@@ -2465,7 +2465,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10]
-# CHECK-NEXT: - - - - 26.67 49.17 49.17 18.75 7.75 7.75 7.75 401.00 370.50 355.50 325.00
+# CHECK-NEXT: - - - - 26.67 49.17 49.17 18.75 7.75 7.75 7.75 431.00 356.50 385.50 311.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6.0] [6.1] [7] [8] [9] [10] Instructions:
@@ -2614,17 +2614,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt d20, d21, #0.0
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt s10, s11, #0.0
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcmlt v0.4s, v0.4s, #0.0
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtas d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtas s12, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtas h12, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtas d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtas s12, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtas h12, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtas v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtas v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtas v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtau d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtau s12, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtau h12, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtau d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtau s12, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtau h12, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtau v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtau v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtau v0.4h, v0.4h
@@ -2634,17 +2634,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtl v0.4s, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtl2 v0.2d, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtl2 v0.4s, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtms d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtms s22, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtms h22, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtms d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtms s22, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtms h22, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtms v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtms v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtms v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtmu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtmu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtmu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtmu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtmu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtmu h12, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtmu v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtmu v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtmu v0.4h, v0.4h
@@ -2654,33 +2654,33 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtn v0.4h, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtn2 v0.4s, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtn2 v0.8h, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtns d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtns s22, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtns h22, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtns d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtns s22, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtns h22, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtns v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtns v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtns v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtnu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtnu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtnu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtnu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtnu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtnu h12, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtnu v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtnu v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtnu v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtps d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtps s22, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtps h22, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtps d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtps s22, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtps h22, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtps v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtps v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.4s, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtps v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtpu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtpu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtpu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtpu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtpu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtpu h12, h13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtpu v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtpu v0.2s, v0.2s
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtpu v0.4h, v0.4h
@@ -2689,33 +2689,33 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtxn s22, d13
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtxn v0.2s, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtxn2 v0.4s, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs d21, d12, #1
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs s12, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs s21, s12, #1
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs h21, h14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzs d21, d12, #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzs d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs s12, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs h21, h14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs h21, h12, #1
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.8h, v0.8h
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu d21, d12, #1
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu s12, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu s21, s12, #1
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu h12, h13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu h21, h12, #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d12, #1
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu s12, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu s21, s12, #1
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu h12, h13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu h21, h12, #1
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 3.50 - 3.50 - fdiv v0.2d, v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - 3.50 - 3.50 - fdiv v0.2s, v0.2s, v0.2s
@@ -3009,17 +3009,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw2 v0.2d, v0.2d, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw2 v0.4s, v0.4s, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 saddw2 v0.8h, v0.8h, v0.16b
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf d21, d12
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf d21, d12, #64
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf s22, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - scvtf d21, d12
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - scvtf d21, d12, #64
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13, #32
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.8b
@@ -3451,17 +3451,17 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw2 v0.2d, v0.2d, v0.4s
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw2 v0.4s, v0.4s, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 uaddw2 v0.8h, v0.8h, v0.16b
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf d21, d14
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf d21, d14, #64
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf s22, s13
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf s22, s13, #32
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - ucvtf d21, d14
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - ucvtf d21, d14, #64
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13, #32
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.8b
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
index 4347ccf208a979..a174a1a207eba5 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s
@@ -1466,12 +1466,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 4 6 2.00 fcvtzs h21, h14
# CHECK-NEXT: 4 6 2.00 fcvtzs h21, h12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzs v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 fcvtzs v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 2.00 fcvtzs v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzu d21, d14
@@ -1480,12 +1480,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 4 6 2.00 fcvtzu h12, h13
# CHECK-NEXT: 4 6 2.00 fcvtzu h21, h12, #1
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 fcvtzu v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 fcvtzu v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 2.00 fcvtzu v0.8h, v0.8h
# CHECK-NEXT: 1 15 7.00 fdiv v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 10 2.50 fdiv v0.2s, v0.2s, v0.2s
@@ -1795,12 +1795,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 2 4 1.00 scvtf s22, s13
# CHECK-NEXT: 2 4 1.00 scvtf s22, s13, #32
# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 scvtf v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 scvtf v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 scvtf v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 2.00 scvtf v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: 1 3 0.25 sdot v0.2s, v0.8b, v0.8b
@@ -2237,12 +2237,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13
# CHECK-NEXT: 2 4 1.00 ucvtf s22, s13, #32
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.25 ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.25 ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 0.50 ucvtf v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 1.00 ucvtf v0.4h, v0.4h
# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.25 ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 1.00 ucvtf v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 2.00 ucvtf v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: 1 3 0.25 udot v0.2s, v0.8b, v0.8b
@@ -2489,7 +2489,7 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: - - - - 27.33 48.83 48.83 16.17 5.17 5.17 5.17 5.17 5.17 498.25 379.25 446.75 327.75
+# CHECK-NEXT: - - - - 27.33 48.83 48.83 16.17 5.17 5.17 5.17 5.17 5.17 503.25 376.25 451.75 324.75
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -2720,12 +2720,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs h21, h14
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs h21, h12, #1
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzs v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzs v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzs v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d12, #1
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu d21, d14
@@ -2734,12 +2734,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu h12, h13
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu h21, h12, #1
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - fcvtzu v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - fcvtzu v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - fcvtzu v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 7.00 - 7.00 - fdiv v0.2d, v0.2d, v0.2d
# CHECK-NEXT: - - - - - - - - - - - - - 2.50 - 2.50 - fdiv v0.2s, v0.2s, v0.2s
@@ -3049,12 +3049,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - scvtf s22, s13, #32
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - scvtf v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 scvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - scvtf v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - scvtf v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 sdot v0.2s, v0.8b, v0.8b
@@ -3491,12 +3491,12 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf s22, s13, #32
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2s, v0.2s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 0.50 - 0.50 - ucvtf v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4h, v0.4h
# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s
-# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 ucvtf v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - - - - - - - - - - 1.00 - 1.00 - ucvtf v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - - - - - - - - - - 2.00 - 2.00 - ucvtf v0.8h, v0.8h
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.4b[2]
# CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 udot v0.2s, v0.8b, v0.8b
More information about the llvm-commits mailing list