[llvm] [AArch64] Add missing ASIMD FP convert instructions to scheduling model (PR #115146)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 6 03:07:06 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Rin Dobrescu (Rin18)

<details>
<summary>Changes</summary>

Some ASIMD FP convert instructions have incorrect scheduling information. These instructions currently have latency 2, throughput 4 and utilise pipeline V. This patch corrects the scheduling models to match the relevant Software Optimization Guide.

The V1 and V2 Software Optimization Guides show that ASIMD FP convert instructions should all utilise the V02 pipelines. Their execution latency and throughput should also differ depending on the instruction form. See section 3.17 "ASIMD floating-point instructions" in the Neoverse-V1 and Neoverse-V2 Software Optimization Guides for the performance characteristics of these instructions.

Reference:
- V1 SOG: https://developer.arm.com/documentation/109897/latest/
- V2 SOG: https://developer.arm.com/documentation/109898/latest/

---

Patch is 61.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115146.diff


4 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td (+24-6) 
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td (+6) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s (+113-113) 
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-neon-instructions.s (+25-25) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
index fb4d2f3d7bcd3a..3fd9dd3f2fff92 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -1015,16 +1015,34 @@ def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
                                            "^FCVTXN(v[24]f32|v1i64)$")>;
 
 // ASIMD FP convert, other, D-form F32 and Q-form F64
-def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
-                                           "^[SU]CVTFv2f(32|64)$")>;
+def : InstRW<[V1Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
+                                           "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
+                                           "^FCVT[AMNPZ][SU]v1i64$",
+                                           "^FCVTZ[SU]d$",
+                                           "^[SU]CVTFv2f(32|64)$",
+                                           "^[SU]CVTFv2i(32|64)_shift$",
+                                           "^[SU]CVTFv1i64$",
+                                           "^[SU]CVTFd$")>;
 
 // ASIMD FP convert, other, D-form F16 and Q-form F32
-def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
-                                           "^[SU]CVTFv4f(16|32)$")>;
+def : InstRW<[V1Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+                                           "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
+                                           "^FCVT[AMNPZ][SU]v1i32$",
+                                           "^FCVTZ[SU]s$",
+                                           "^[SU]CVTFv4f(16|32)$",
+                                           "^[SU]CVTFv4i(16|32)_shift$",
+                                           "^[SU]CVTFv1i32$",
+                                           "^[SU]CVTFs$")>;
 
 // ASIMD FP convert, other, Q-form F16
-def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
-                                           "^[SU]CVTFv8f16$")>;
+def : InstRW<[V1Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+                                           "^FCVT[AMNPZ][SU]v8i16_shift$",
+                                           "^FCVT[AMNPZ][SU]v1f16$",
+                                           "^FCVTZ[SU]h$",
+                                           "^[SU]CVTFv8f16$",
+                                           "^[SU]CVTFv8i16_shift$",
+                                           "^[SU]CVTFv1i16$",
+                                           "^[SU]CVTFh$")>;
 
 // ASIMD FP divide, D-form, F16
 // ASIMD FP square root, D-form, F16
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
index f884d20ab7eda9..2de5f59834d787 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -1567,25 +1567,31 @@ def : InstRW<[V2Write_3c_1V02], (instregex "^FCVTN(v2|v4)i32",
 
 // ASIMD FP convert, other, D-form F32 and Q-form F64
 def : InstRW<[V2Write_3c_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
+                                           "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
                                            "^FCVT[AMNPZ][SU]v1i64$",
                                            "^FCVTZ[SU]d$",
                                            "^[SU]CVTFv2f(32|64)$",
+                                           "^[SU]CVTFv2i(32|64)_shift$",
                                            "^[SU]CVTFv1i64$",
                                            "^[SU]CVTFd$")>;
 
 // ASIMD FP convert, other, D-form F16 and Q-form F32
 def : InstRW<[V2Write_4c_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+                                           "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
                                            "^FCVT[AMNPZ][SU]v1i32$",
                                            "^FCVTZ[SU]s$",
                                            "^[SU]CVTFv4f(16|32)$",
+                                           "^[SU]CVTFv4i(16|32)_shift$",
                                            "^[SU]CVTFv1i32$",
                                            "^[SU]CVTFs$")>;
 
 // ASIMD FP convert, other, Q-form F16
 def : InstRW<[V2Write_6c_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+                                           "^FCVT[AMNPZ][SU]v8i16_shift$",
                                            "^FCVT[AMNPZ][SU]v1f16$",
                                            "^FCVTZ[SU]h$",
                                            "^[SU]CVTFv8f16$",
+                                           "^[SU]CVTFv8i16_shift$",
                                            "^[SU]CVTFv1i16$",
                                            "^[SU]CVTFh$")>;
 
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
index 65b73177c7b70a..5ad42d5ff0a131 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-neon-instructions.s
@@ -1373,17 +1373,17 @@ zip2 v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  1      2     0.25                        fcmlt	d20, d21, #0.0
 # CHECK-NEXT:  1      2     0.25                        fcmlt	s10, s11, #0.0
 # CHECK-NEXT:  1      2     0.25                        fcmlt	v0.4s, v0.4s, #0.0
-# CHECK-NEXT:  1      2     0.25                        fcvtas	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtas	s12, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtas	h12, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtas	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtas	s12, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtas	h12, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtas	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtas	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtas	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        fcvtas	v0.4s, v0.4s
 # CHECK-NEXT:  4      6     1.00                        fcvtas	v0.8h, v0.8h
-# CHECK-NEXT:  1      2     0.25                        fcvtau	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtau	s12, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtau	h12, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtau	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtau	s12, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtau	h12, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtau	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtau	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtau	v0.4h, v0.4h
@@ -1393,17 +1393,17 @@ zip2 v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  2      4     1.00                        fcvtl	v0.4s, v0.4h
 # CHECK-NEXT:  1      3     0.50                        fcvtl2	v0.2d, v0.4s
 # CHECK-NEXT:  2      4     1.00                        fcvtl2	v0.4s, v0.8h
-# CHECK-NEXT:  1      2     0.25                        fcvtms	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtms	s22, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtms	h22, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtms	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtms	s22, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtms	h22, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtms	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtms	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtms	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        fcvtms	v0.4s, v0.4s
 # CHECK-NEXT:  4      6     1.00                        fcvtms	v0.8h, v0.8h
-# CHECK-NEXT:  1      2     0.25                        fcvtmu	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtmu	s12, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtmu	h12, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtmu	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtmu	s12, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtmu	h12, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtmu	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtmu	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtmu	v0.4h, v0.4h
@@ -1413,33 +1413,33 @@ zip2 v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  2      4     1.00                        fcvtn	v0.4h, v0.4s
 # CHECK-NEXT:  1      3     0.50                        fcvtn2	v0.4s, v0.2d
 # CHECK-NEXT:  2      4     1.00                        fcvtn2	v0.8h, v0.4s
-# CHECK-NEXT:  1      2     0.25                        fcvtns	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtns	s22, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtns	h22, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtns	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtns	s22, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtns	h22, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtns	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtns	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtns	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        fcvtns	v0.4s, v0.4s
 # CHECK-NEXT:  4      6     1.00                        fcvtns	v0.8h, v0.8h
-# CHECK-NEXT:  1      2     0.25                        fcvtnu	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtnu	s12, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtnu	h12, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtnu	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtnu	s12, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtnu	h12, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtnu	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtnu	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtnu	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        fcvtnu	v0.4s, v0.4s
 # CHECK-NEXT:  4      6     1.00                        fcvtnu	v0.8h, v0.8h
-# CHECK-NEXT:  1      2     0.25                        fcvtps	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtps	s22, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtps	h22, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtps	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtps	s22, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtps	h22, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtps	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtps	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtps	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        fcvtps	v0.4s, v0.4s
 # CHECK-NEXT:  4      6     1.00                        fcvtps	v0.8h, v0.8h
-# CHECK-NEXT:  1      2     0.25                        fcvtpu	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtpu	s12, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtpu	h12, h13
+# CHECK-NEXT:  1      3     0.50                        fcvtpu	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtpu	s12, s13
+# CHECK-NEXT:  4      6     1.00                        fcvtpu	h12, h13
 # CHECK-NEXT:  1      3     0.50                        fcvtpu	v0.2d, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtpu	v0.2s, v0.2s
 # CHECK-NEXT:  2      4     1.00                        fcvtpu	v0.4h, v0.4h
@@ -1448,33 +1448,33 @@ zip2 v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  1      3     0.50                        fcvtxn	s22, d13
 # CHECK-NEXT:  1      3     0.50                        fcvtxn	v0.2s, v0.2d
 # CHECK-NEXT:  1      3     0.50                        fcvtxn2	v0.4s, v0.2d
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	d21, d12, #1
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	s12, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	s21, s12, #1
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	h21, h14
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	h21, h12, #1
+# CHECK-NEXT:  1      3     0.50                        fcvtzs	d21, d12, #1
+# CHECK-NEXT:  1      3     0.50                        fcvtzs	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtzs	s12, s13
+# CHECK-NEXT:  2      4     1.00                        fcvtzs	s21, s12, #1
+# CHECK-NEXT:  4      6     1.00                        fcvtzs	h21, h14
+# CHECK-NEXT:  4      6     1.00                        fcvtzs	h21, h12, #1
 # CHECK-NEXT:  1      3     0.50                        fcvtzs	v0.2d, v0.2d
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	v0.2d, v0.2d, #3
+# CHECK-NEXT:  1      3     0.50                        fcvtzs	v0.2d, v0.2d, #3
 # CHECK-NEXT:  1      3     0.50                        fcvtzs	v0.2s, v0.2s
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	v0.2s, v0.2s, #3
+# CHECK-NEXT:  1      3     0.50                        fcvtzs	v0.2s, v0.2s, #3
 # CHECK-NEXT:  2      4     1.00                        fcvtzs	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        fcvtzs	v0.4s, v0.4s
-# CHECK-NEXT:  1      2     0.25                        fcvtzs	v0.4s, v0.4s, #3
+# CHECK-NEXT:  2      4     1.00                        fcvtzs	v0.4s, v0.4s, #3
 # CHECK-NEXT:  4      6     1.00                        fcvtzs	v0.8h, v0.8h
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	d21, d12, #1
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	d21, d14
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	s12, s13
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	s21, s12, #1
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	h12, h13
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	h21, h12, #1
+# CHECK-NEXT:  1      3     0.50                        fcvtzu	d21, d12, #1
+# CHECK-NEXT:  1      3     0.50                        fcvtzu	d21, d14
+# CHECK-NEXT:  2      4     1.00                        fcvtzu	s12, s13
+# CHECK-NEXT:  2      4     1.00                        fcvtzu	s21, s12, #1
+# CHECK-NEXT:  4      6     1.00                        fcvtzu	h12, h13
+# CHECK-NEXT:  4      6     1.00                        fcvtzu	h21, h12, #1
 # CHECK-NEXT:  1      3     0.50                        fcvtzu	v0.2d, v0.2d
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	v0.2d, v0.2d, #3
+# CHECK-NEXT:  1      3     0.50                        fcvtzu	v0.2d, v0.2d, #3
 # CHECK-NEXT:  1      3     0.50                        fcvtzu	v0.2s, v0.2s
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	v0.2s, v0.2s, #3
+# CHECK-NEXT:  1      3     0.50                        fcvtzu	v0.2s, v0.2s, #3
 # CHECK-NEXT:  2      4     1.00                        fcvtzu	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        fcvtzu	v0.4s, v0.4s
-# CHECK-NEXT:  1      2     0.25                        fcvtzu	v0.4s, v0.4s, #3
+# CHECK-NEXT:  2      4     1.00                        fcvtzu	v0.4s, v0.4s, #3
 # CHECK-NEXT:  4      6     1.00                        fcvtzu	v0.8h, v0.8h
 # CHECK-NEXT:  1      15    3.50                        fdiv	v0.2d, v0.2d, v0.2d
 # CHECK-NEXT:  1      10    3.50                        fdiv	v0.2s, v0.2s, v0.2s
@@ -1768,17 +1768,17 @@ zip2 v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  1      2     0.25                        saddw2	v0.2d, v0.2d, v0.4s
 # CHECK-NEXT:  1      2     0.25                        saddw2	v0.4s, v0.4s, v0.8h
 # CHECK-NEXT:  1      2     0.25                        saddw2	v0.8h, v0.8h, v0.16b
-# CHECK-NEXT:  1      2     0.25                        scvtf	d21, d12
-# CHECK-NEXT:  1      2     0.25                        scvtf	d21, d12, #64
-# CHECK-NEXT:  1      2     0.25                        scvtf	s22, s13
-# CHECK-NEXT:  1      2     0.25                        scvtf	s22, s13, #32
+# CHECK-NEXT:  1      3     0.50                        scvtf	d21, d12
+# CHECK-NEXT:  1      3     0.50                        scvtf	d21, d12, #64
+# CHECK-NEXT:  2      4     1.00                        scvtf	s22, s13
+# CHECK-NEXT:  2      4     1.00                        scvtf	s22, s13, #32
 # CHECK-NEXT:  1      3     0.50                        scvtf	v0.2d, v0.2d
-# CHECK-NEXT:  1      2     0.25                        scvtf	v0.2d, v0.2d, #3
+# CHECK-NEXT:  1      3     0.50                        scvtf	v0.2d, v0.2d, #3
 # CHECK-NEXT:  1      3     0.50                        scvtf	v0.2s, v0.2s
-# CHECK-NEXT:  1      2     0.25                        scvtf	v0.2s, v0.2s, #3
+# CHECK-NEXT:  1      3     0.50                        scvtf	v0.2s, v0.2s, #3
 # CHECK-NEXT:  2      4     1.00                        scvtf	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        scvtf	v0.4s, v0.4s
-# CHECK-NEXT:  1      2     0.25                        scvtf	v0.4s, v0.4s, #3
+# CHECK-NEXT:  2      4     1.00                        scvtf	v0.4s, v0.4s, #3
 # CHECK-NEXT:  4      6     1.00                        scvtf	v0.8h, v0.8h
 # CHECK-NEXT:  1      3     0.25                        sdot	v0.2s, v0.8b, v0.4b[2]
 # CHECK-NEXT:  1      3     0.25                        sdot	v0.2s, v0.8b, v0.8b
@@ -2210,17 +2210,17 @@ zip2 v0.8h, v0.8h, v0.8h
 # CHECK-NEXT:  1      2     0.25                        uaddw2	v0.2d, v0.2d, v0.4s
 # CHECK-NEXT:  1      2     0.25                        uaddw2	v0.4s, v0.4s, v0.8h
 # CHECK-NEXT:  1      2     0.25                        uaddw2	v0.8h, v0.8h, v0.16b
-# CHECK-NEXT:  1      2     0.25                        ucvtf	d21, d14
-# CHECK-NEXT:  1      2     0.25                        ucvtf	d21, d14, #64
-# CHECK-NEXT:  1      2     0.25                        ucvtf	s22, s13
-# CHECK-NEXT:  1      2     0.25                        ucvtf	s22, s13, #32
+# CHECK-NEXT:  1      3     0.50                        ucvtf	d21, d14
+# CHECK-NEXT:  1      3     0.50                        ucvtf	d21, d14, #64
+# CHECK-NEXT:  2      4     1.00                        ucvtf	s22, s13
+# CHECK-NEXT:  2      4     1.00                        ucvtf	s22, s13, #32
 # CHECK-NEXT:  1      3     0.50                        ucvtf	v0.2d, v0.2d
-# CHECK-NEXT:  1      2     0.25                        ucvtf	v0.2d, v0.2d, #3
+# CHECK-NEXT:  1      3     0.50                        ucvtf	v0.2d, v0.2d, #3
 # CHECK-NEXT:  1      3     0.50                        ucvtf	v0.2s, v0.2s
-# CHECK-NEXT:  1      2     0.25                        ucvtf	v0.2s, v0.2s, #3
+# CHECK-NEXT:  1      3     0.50                        ucvtf	v0.2s, v0.2s, #3
 # CHECK-NEXT:  2      4     1.00                        ucvtf	v0.4h, v0.4h
 # CHECK-NEXT:  2      4     1.00                        ucvtf	v0.4s, v0.4s
-# CHECK-NEXT:  1      2     0.25                        ucvtf	v0.4s, v0.4s, #3
+# CHECK-NEXT:  2      4     1.00                        ucvtf	v0.4s, v0.4s, #3
 # CHECK-NEXT:  4      6     1.00                        ucvtf	v0.8h, v0.8h
 # CHECK-NEXT:  1      3     0.25                        udot	v0.2s, v0.8b, v0.4b[2]
 # CHECK-NEXT:  1      3     0.25                        udot	v0.2s, v0.8b, v0.8b
@@ -2465,7 +2465,7 @@ zip2 v0.8h, v0.8h, v0.8h
 
 # CHECK:      Resource pressure per iteration:
 # CHECK-NEXT: [0.0]  [0.1]  [1.0]  [1.1]  [2]    [3.0]  [3.1]  [4]    [5]    [6.0]  [6.1]  [7]    [8]    [9]    [10]
-# CHECK-NEXT:  -      -      -      -     26.67  49.17  49.17  18.75  7.75   7.75   7.75   401.00 370.50 355.50 325.00
+# CHECK-NEXT:  -      -      -      -     26.67  49.17  49.17  18.75  7.75   7.75   7.75   431.00 356.50 385.50 311.00
 
 # CHEC...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/115146


More information about the llvm-commits mailing list