[llvm] [AArch64] Fix metrics of ASIMD instructions in Neoverse N3 (PR #169790)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 27 03:39:02 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Asher Dobrescu (Asher8118)
<details>
<summary>Changes</summary>
Some ASIMD instructions were missed in the Neoverse N3 scheduler model and fall back to the default definitions, which give them incorrect latency and throughput. This patch fixes those instructions to match the current Neoverse N3 Software Optimization Guide (SWOG).
---
Patch is 85.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169790.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td (+19-19)
- (modified) llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s (+171-171)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
index c73f60a1a7741..beeadd4403605 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td
@@ -1073,7 +1073,7 @@ def : SchedAlias<WriteVq, N3Write_2c_1V>;
// ASIMD shift accumulate
def : InstRW<[N3Wr_ADA, N3Rd_ADA], (instregex "^[SU]ABAL?v",
"^[SU]ADALPv",
- "^[SU]R?SRAv")>;
+ "^[SU]R?SRA(v|d)")>;
// ASIMD arith, reduce, 4H/4S
def : InstRW<[N3Write_3c_1V1], (instregex "^[SU]?ADDL?Vv4i(16|32)v$")>;
@@ -1114,30 +1114,30 @@ def : InstRW<[N3Wr_VMAH, N3Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
def : InstRW<[N3Wr_VMAL, N3Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
// ASIMD multiply accumulate saturating long
-def : InstRW<[N3Wr_VMASL, N3Rd_VMASL], (instregex "^SQDMLALv", "^SQDMLSLv")>;
+def : InstRW<[N3Wr_VMASL, N3Rd_VMASL], (instregex "^SQDMLAL(v|i16|i32)", "^SQDMLSL(v|i16|i32)")>;
// ASIMD multiply/multiply long (8x8) polynomial, D-form
// ASIMD multiply/multiply long (8x8) polynomial, Q-form
def : InstRW<[N3Write_2c_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
// ASIMD multiply long
-def : InstRW<[N3Write_4c_1V0], (instregex "^[SU]MULLv", "^SQDMULLv")>;
+def : InstRW<[N3Write_4c_1V0], (instregex "^[SU]MULLv", "^SQDMULL(v|i16|i32)")>;
// ASIMD shift by immed, basic
-def : InstRW<[N3Write_2c_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
- "^SSHLLv", "^SSHRv", "^USHLLv",
- "^USHRv")>;
+def : InstRW<[N3Write_2c_1V1], (instregex "^SHL(v|d)", "^SHLLv", "^SHRNv",
+ "^SSHLLv", "^SSHR(v|d)", "^USHLLv",
+ "^USHR(v|d)")>;
// ASIMD shift by immed and insert, basic
-def : InstRW<[N3Write_2c_1V1], (instregex "^SLIv", "^SRIv")>;
+def : InstRW<[N3Write_2c_1V1], (instregex "^SLI(v|d)", "^SRI(v|d)")>;
// ASIMD shift by immed, complex
def : InstRW<[N3Write_4c_1V1],
- (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
+ (instregex "^RSHRNv", "^SQRSHRN(v|b|h|s)", "^SQRSHRUN(v|b|h|s)",
"^(SQSHLU?|UQSHL)[bhsd]$",
"^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
- "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
- "^UQSHRNv", "^URSHRv")>;
+ "^SQSHRN(v|b|h|s)", "^SQSHRUN(v|b|h|s)", "^SRSHR(v|d)",
+ "^UQRSHRN(v|b|h|s)", "^UQSHRN(v|b|h|s)","^URSHR(v|d)")>;
// ASIMD shift by register, basic
def : InstRW<[N3Write_2c_1V1], (instregex "^[SU]SHLv")>;
@@ -1173,16 +1173,16 @@ def : InstRW<[N3Write_3c_1V0], (instregex "^FCVTL(v2|v4)i32")>;
def : InstRW<[N3Write_4c_2V0], (instregex "^FCVTN(v4|v8)i16")>;
// ASIMD FP convert, narrow (F64 to F32)
-def : InstRW<[N3Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32",
+def : InstRW<[N3Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32", "^FCVTXNv1i64",
"^FCVTXN(v2|v4)f32")>;
// ASIMD FP convert, other, D-form F32 and Q-form F64
-def : InstRW<[N3Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
- "^[SU]CVTFv2f(32|64)$")>;
+def : InstRW<[N3Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU](v2f(32|64)|s|d|v1i32|v1i64|v2i32_shift|v2i64_shift)$",
+ "^[SU]CVTF(v2f(32|64)|s|d|v1i32|v1i64|v2i32_shift|v2i64_shift)$")>;
// ASIMD FP convert, other, D-form F16 and Q-form F32
-def : InstRW<[N3Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
- "^[SU]CVTFv4f(16|32)$")>;
+def : InstRW<[N3Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU](v4f(16|32)|v4i(16|32)_shift)$",
+ "^[SU]CVTF(v4f(16|32)|v4i(16|32)_shift)$")>;
// ASIMD FP convert, other, Q-form F16
def : InstRW<[N3Write_6c_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
@@ -1217,7 +1217,7 @@ def : InstRW<[N3Write_4c_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
def : InstRW<[N3Write_6c_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
// ASIMD FP multiply
-def : InstRW<[N3Wr_FPM], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[N3Wr_FPM], (instregex "^FMULv", "^FMULX(v|32|64)")>;
// ASIMD FP multiply accumulate
def : InstRW<[N3Wr_FPMA, N3Rd_FPMA], (instregex "^FMLAv", "^FMLSv")>;
@@ -1305,9 +1305,9 @@ def : InstRW<[N3Write_4c_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
def : InstRW<[N3Write_3c_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
- FRECPEv1i64, FRECPEv2f32,
+ FRECPEv1i64, FRECPEv2f32, FRECPEv2f64,
FRSQRTEv1f16, FRSQRTEv1i32,
- FRSQRTEv1i64, FRSQRTEv2f32)>;
+ FRSQRTEv1i64, FRSQRTEv2f32, FRSQRTEv2f64)>;
// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[N3Write_4c_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
@@ -1320,7 +1320,7 @@ def : InstRW<[N3Write_6c_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
def : InstRW<[N3Write_3c_1V0], (instregex "^FRECPXv")>;
// ASIMD reciprocal step
-def : InstRW<[N3Write_4c_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
+def : InstRW<[N3Write_4c_1V], (instregex "^FRECPS(v|32|64)", "^FRSQRTS(v|32|64)")>;
// ASIMD table lookup, 3 table regs
def : InstRW<[N3Write_4c_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s
index dddaca34f68dd..da8c0c5154cdc 100644
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N3-neon-instructions.s
@@ -1189,15 +1189,15 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 fcmlt d20, d21, #0.0
# CHECK-NEXT: 1 2 0.50 fcmlt s10, s11, #0.0
# CHECK-NEXT: 1 2 0.50 fcmlt v0.4s, v0.4s, #0.0
-# CHECK-NEXT: 1 2 0.50 fcvtas d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtas s12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtas d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtas s12, s13
# CHECK-NEXT: 1 3 1.00 fcvtas v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtas v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtas v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtas v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 fcvtas v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtau d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtau s12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtau d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtau s12, s13
# CHECK-NEXT: 1 3 1.00 fcvtau v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtau v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtau v0.4h, v0.4h
@@ -1207,15 +1207,15 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 2 4 2.00 fcvtl v0.4s, v0.4h
# CHECK-NEXT: 1 3 1.00 fcvtl2 v0.2d, v0.4s
# CHECK-NEXT: 2 4 2.00 fcvtl2 v0.4s, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtms d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtms s22, s13
+# CHECK-NEXT: 1 3 1.00 fcvtms d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtms s22, s13
# CHECK-NEXT: 1 3 1.00 fcvtms v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtms v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtms v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtms v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 fcvtms v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtmu d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtmu s12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtmu d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtmu s12, s13
# CHECK-NEXT: 1 3 1.00 fcvtmu v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtmu v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtmu v0.4h, v0.4h
@@ -1225,60 +1225,60 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 2 4 2.00 fcvtn v0.4h, v0.4s
# CHECK-NEXT: 1 3 1.00 fcvtn2 v0.4s, v0.2d
# CHECK-NEXT: 2 4 2.00 fcvtn2 v0.8h, v0.4s
-# CHECK-NEXT: 1 2 0.50 fcvtns d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtns s22, s13
+# CHECK-NEXT: 1 3 1.00 fcvtns d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtns s22, s13
# CHECK-NEXT: 1 3 1.00 fcvtns v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtns v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtns v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtns v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 fcvtns v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtnu d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtnu s12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtnu d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtnu s12, s13
# CHECK-NEXT: 1 3 1.00 fcvtnu v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtnu v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtnu v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtnu v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 fcvtnu v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtps d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtps s22, s13
+# CHECK-NEXT: 1 3 1.00 fcvtps d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtps s22, s13
# CHECK-NEXT: 1 3 1.00 fcvtps v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtps v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtps v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtps v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 fcvtps v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtpu d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtpu s12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtpu d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtpu s12, s13
# CHECK-NEXT: 1 3 1.00 fcvtpu v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtpu v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 fcvtpu v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtpu v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 fcvtpu v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtxn s22, d13
+# CHECK-NEXT: 1 3 1.00 fcvtxn s22, d13
# CHECK-NEXT: 1 3 1.00 fcvtxn v0.2s, v0.2d
# CHECK-NEXT: 1 3 1.00 fcvtxn2 v0.4s, v0.2d
-# CHECK-NEXT: 1 2 0.50 fcvtzs d21, d12, #1
-# CHECK-NEXT: 1 2 0.50 fcvtzs d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtzs s12, s13
-# CHECK-NEXT: 1 2 0.50 fcvtzs s21, s12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs d21, d12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzs d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtzs s12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtzs s21, s12, #1
# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.50 fcvtzs v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.50 fcvtzs v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 1.00 fcvtzs v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 2.00 fcvtzs v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtzs v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.50 fcvtzs v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 2.00 fcvtzs v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 4.00 fcvtzs v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 fcvtzu d21, d12, #1
-# CHECK-NEXT: 1 2 0.50 fcvtzu d21, d14
-# CHECK-NEXT: 1 2 0.50 fcvtzu s12, s13
-# CHECK-NEXT: 1 2 0.50 fcvtzu s21, s12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu d21, d12, #1
+# CHECK-NEXT: 1 3 1.00 fcvtzu d21, d14
+# CHECK-NEXT: 1 3 1.00 fcvtzu s12, s13
+# CHECK-NEXT: 1 3 1.00 fcvtzu s21, s12, #1
# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.50 fcvtzu v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.50 fcvtzu v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 1.00 fcvtzu v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 2.00 fcvtzu v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 fcvtzu v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.50 fcvtzu v0.4s, v0.4s, #3
+# CHECK-NEXT: 2 4 2.00 fcvtzu v0.4s, v0.4s, #3
# CHECK-NEXT: 4 6 4.00 fcvtzu v0.8h, v0.8h
# CHECK-NEXT: 2 8 2.00 fdiv v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 2 0.50 fmax v0.2d, v0.2d, v0.2d
@@ -1318,8 +1318,8 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 3 0.50 fmul s0, s1, v0.s[3]
# CHECK-NEXT: 1 3 0.50 fmul v0.2s, v0.2s, v0.2s
# CHECK-NEXT: 1 3 0.50 fmulx d0, d4, v0.d[1]
-# CHECK-NEXT: 1 2 0.50 fmulx d23, d11, d1
-# CHECK-NEXT: 1 2 0.50 fmulx s20, s22, s15
+# CHECK-NEXT: 1 3 0.50 fmulx d23, d11, d1
+# CHECK-NEXT: 1 3 0.50 fmulx s20, s22, s15
# CHECK-NEXT: 1 3 0.50 fmulx s3, s5, v0.s[3]
# CHECK-NEXT: 1 3 0.50 fmulx v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 1 3 0.50 fmulx v0.2s, v0.2s, v0.2s
@@ -1331,14 +1331,14 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 fneg v0.8h, v0.8h
# CHECK-NEXT: 1 3 1.00 frecpe d13, d13
# CHECK-NEXT: 1 3 1.00 frecpe s19, s14
-# CHECK-NEXT: 1 2 0.50 frecpe v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frecpe v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 frecpe v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 frecpe v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 frecpe v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 frecpe v0.8h, v0.8h
# CHECK-NEXT: 1 4 0.50 frecps v0.4s, v0.4s, v0.4s
-# CHECK-NEXT: 1 2 0.50 frecps d22, d30, d21
-# CHECK-NEXT: 1 2 0.50 frecps s21, s16, s13
+# CHECK-NEXT: 1 4 0.50 frecps d22, d30, d21
+# CHECK-NEXT: 1 4 0.50 frecps s21, s16, s13
# CHECK-NEXT: 1 3 1.00 frecpx d16, d19
# CHECK-NEXT: 1 3 1.00 frecpx s18, s10
# CHECK-NEXT: 1 3 1.00 frinta v0.2d, v0.2d
@@ -1378,13 +1378,13 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 4 6 4.00 frintz v0.8h, v0.8h
# CHECK-NEXT: 1 3 1.00 frsqrte d21, d12
# CHECK-NEXT: 1 3 1.00 frsqrte s22, s13
-# CHECK-NEXT: 1 2 0.50 frsqrte v0.2d, v0.2d
+# CHECK-NEXT: 1 3 1.00 frsqrte v0.2d, v0.2d
# CHECK-NEXT: 1 3 1.00 frsqrte v0.2s, v0.2s
# CHECK-NEXT: 2 4 2.00 frsqrte v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 frsqrte v0.4s, v0.4s
# CHECK-NEXT: 4 6 4.00 frsqrte v0.8h, v0.8h
-# CHECK-NEXT: 1 2 0.50 frsqrts d8, d22, d18
-# CHECK-NEXT: 1 2 0.50 frsqrts s21, s5, s12
+# CHECK-NEXT: 1 4 0.50 frsqrts d8, d22, d18
+# CHECK-NEXT: 1 4 0.50 frsqrts s21, s5, s12
# CHECK-NEXT: 1 4 0.50 frsqrts v0.2d, v0.2d, v0.2d
# CHECK-NEXT: 2 13 2.00 fsqrt v0.2d, v0.2d
# CHECK-NEXT: 2 8 2.00 fsqrt v0.2s, v0.2s
@@ -1543,20 +1543,20 @@ zip2 v0.8h, v0.8h, v0.8h
# CHECK-NEXT: 1 2 0.50 saddw2 v0.2d, v0.2d, v0.4s
# CHECK-NEXT: 1 2 0.50 saddw2 v0.4s, v0.4s, v0.8h
# CHECK-NEXT: 1 2 0.50 saddw2 v0.8h, v0.8h, v0.16b
-# CHECK-NEXT: 1 2 0.50 scvtf d21, d12
-# CHECK-NEXT: 1 2 0.50 scvtf d21, d12, #64
-# CHECK-NEXT: 1 2 0.50 scvtf s22, s13
-# CHECK-NEXT: 1 2 0.50 scvtf s22, s13, #32
+# CHECK-NEXT: 1 3 1.00 scvtf d21, d12
+# CHECK-NEXT: 1 3 1.00 scvtf d21, d12, #64
+# CHECK-NEXT: 1 3 1.00 scvtf s22, s13
+# CHECK-NEXT: 1 3 1.00 scvtf s22, s13, #32
# CHECK-NEXT: 1 3 1.00 scvtf v0.2d, v0.2d
-# CHECK-NEXT: 1 2 0.50 scvtf v0.2d, v0.2d, #3
+# CHECK-NEXT: 1 3 1.00 scvtf v0.2d, v0.2d, #3
# CHECK-NEXT: 1 3 1.00 scvtf v0.2s, v0.2s
-# CHECK-NEXT: 1 2 0.50 scvtf v0.2s, v0.2s, #3
+# CHECK-NEXT: 1 3 1.00 scvtf v0.2s, v0.2s, #3
# CHECK-NEXT: 2 4 2.00 scvtf v0.4h, v0.4h
# CHECK-NEXT: 2 4 2.00 scvt...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/169790
More information about the llvm-commits
mailing list