[llvm] [RISCV] Update SpacemiT-X60 vector floating-point instructions (PR #150618)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 25 06:30:42 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Mikhail R. Gadelha (mikhailramalho)
<details>
<summary>Changes</summary>
This PR adds hardware-measured latencies for all instructions defined in Section 13 of the RVV specification: "Vector Floating-Point Instructions" to the SpacemiT-X60 scheduling model.
---
Patch is 917.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150618.diff
5 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td (+124-39)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-conversion.s (+451-451)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fma.s (+145-145)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s (+1771-1771)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-permutation.s (+31-31)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index bf23812c992ba..1af39014aab92 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -75,6 +75,23 @@ class Get461018Latency<string mx> {
);
}
+// Used for: FP FMA operations, complex FP ops
+class Get6678Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M4") : 7,
+ !eq(mx, "M8") : 8,
+ true: 6
+ );
+}
+
+// Used for: e32 multiply pattern, some FP ops
+class Get5558Latency<string mx> {
+ int c = !cond(
+ !eq(mx, "M8") : 8,
+ true: 5
+ );
+}
+
// Used for: e64 multiply pattern, complex ops
class Get781632Latency<string mx> {
int c = !cond(
@@ -518,39 +535,47 @@ foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
- }
-}
+ let Latency = Get458Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
-foreach mx = SchedMxListF in {
- foreach sew = SchedSEWSet<mx, isF=1>.val in {
- defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = !if(!eq(sew, 64), Get5558Latency<mx>.c, Get458Latency<mx>.c), ReleaseAtCycles = [4] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
- defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ // TODO: for some reason, the following cond is not working, and always use Get5558Latency
+ let Latency = !if(!eq(sew, 64), Get6678Latency<mx>.c, Get5558Latency<mx>.c), ReleaseAtCycles = [5] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVFCmpV", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCmpF", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFClassV", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMergeV", [SMX60_VFP], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMovV", [SMX60_VFP], mx, IsWorstCase>;
+ let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [SMX60_VFP], mx, IsWorstCase>;
+ }
- defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVFClassV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMergeV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMovV", [SMX60_VFP], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
// Widening
@@ -558,27 +583,49 @@ foreach mx = SchedMxListW in {
foreach sew = SchedSEWSet<mx, isF=0, isWidening=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListW>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
foreach mx = SchedMxListFW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListFW>.c;
- defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+
+ // Pattern for vfwsub/vfwadd.vv, vfwsub/vfwadd.vf: e16mf4=4, e16mf2=4, e16m1=4, e16m2=5,
+ // e16m4=8, e32mf2=4, e32m1=4, e32m2=5, e32m4=8
+ // Pattern for vfwsub/vfwadd.wv, vfwsub/vfwadd.wf: e16mf4=5, e16mf2=5, e16m1=5, e16m2=9,
+ // e16m4=17, e32mf2=5, e32m1=5, e32m2=9, e32m4=17
+ // TODO: Split .wf/.wv variants into separate scheduling classes to use 5/5/9/17
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ // Pattern for vfwmul.vv, vfwmul.vf: e16 = 4/4/4/6/8. We use 4/4/5/8 as approximation
+ // TODO: e32m4 = 8, but it's set to 5 here
+ let Latency = !if(!eq(sew, 32), Get5558Latency<mx>.c, Get4588Latency<mx>.c), ReleaseAtCycles = [4] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ // Pattern for vfwmacc, vfwnmacc, etc: e16 = 5/5/5/8; e32 = 6/6/7/8
+ // Use existing 6,6,7,8 as close approximation
+ let Latency = Get6678Latency<mx>.c, ReleaseAtCycles = [6] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
@@ -586,26 +633,64 @@ foreach mx = SchedMxListFW in {
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
- defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
-
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+
+ let Latency = !mul(Get1248Latency<mx>.c, 4), ReleaseAtCycles = [4] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtFToFV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
// Vector Floating-Point Division and Square Root
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, 1>.val in {
+ defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+ defvar LMulLat = Get1248Latency<mx>.c;
+
+ // Pattern for vfdiv.vf: e16 = 12/24/48/96; e32 = 12/24/48/96; e64 = 18/36/72/144
+ // Pattern for vfrdiv.vf: e16 = 12/24/48/96; e32 = 12/24/48/96; e64 = 40/80/160/320
+ // We use the worst-case, vfdiv.vf is penalized in e64
+ // TODO: split vfdiv.vf and vfrdiv.vf into separate scheduling classes
+ let Latency = !if(!eq(sew, 64), !mul(LMulLat, 40), !mul(LMulLat, 12)), ReleaseAtCycles = [7] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+
+ // Compute latency based on SEW and LMUL combination
+ defvar SEWLatencyFactor = !cond(
+ !eq(sew, 16) : 12, // e16: 12*LMUL
+ !eq(sew, 32) : 38, // e32: 38*LMUL
+ !eq(sew, 64) : 40 // e64: 40*LMUL
+ );
+
+ let Latency = !mul(LMulLat, SEWLatencyFactor), ReleaseAtCycles = [7] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+// Pattern for vfsqrt.v: e16 = 18/36/72/144; e32 = 38/76/152/304; e64 = 40/80/160/320
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, 1>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SMX60_VFP], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ // Compute latency based on SEW and LMUL combination
+ defvar SEWLatencyFactor = !cond(
+ !eq(sew, 16) : 18, // e16: 18*LMUL
+ !eq(sew, 32) : 38, // e32: 38*LMUL
+ !eq(sew, 64) : 40 // e64: 40*LMUL
+ );
+
+ let Latency = !mul(Get1248Latency<mx>.c, SEWLatencyFactor), ReleaseAtCycles = [18] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SMX60_VFP], mx, sew, IsWorstCase>;
+ }
}
}
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-conversion.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-conversion.s
index 9592d1bf46b85..3f704c74247f2 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-conversion.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-conversion.s
@@ -727,455 +727,455 @@ vfwcvt.xu.f.v v8, v16
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
# CHECK-NEXT: 1 16 4.00 16 SMX60_VIEU[4] VZEXT_VF8 vzext.vf8 v8, v16
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 16 4.00 16 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 16 4.00 16 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 8 4.00 8 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
+# CHECK-NEXT: 1 16 4.00 16 SMX60_VFP[4] VFCVT_F_XU_V vfcvt.f.xu.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_X_V vfcvt.f.x.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_X_V vfcvt.f.x.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_X_V vfcvt.f.x.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] VFCVT_F_X_V vfcvt.f.x.v v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFCVT_F_X_V vfcvt.f.x.v v8, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VFP[4] ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150618
More information about the llvm-commits
mailing list