[llvm] [RISCV] Update SpacemiT-X60 vector permutation instructions latencies (PR #152738)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 08:00:33 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Mikhail R. Gadelha (mikhailramalho)
<details>
<summary>Changes</summary>
This PR adds hardware-measured latencies for all instructions defined in Section 16 of the RVV specification: "Vector Permutation Instructions" to the SpacemiT-X60 scheduling model.
---
Patch is 265.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152738.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td (+63-20)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s (+61-61)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-permutation.s (+560-560)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 5541506ceb1e3..7064459c22ab4 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -683,39 +683,82 @@ foreach mx = SchedMxList in {
}
// 16. Vector Permutation Instructions
+// Slide
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
+ // Latency for slide up: 4/4/8/16, ReleaseAtCycles is 2/4/8/16
+ defvar VSlideUpLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VSlideUpOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VSlideUpLat, ReleaseAtCycles =[VSlideUpOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
- defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
+ // Latency for slide down: 4/5/9/17, ReleaseAtCycles is 3/5/9/17
+ defvar VSlideDownLat = GetLMULValue<[4, 4, 4, 4, 5, 9, 17], mx>.c;
+ defvar VSlideDownOcc = GetLMULValue<[1, 1, 1, 3, 5, 9, 17], mx>.c;
+ let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+ // The following group slide up and down together, so we use the worst-case
+ // (slide down) for all.
+ let Latency = VSlideDownLat, ReleaseAtCycles =[VSlideDownOcc] in {
+ defm "" : LMULWriteResMX<"WriteVSlideI", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSlideUpX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSlideDownX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SMX60_VFP], mx, IsWorstCase>;
+ }
}
-def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
-def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;
-
-def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
-def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
+// ReleaseAtCycles is 2/2/2/2/2/3/6, but we can't set based on MX for now
+// TODO: Split this into separate WriteRes for each MX
+let Latency = 6, ReleaseAtCycles = [6] in {
+ def : WriteRes<WriteVMovXS, [SMX60_VIEU]>;
+}
-// Gather and Compress
-foreach mx = SchedMxList in {
- foreach sew = SchedSEWSet<mx>.val in {
- defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCase>;
- }
+// ReleaseAtCycles is 1/1/1/1/1/2/4, but we can't set based on MX for now
+// TODO: Split this into separate WriteRes for each MX
+let Latency = 4, ReleaseAtCycles = [4] in {
+ def : WriteRes<WriteVMovSX, [SMX60_VIEU]>;
+ def : WriteRes<WriteVMovFS, [SMX60_VIEU]>;
+ def : WriteRes<WriteVMovSF, [SMX60_VIEU]>;
}
+// Integer LMUL Gather and Compress
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
+ defvar VRGatherLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ let Latency = VRGatherLat, ReleaseAtCycles = [ConstOneUntilMF2ThenDouble<mx>.c] in {
+ defm "" : LMULWriteResMX<"WriteVRGatherVX", [SMX60_VIEU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRGatherVI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar IsWorstCaseSEW = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+
+ defvar VRGatherVVLat = GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c;
+ defvar VRGatherVVOcc = GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c;
+ let Latency = VRGatherVVLat, ReleaseAtCycles = [VRGatherVVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+ // For sew == 8, latency is half of the other cases, except for the fractional LMULs (const 4 cycles)
+ defvar VRGatherEI16Lat = !if(!eq(sew, 8),
+ GetLMULValue<[4, 4, 4, 8, 32, 128, 256], mx>.c,
+ GetLMULValue<[4, 4, 4, 4, 16, 64, 256], mx>.c);
+ defvar VRGatherEI16Occ = !if(!eq(sew, 8),
+ GetLMULValue<[1, 1, 2, 8, 32, 128, 256], mx>.c,
+ GetLMULValue<[1, 1, 1, 4, 16, 64, 256], mx>.c);
+ let Latency = VRGatherEI16Lat, ReleaseAtCycles = [VRGatherEI16Occ] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherEI16VV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+
+ defvar VCompressVLat = GetLMULValue<[4, 4, 4, 4, 10, 36, 136], mx>.c;
+ defvar VCompressVOcc = GetLMULValue<[1, 1, 1, 3, 10, 36, 136], mx>.c;
+ let Latency = VCompressVLat, ReleaseAtCycles = [VCompressVOcc] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SMX60_VIEU], mx, sew, IsWorstCaseSEW>;
+ }
+ }
}
// Others
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s
index f59c7987b615b..311a13c9427b1 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fp.s
@@ -2911,65 +2911,65 @@ vfwsub.wv v8, v16, v24
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFMADD_VV vfmadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_F_S vfmv.f.s fs0, v8
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_F_S vfmv.f.s fs0, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 1 1.00 1 SMX60_VIEU VFMV_S_F vfmv.s.f v8, fs0
+# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VFMV_S_F vfmv.s.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
# CHECK-NEXT: 1 1 1.00 1 SMX60_VFP VFMV_V_F vfmv.v.f v8, fs0
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
@@ -3763,7 +3763,7 @@ vfwsub.wv v8, v16, v24
#...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/152738
More information about the llvm-commits
mailing list