[llvm] [RISCV] Update X60 ReleaseAtCycles for Vector Integer Arithmetic Instructions (PR #152557)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 10:32:01 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Mikhail R. Gadelha (mikhailramalho)
<details>
<summary>Changes</summary>
This PR updates the ReleaseAtCycles values for all instructions described in Section 11 of the RVV spec ("Vector Integer Arithmetic Instructions"). The data used comes from camel-cdr.
---
Patch is 1.97 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152557.diff
9 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td (+29-13)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s (+1177-1177)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-bitwise.s (+937-937)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-comparison.s (+721-721)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-conversion.s (+89-89)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-fma.s (+521-521)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-minmax.s (+289-289)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-mul-div.s (+753-753)
- (modified) llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-permutation.s (+217-217)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
index 5541506ceb1e3..b35118d0a9560 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSpacemitX60.td
@@ -418,21 +418,28 @@ foreach LMul = [1, 2, 4, 8] in {
foreach mx = SchedMxList in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [4] in {
+ let Latency = Get4458Latency<mx>.c, ReleaseAtCycles = [ConstOneUntilM1ThenDouble<mx>.c] in {
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
}
+ // Latency of vadd, vsub, vrsub: 4/4/5/8
+ // ReleaseAtCycles of vadd, vsub, vrsub: 1/2/4/8
+ // Latency of vand, vor, vxor: 4/4/8/16
+ // ReleaseAtCycles of vand, vor, vxor: 2/4/8/16
+ // They are grouped together, so we used the worst case 4/4/8/16 and 2/4/8/16
+ // TODO: use InstRW to override individual instructions' scheduling data
defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
- let Latency = VIALULat, ReleaseAtCycles = [4] in {
- // Pattern of vadd, vsub, vrsub: 4/4/5/8
- // Pattern of vand, vor, vxor: 4/4/8/16
- // They are grouped together, so we used the worst case 4/4/8/16
- // TODO: use InstRW to override individual instructions' scheduling data
+ defvar VIALUOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = VIALULat, ReleaseAtCycles = [VIALUOcc] in {
defm "" : LMULWriteResMX<"WriteVIALUV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIALUX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIALUI", [SMX60_VIEU], mx, IsWorstCase>;
+ }
+ defvar VILogicalLat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
+ defvar VILogicalOcc = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c;
+ let Latency = VILogicalLat, ReleaseAtCycles = [VILogicalOcc] in {
defm "" : LMULWriteResMX<"WriteVExtV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMergeV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMergeX", [SMX60_VIEU], mx, IsWorstCase>;
@@ -449,7 +456,9 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVICALUI", [SMX60_VIEU], mx, IsWorstCase>;
}
- let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [4] in {
+ // Slightly increase Occ when LMUL == M8
+ defvar VICmpCarryOcc = GetLMULValue<[1, 1, 1, 2, 4, 8, 18], mx>.c;
+ let Latency = Get461018Latency<mx>.c, ReleaseAtCycles = [VICmpCarryOcc] in {
defm "" : LMULWriteResMX<"WriteVICALUMV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVICALUMX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVICALUMI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -458,10 +467,14 @@ foreach mx = SchedMxList in {
defm "" : LMULWriteResMX<"WriteVICmpI", [SMX60_VIEU], mx, IsWorstCase>;
}
- // Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
+ // Latency of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5,5,5,8,
// e64 = 7,8,16,32. We use the worst-case until we can split the SEW.
// TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
- let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in {
+ defvar VIMulLat = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c;
+ // ReleaseAtCycles for vnmsac/vnmsub is 1/1/1/1/2/5 but we use the worse case
+ // here since they are grouped together with vmacc/vmadd/vmul/vmulh.
+ defvar VIMulOcc = ConstOneUntilM1ThenDouble<mx>.c;
+ let Latency = VIMulLat, ReleaseAtCycles = [VIMulOcc] in {
defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
@@ -475,7 +488,8 @@ foreach mx = SchedMxList in {
foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
- let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [4] in {
+ defvar VIWideningOcc = ConstOneUntilMF2ThenDouble<mx>.c;
+ let Latency = Get4588Latency<mx>.c, ReleaseAtCycles = [VIWideningOcc] in {
defm "" : LMULWriteResMX<"WriteVIWALUV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWALUX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWALUI", [SMX60_VIEU], mx, IsWorstCase>;
@@ -497,8 +511,9 @@ foreach mx = SchedMxList in {
foreach sew = SchedSEWSet<mx>.val in {
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
- defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
- let Latency = VIDivLat, ReleaseAtCycles = [12] in {
+ // Not pipelined
+ defvar VIDivLatAndOcc = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
+ let Latency = VIDivLatAndOcc, ReleaseAtCycles = [VIDivLatAndOcc] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
}
@@ -510,7 +525,8 @@ foreach mx = SchedMxListW in {
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
- let Latency = VNarrowingLat, ReleaseAtCycles = [4] in {
+ defvar VNarrowingOcc = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c;
+ let Latency = VNarrowingLat, ReleaseAtCycles = [VNarrowingOcc] in {
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
diff --git a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s
index 5cf5ed575a3e2..7f521013cd795 100644
--- a/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s
+++ b/llvm/test/tools/llvm-mca/RISCV/SpacemitX60/rvv-arithmetic.s
@@ -2322,685 +2322,685 @@ vwsub.wx v8, v16, x30
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 2.00 4 SMX60_VIEU[2] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 16 4.00 16 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m1, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 2.00 4 SMX60_VIEU[2] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m2, tu, mu
# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m4, tu, mu
-# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, m8, tu, mu
-# CHECK-NEXT: 1 16 4.00 16 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, mf2, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m1, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 2.00 4 SMX60_VIEU[2] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m2, tu, mu
# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m4, tu, mu
-# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e32, m8, tu, mu
-# CHECK-NEXT: 1 16 4.00 16 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m1, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 4 2.00 4 SMX60_VIEU[2] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m2, tu, mu
# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m4, tu, mu
-# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e64, m8, tu, mu
-# CHECK-NEXT: 1 16 4.00 16 SMX60_VIEU[4] VADD_VI vadd.vi v8, v8, 12
+# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VADD_VI vadd.vi v8, v8, 12
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf2, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf4, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, mf8, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m1, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 4 2.00 4 SMX60_VIEU[2] VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m2, tu, mu
# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m4, tu, mu
-# CHECK-NEXT: 1 8 4.00 8 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 8 8.00 8 SMX60_VIEU[8] VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e8, m8, tu, mu
-# CHECK-NEXT: 1 16 4.00 16 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 16 16.00 16 SMX60_VIEU[16] VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf2, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, e16, mf4, tu, mu
-# CHECK-NEXT: 1 4 4.00 4 SMX60_VIEU[4] VADD_VV vadd.vv v8, v8, v8
+# CHECK-NEXT: 1 4 1.00 4 SMX60_VIEU VADD_VV vadd.vv v8, v8, v8
# CHECK-NEXT: 1 1 1.00 U 1 SMX60_IEU,SMX60_IEUA VSETVLI vsetvli t3, zero, ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/152557
More information about the llvm-commits mailing list