[llvm] [RISCV] Update SiFive P600's scheduling model on RVV instructions (PR #115243)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 16:33:16 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Min-Yih Hsu (mshockwave)
<details>
<summary>Changes</summary>
The biggest change is assigning vector crypto instructions to the correct processor resource.
The majority of these changes are guided by our RVV-capable llvm-exegesis.
---
Patch is 211.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115243.diff
9 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td (+461-117)
- (added) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/mask.s (+129)
- (added) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/vmv.s (+816)
- (modified) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbb.s (+169-169)
- (modified) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvbc.s (+23-23)
- (modified) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkg.s (+27-27)
- (modified) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvkned.s (+59-59)
- (modified) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvknhb.s (+35-35)
- (modified) llvm/test/tools/llvm-mca/RISCV/SiFiveP600/zvksh.s (+16-16)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 0543b999fd647d..c2d93d4c0a7f0a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -50,6 +50,240 @@ class SiFiveP600GetCyclesSegmented<string mx, int sew, int nf> {
int c = !mul(VLUpperBound, nf);
}
+class SiFiveP600VSM3CCycles<string mx> {
+ // c = ceil(LMUL / 2)
+ int c = !cond(!eq(mx, "M2") : 1,
+ !eq(mx, "M4") : 2,
+ !eq(mx, "M8") : 4,
+ true : 1);
+}
+
+class SiFiveP600RVVMultiplier<string mx> {
+ int c = !if(!eq(mx, "M8"), 2, 1);
+}
+
+// ======================================================================
+// The latency and occupancy data in this section are primarily evaluated
+// from llvm-exegesis.
+// ======================================================================
+
+class SiFiveP600VCryptoLatency<string mx> {
+ int c = !cond(
+ !eq(mx, "M4"): 4,
+ !eq(mx, "M8"): 8,
+ true: 2
+ );
+}
+
+class SiFiveP600VFMinMaxReduction<string mx, int sew> {
+ defvar E64Lat = !cond(
+ !eq(mx, "M1") : 4,
+ !eq(mx, "M2") : 6,
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 10,
+ true: 2
+ );
+
+ defvar E64Cycles = !cond(
+ !eq(mx, "M1") : 3,
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 5,
+ !eq(mx, "M8") : 6,
+ true: 2
+ );
+
+ int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 2));
+ int cycles = !if(!eq(sew, 64), E64Cycles, !add(E64Cycles, 1));
+}
+
+class SiFiveP600VFUnorderedReduction<string mx, int sew> {
+ defvar E64Lat = !cond(
+ !eq(mx, "M1") : 6,
+ !eq(mx, "M2") : 12,
+ !eq(mx, "M4") : 18,
+ !eq(mx, "M8") : 24,
+ true: 2
+ );
+
+ defvar E32Cycles = !cond(
+ !eq(mx, "M1") : 10,
+ !eq(mx, "M2") : 10,
+ !eq(mx, "M4") : 11,
+ !eq(mx, "M8") : 11,
+ true: 6
+ );
+
+ int latency = !if(!eq(sew, 64), E64Lat, !add(E64Lat, 4));
+ int cycles = !if(!eq(sew, 64), 6, E32Cycles);
+}
+
+class SiFiveP600VFWidenUnorderedReduction<string mx> {
+ int latency = !cond(
+ !eq(mx, "M1") : 10,
+ !eq(mx, "M2") : 18,
+ !eq(mx, "M4") : 24,
+ !eq(mx, "M8") : 30,
+ true: 6
+ );
+}
+
+class SiFiveP600VFOrderedReduction<string mx, int sew> {
+ defvar Base = !if(!eq(sew, 64), 6, 10);
+ int c = !cond(
+ !eq(mx, "M1") : Base,
+ !eq(mx, "M2") : !mul(Base, 2),
+ !eq(mx, "M4") : !mul(Base, 4),
+ !eq(mx, "M8") : !mul(Base, 8),
+ true: 6
+ );
+}
+
+class SiFiveP600VIReductionLatency<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 16,
+ // M1 and lower
+ true: 2
+ );
+}
+
+class SiFiveP600VIMinMaxReductionLatency<string mx, int sew> {
+ // +-----+-----+-----+-----+----+
+ // | | E64 | E32 | E16 | E8 |
+ // +-----+-----+-----+-----+----+
+ // | MF8 | X | X | X | 4 |
+ // +-----+-----+-----+-----+----+
+ // | MF4 | X | X | 4 | 6 |
+ // +-----+-----+-----+-----+----+
+ // | MF2 | X | 4 | 6 | 8 |
+ // +-----+-----+-----+-----+----+
+ // | M1 | 4 | 6 | 8 | 10 |
+ // +-----+-----+-----+-----+----+
+ // | M2 | 6 | 8 | 10 | 12 |
+ // +-----+-----+-----+-----+----+
+ // | M4 | 8 | 10 | 12 | 14 |
+ // +-----+-----+-----+-----+----+
+ // | M8 | 10 | 12 | 14 | 16 |
+ // +-----+-----+-----+-----+----+
+ defvar BaseIndex = !cond(
+ !eq(sew, 64): 0,
+ !eq(sew, 32): 1,
+ !eq(sew, 16): 2,
+ !eq(sew, 8): 3
+ );
+
+ defvar Latencies = [4, 6, 8, 10, 12, 14, 16];
+
+ int c = !cond(
+ !eq(mx, "M1") : Latencies[BaseIndex],
+ !eq(mx, "M2") : Latencies[!add(BaseIndex, 1)],
+ !eq(mx, "M4") : Latencies[!add(BaseIndex, 2)],
+ !eq(mx, "M8") : Latencies[!add(BaseIndex, 3)],
+ // Fractional
+ !eq(mx, "MF2"): Latencies[!sub(BaseIndex, 1)],
+ !eq(mx, "MF4"): Latencies[!sub(BaseIndex, 2)],
+ !eq(mx, "MF8"): Latencies[!sub(BaseIndex, 3)],
+ );
+}
+
+class SiFiveP600VIMinMaxReductionCycles<string mx, int sew> {
+ // +-----+-----+-----+-----+----+
+ // | | E64 | E32 | E16 | E8 |
+ // +-----+-----+-----+-----+----+
+ // | MF8 | X | X | X | 3 |
+ // +-----+-----+-----+-----+----+
+ // | MF4 | X | X | 3 | 5 |
+ // +-----+-----+-----+-----+----+
+ // | MF2 | X | 3 | 5 | 6 |
+ // +-----+-----+-----+-----+----+
+ // | M1 | 3 | 4 | 6 | 8 |
+ // +-----+-----+-----+-----+----+
+ // | M2 | 4 | 5 | 8 | 9 |
+ // +-----+-----+-----+-----+----+
+ // | M4 | 5 | 6 | 10 | 11 |
+ // +-----+-----+-----+-----+----+
+ // | M8 | 7 | 8 | 9 | 11 |
+ // +-----+-----+-----+-----+----+
+ defvar Index = !cond(
+ !eq(sew, 64): 0,
+ !eq(sew, 32): 1,
+ !eq(sew, 16): 2,
+ !eq(sew, 8): 3
+ );
+
+ defvar Cycles = [
+ [0, 0, 0, 3],
+ [0, 0, 3, 5],
+ [0, 3, 5, 6],
+ [3, 4, 6, 8],
+ [4, 5, 8, 9],
+ [5, 6, 10, 11],
+ [7, 8, 9, 11]
+ ];
+
+ int c = !cond(
+ !eq(mx, "MF8"): Cycles[0][Index],
+ !eq(mx, "MF4"): Cycles[1][Index],
+ !eq(mx, "MF2"): Cycles[2][Index],
+ !eq(mx, "M1"): Cycles[3][Index],
+ !eq(mx, "M2"): Cycles[4][Index],
+ !eq(mx, "M4"): Cycles[5][Index],
+ !eq(mx, "M8"): Cycles[6][Index],
+ );
+}
+
+class SiFiveP600VSlide1<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 3,
+ !eq(mx, "M4") : 4,
+ !eq(mx, "M8") : 8,
+ // M1 and lower
+ true: 2
+ );
+}
+
+class SiFiveP600VSlideI<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 6,
+ !eq(mx, "M8") : 8,
+ // M1 and lower
+ true: 2
+ );
+}
+
+class SiFiveP600VSlideXComplex<string mx, bit isUp = false> {
+ int latency = !cond(
+ !eq(mx, "M2") : 11,
+ !eq(mx, "M4") : 14,
+ !eq(mx, "M8") : 20
+ );
+
+ int cycles = !cond(
+ !eq(mx, "M2") : !if(isUp, 10, 11),
+ !eq(mx, "M4") : !if(isUp, 12, 14),
+ !eq(mx, "M8") : !if(isUp, 16, 20)
+ );
+}
+
+class SiFiveP600VPermutationComplex<string mx> {
+ int c = !cond(
+ !eq(mx, "M2") : 12,
+ !eq(mx, "M4") : 16,
+ !eq(mx, "M8") : 24
+ );
+}
+
+class SiFiveP600VSHA2MSCycles<string mx, int sew> {
+ int c = !cond(
+ !eq(mx, "M2") : !if(!eq(sew, 32), 2, 3),
+ !eq(mx, "M4") : !if(!eq(sew, 32), 4, 6),
+ !eq(mx, "M8") : !if(!eq(sew, 32), 8, 12),
+ true: 1
+ );
+}
+
// SiFiveP600 machine model for scheduling and other instruction cost heuristics.
def SiFiveP600Model : SchedMachineModel {
let IssueWidth = 4; // 4 micro-ops are dispatched per cycle.
@@ -95,6 +329,12 @@ def SiFiveP600FloatDiv : ProcResource<1>;
def SiFiveP600VEXQ0 : ProcResource<1>;
def SiFiveP600VEXQ1 : ProcResource<1>;
def SiFiveP600VectorArith : ProcResGroup<[SiFiveP600VEXQ0, SiFiveP600VEXQ1]>;
+
+// Only VEXQ0 has mask unit.
+defvar SiFiveP600VectorMask = SiFiveP600VEXQ0;
+// Only VEXQ0 has vector crypto.
+defvar SiFiveP600VectorCrypto = SiFiveP600VEXQ0;
+
def SiFiveP600VLD : ProcResource<1>;
def SiFiveP600VST : ProcResource<1>;
def SiFiveP600VDiv : ProcResource<1>;
@@ -386,7 +626,7 @@ foreach LMul = [1, 2, 4, 8] in {
def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [SiFiveP600VLD]>;
def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [SiFiveP600VST]>;
}
- let Latency = LMul, ReleaseAtCycles = [LMul] in {
+ let Latency = 2, ReleaseAtCycles = [LMul] in {
def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [SiFiveP600VectorArith]>;
}
}
@@ -395,37 +635,42 @@ foreach LMul = [1, 2, 4, 8] in {
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVExtV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMovI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorMask], mx, IsWorstCase>;
}
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ let ReleaseAtCycles = [LMulLat] in {
+ let Latency = 6 in {
+ defm "" : LMULWriteResMX<"WriteVIMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ }
+
+ let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c) in {
+ defm "" : LMULWriteResMX<"WriteVIALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftI", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ }
}
}
// Widening
@@ -440,7 +685,26 @@ foreach mx = SchedMxListW in {
defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+
+ // Special case for variants with widen operands.
+ let ReleaseAtCycles = [!mul(LMulLat, 2)] in
+ def P600WriteVIWALUWidenOp_ # mx : SchedWriteRes<[SiFiveP600VectorArith]>;
}
+
+ defvar P600VIWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVIWALUWidenOp_" # mx),
+ !cast<SchedRead>("ReadVPassthru_" # mx),
+ !cast<SchedRead>("ReadVIALUV_" # mx),
+ !cast<SchedRead>("ReadVIALUV_" # mx)];
+
+ def : InstRW<P600VIWALUBaseSchedRW,
+ (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "$")>;
+ def : InstRW<P600VIWALUBaseSchedRW[0,2,3],
+ (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_TIED$")>;
+
+ def : InstRW<!listconcat(P600VIWALUBaseSchedRW, [!cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVW(ADD|SUB)[U]?_W(V|X)_" # mx # "_MASK$")>;
+ def : InstRW<!listconcat(P600VIWALUBaseSchedRW[0,1,3], [!cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVW(ADD|SUB)[U]?_WV_" # mx # "_MASK_TIED$")>;
}
// Worst case needs 51/45/42/72 * lmul cycles for i8/16/32/64.
@@ -504,34 +768,34 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
- let Latency = 2, ReleaseAtCycles = [LMulLat] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in
- defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxListF in {
foreach sew = SchedSEWSet<mx, isF=1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxList, isF=1>.c;
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c), ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMinMaxF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
- defm "" : LMULSEWWriteResMXSEW<"WriteVFSgnjF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
+ let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in
+ let Latency = !if(!eq(mx, "M8"), 4, 3), ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorArith], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorMask], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorMask], mx, IsWorstCase>;
}
- let Latency = 1, ReleaseAtCycles = [LMulLat] in {
+ let Latency = !mul(2, SiFiveP600RVVMultiplier<mx>.c),
+ ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in {
defm "" : LMULWriteResMX<"WriteVFClassV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMovV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -565,7 +829,31 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+
+ // Special case for variants with widen operands.
+ let ReleaseAtCycles = [!mul(LMulLat, 2)] in
+ def P600WriteVFWALUWidenOp_ # mx # _E # sew : SchedWriteRes<[SiFiveP600VectorArith]>;
}
+
+ defvar P600VFWALUBaseSchedRW = [!cast<SchedWrite>("P600WriteVFWALUWidenOp_" # mx # "_E" # sew),
+ !cast<SchedRead>("ReadVPassthru_" # mx # "_E" # sew),
+ !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)];
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)]),
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "$")>;
+ def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[2], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew)],
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_TIED$")>;
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew)]),
+ (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "$")>;
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK$")>;
+ def : InstRW<[P600VFWALUBaseSchedRW[0], P600VFWALUBaseSchedRW[1], !cast<SchedRead>("ReadVFWALUV_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")],
+ (instregex "^PseudoVFW(ADD|SUB)_WV_" # mx # "_E" # sew # "_MASK_TIED$")>;
+
+ def : InstRW<!listconcat(P600VFWALUBaseSchedRW, [!cast<SchedRead>("ReadVFWALUF_" # mx # "_E" # sew), !cast<SchedRead>("ReadVMask")]),
+ (instregex "^PseudoVFW(ADD|SUB)_WFPR" # sew # "_" # mx # "_E" # sew # "_MASK$")>;
}
}
// Narrowing
@@ -580,7 +868,7 @@ foreach mx = SchedMxListFW in {
foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
- let Latency = 3, ReleaseAtCycles = [LMulLat] in {
+ let Latency = 3, ReleaseAtCycles = [!if(!eq(LMulLat, 1), 2, LMulLat)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFNCvtIToFV", [SiF...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/115243
More information about the llvm-commits
mailing list