[llvm] [RISCV] Split vector FP pseudo instructions by SEW. NFC. (PR #87686)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 9 11:18:54 PDT 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/87686
>From 7ddebab9ee017f9b57c2260476b9ca9737f7543f Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 4 Apr 2024 06:55:47 -0700
Subject: [PATCH 1/6] [RISCV] Split PseudoVFADD, PseudoVFSUB, and PseudoVFRSUB
by SEW
Co-authored-by: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
---
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 26 ++++++++++---------
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 6 ++---
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 6 ++---
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 16 +++++++++---
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 16 +++++++++---
llvm/lib/Target/RISCV/RISCVScheduleV.td | 16 ++++++------
6 files changed, 52 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index af20b11514ca1f..b4489ceb7cde50 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2912,16 +2912,17 @@ multiclass VPseudoVMAX_VV_VF {
multiclass VPseudoVALU_VV_VF_RM {
foreach m = MxListF in {
- defm "" : VPseudoBinaryFV_VV_RM<m>,
- SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm "" : VPseudoBinaryFV_VV_RM<m, "", sew=e>,
+ SchedBinary<"WriteVFALUV", "ReadVFALUV", "ReadVFALUV", m.MX, e,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoBinaryV_VF_RM<m, f>,
+ defm "" : VPseudoBinaryV_VF_RM<m, f, "", sew=f.SEW>,
SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
@@ -2929,9 +2930,9 @@ multiclass VPseudoVALU_VV_VF_RM {
multiclass VPseudoVALU_VF_RM {
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoBinaryV_VF_RM<m, f>,
+ defm "" : VPseudoBinaryV_VF_RM<m, f, "", sew=f.SEW>,
SchedBinary<"WriteVFALUF", "ReadVFALUV", "ReadVFALUF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
@@ -7056,11 +7057,12 @@ defm : VPatBinaryV_WV_WX_WI_RM<"int_riscv_vnclip", "PseudoVNCLIP",
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD",
- AllFloatVectors>;
-defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB",
- AllFloatVectors>;
-defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB", AllFloatVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD", AllFloatVectors,
+ isSEWAware = 1>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB", AllFloatVectors,
+ isSEWAware = 1>;
+defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB", AllFloatVectors,
+ isSEWAware = 1>;
//===----------------------------------------------------------------------===//
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b4c6ba7e9723df..35ff98c506bd40 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1204,9 +1204,9 @@ foreach mti = AllMasks in {
// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
-defm : VPatBinaryFPSDNode_VV_VF_RM<any_fadd, "PseudoVFADD">;
-defm : VPatBinaryFPSDNode_VV_VF_RM<any_fsub, "PseudoVFSUB">;
-defm : VPatBinaryFPSDNode_R_VF_RM<any_fsub, "PseudoVFRSUB">;
+defm : VPatBinaryFPSDNode_VV_VF_RM<any_fadd, "PseudoVFADD", isSEWAware=1>;
+defm : VPatBinaryFPSDNode_VV_VF_RM<any_fsub, "PseudoVFSUB", isSEWAware=1>;
+defm : VPatBinaryFPSDNode_R_VF_RM<any_fsub, "PseudoVFRSUB", isSEWAware=1>;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM<fadd, "PseudoVFWADD">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 73d52d5ecafb5d..fc6f68f5e14c94 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2425,9 +2425,9 @@ foreach vtiToWti = AllWidenableIntVectors in
// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
-defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fadd_vl, "PseudoVFADD">;
-defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fsub_vl, "PseudoVFSUB">;
-defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fsub_vl, "PseudoVFRSUB">;
+defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fadd_vl, "PseudoVFADD", isSEWAware=1>;
+defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fsub_vl, "PseudoVFSUB", isSEWAware=1>;
+defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fsub_vl, "PseudoVFRSUB", isSEWAware=1>;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwadd_vl, riscv_vfwadd_w_vl, "PseudoVFWADD">;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 54a13889ee698a..f14766c984391f 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -732,12 +732,20 @@ foreach mx = SchedMxListW in {
}
// 13. Vector Floating-Point Instructions
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ }
+ }
+}
foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
@@ -1137,8 +1145,8 @@ defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
// 14. Vector Floating-Point Instructions
-defm "" : LMULReadAdvance<"ReadVFALUV", 0>;
-defm "" : LMULReadAdvance<"ReadVFALUF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 54016959d348e3..4d81bafdf88b8d 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -486,12 +486,20 @@ foreach mx = SchedMxList in {
}
// 13. Vector Floating-Point Instructions
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
+ }
+}
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFALUF", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulF", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -925,8 +933,8 @@ defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
// 14. Vector Floating-Point Instructions
-defm "" : LMULReadAdvance<"ReadVFALUV", 0>;
-defm "" : LMULReadAdvance<"ReadVFALUF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 379622d4ca834c..9b9c72e0767afe 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -397,8 +397,8 @@ defm "" : LMULSchedWritesW<"WriteVNClipI">;
// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
-defm "" : LMULSchedWrites<"WriteVFALUV">;
-defm "" : LMULSchedWrites<"WriteVFALUF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFALUV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFALUF">;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
defm "" : LMULSchedWritesFW<"WriteVFWALUV">;
defm "" : LMULSchedWritesFW<"WriteVFWALUF">;
@@ -622,8 +622,8 @@ defm "" : LMULSchedReadsW<"ReadVNClipX">;
// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
-defm "" : LMULSchedReads<"ReadVFALUV">;
-defm "" : LMULSchedReads<"ReadVFALUF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFALUV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFALUF">;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
defm "" : LMULSchedReadsFW<"ReadVFWALUV">;
defm "" : LMULSchedReadsFW<"ReadVFWALUF">;
@@ -868,8 +868,8 @@ defm "" : LMULWriteResW<"WriteVNClipX", []>;
defm "" : LMULWriteResW<"WriteVNClipI", []>;
// 13. Vector Floating-Point Instructions
-defm "" : LMULWriteRes<"WriteVFALUV", []>;
-defm "" : LMULWriteRes<"WriteVFALUF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFALUV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFALUF", []>;
defm "" : LMULWriteResFW<"WriteVFWALUV", []>;
defm "" : LMULWriteResFW<"WriteVFWALUF", []>;
defm "" : LMULWriteRes<"WriteVFMulV", []>;
@@ -1024,8 +1024,8 @@ defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
// 13. Vector Floating-Point Instructions
-defm "" : LMULReadAdvance<"ReadVFALUV", 0>;
-defm "" : LMULReadAdvance<"ReadVFALUF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
>From 8c6e27d8366665bc664b30be37ca7790bdf1ea0d Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 4 Apr 2024 08:38:32 -0700
Subject: [PATCH 2/6] [RISCV] Split PseudoVFWADD, PseudoVFWSUB, and
PseudoVFWMUL by SEW
Co-authored-by: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 46 ++++++-
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 121 ++++++++++--------
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 12 +-
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 43 ++++---
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 24 ++--
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 24 ++--
llvm/lib/Target/RISCV/RISCVScheduleV.td | 44 ++++---
7 files changed, 203 insertions(+), 111 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 5582de51b17d19..26961abd4da130 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2929,14 +2929,13 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
#define CASE_WIDEOP_OPCODE_LMULS(OP) \
CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
-// clang-format on
#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
case RISCV::PseudoV##OP##_##LMUL##_TIED: \
NewOpc = RISCV::PseudoV##OP##_##LMUL; \
break;
-#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
+#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
@@ -2947,6 +2946,41 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
+// FP Widening Ops may be SEW aware. Create SEW aware cases for these cases.
+#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
+ RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED
+
+#define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
+ CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
+ case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32) \
+
+#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
+ case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
+ NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
+ break;
+
+#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32) \
+
+#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
+// clang-format on
+
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
@@ -2954,8 +2988,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
return nullptr;
- case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
- case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
+ case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
+ case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
MI.getNumExplicitOperands() == 7 &&
"Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
@@ -2968,8 +3002,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
- CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
- CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
+ CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
}
// clang-format on
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index b4489ceb7cde50..27ded64cfb3d24 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2208,11 +2208,13 @@ multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
int TargetConstraintType = 1> {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
let VLMul = MInfo.value in {
- def "_" # MInfo.MX # "_TIED":
+ def suffix # "_TIED":
VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK_TIED" :
+ def suffix # "_MASK_TIED" :
VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint, TargetConstraintType>,
RISCVMaskedPseudo<MaskIdx=2>;
}
@@ -2329,9 +2331,9 @@ multiclass VPseudoBinaryW_VV<LMULInfo m> {
"@earlyclobber $rd", TargetConstraintType=3>;
}
-multiclass VPseudoBinaryW_VV_RM<LMULInfo m> {
+multiclass VPseudoBinaryW_VV_RM<LMULInfo m, int sew = 0> {
defm _VV : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
- "@earlyclobber $rd", UsesVXRM=0,
+ "@earlyclobber $rd", sew, UsesVXRM=0,
TargetConstraintType=3>;
}
@@ -2351,10 +2353,10 @@ multiclass VPseudoBinaryW_VF<LMULInfo m, FPR_Info f> {
"@earlyclobber $rd">;
}
-multiclass VPseudoBinaryW_VF_RM<LMULInfo m, FPR_Info f> {
+multiclass VPseudoBinaryW_VF_RM<LMULInfo m, FPR_Info f, int sew = 0> {
defm "_V" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass,
f.fprclass, m,
- "@earlyclobber $rd",
+ "@earlyclobber $rd", sew,
UsesVXRM=0,
TargetConstraintType=3>;
}
@@ -2366,11 +2368,13 @@ multiclass VPseudoBinaryW_WV<LMULInfo m> {
"@earlyclobber $rd", TargetConstraintType=3>;
}
-multiclass VPseudoBinaryW_WV_RM<LMULInfo m> {
+multiclass VPseudoBinaryW_WV_RM<LMULInfo m, int sew = 0> {
defm _WV : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd", UsesVXRM=0, TargetConstraintType=3>;
+ "@earlyclobber $rd", sew, UsesVXRM = 0,
+ TargetConstraintType = 3>;
defm _WV : VPseudoTiedBinaryRoundingMode<m.wvrclass, m.vrclass, m,
- "@earlyclobber $rd", TargetConstraintType=3>;
+ "@earlyclobber $rd", sew,
+ TargetConstraintType = 3>;
}
multiclass VPseudoBinaryW_WX<LMULInfo m> {
@@ -2382,11 +2386,11 @@ multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f, int TargetConstraintType =
f.fprclass, m, /*Constraint*/ "", TargetConstraintType=TargetConstraintType>;
}
-multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> {
+multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f, int sew = 0> {
defm "_W" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass,
f.fprclass, m,
Constraint="",
- sew=0,
+ sew=sew,
UsesVXRM=0,
TargetConstraintType=3>;
}
@@ -2974,16 +2978,17 @@ multiclass VPseudoVWMUL_VV_VX {
multiclass VPseudoVWMUL_VV_VF_RM {
foreach m = MxListFW in {
- defm "" : VPseudoBinaryW_VV_RM<m>,
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=e>,
SchedBinary<"WriteVFWMulV", "ReadVFWMulV", "ReadVFWMulV", m.MX,
- forceMergeOpRead=true>;
+ e, forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defm "" : VPseudoBinaryW_VF_RM<m, f>,
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
SchedBinary<"WriteVFWMulF", "ReadVFWMulV", "ReadVFWMulF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
@@ -3002,31 +3007,33 @@ multiclass VPseudoVWALU_WV_WX {
multiclass VPseudoVFWALU_VV_VF_RM {
foreach m = MxListFW in {
- defm "" : VPseudoBinaryW_VV_RM<m>,
- SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm "" : VPseudoBinaryW_VV_RM<m, sew=e>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ e, forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defm "" : VPseudoBinaryW_VF_RM<m, f>,
+ defm "" : VPseudoBinaryW_VF_RM<m, f, sew=f.SEW>,
SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
multiclass VPseudoVFWALU_WV_WF_RM {
foreach m = MxListFW in {
- defm "" : VPseudoBinaryW_WV_RM<m>,
- SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm "" : VPseudoBinaryW_WV_RM<m, sew=e>,
+ SchedBinary<"WriteVFWALUV", "ReadVFWALUV", "ReadVFWALUV", m.MX,
+ e, forceMergeOpRead=true>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defm "" : VPseudoBinaryW_WF_RM<m, f>,
+ defm "" : VPseudoBinaryW_WF_RM<m, f, sew=f.SEW>,
SchedBinary<"WriteVFWALUF", "ReadVFWALUV", "ReadVFWALUF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
@@ -5023,13 +5030,16 @@ multiclass VPatBinaryW_VV<string intrinsic, string instruction,
}
multiclass VPatBinaryW_VV_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist> {
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0> {
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
+ defvar name = !if(isSEWAware,
+ instruction # "_VV_" # Vti.LMul.MX # "_E" # Vti.SEW,
+ instruction # "_VV_" # Vti.LMul.MX);
let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
GetVTypePredicates<Wti>.Predicates) in
- defm : VPatBinaryRoundingMode<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
+ defm : VPatBinaryRoundingMode<intrinsic, name,
Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass,
Vti.RegClass, Vti.RegClass>;
@@ -5052,14 +5062,17 @@ multiclass VPatBinaryW_VX<string intrinsic, string instruction,
}
multiclass VPatBinaryW_VX_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist> {
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0> {
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "V"#Vti.ScalarSuffix;
+ defvar name = !if(isSEWAware,
+ instruction#"_"#kind#"_"#Vti.LMul.MX # "_E" # Vti.SEW,
+ instruction#"_"#kind#"_"#Vti.LMul.MX);
let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
GetVTypePredicates<Wti>.Predicates) in
- defm : VPatBinaryRoundingMode<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ defm : VPatBinaryRoundingMode<intrinsic, name,
Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
Vti.Log2SEW, Wti.RegClass,
Vti.RegClass, Vti.ScalarRegClass>;
@@ -5096,27 +5109,30 @@ multiclass VPatBinaryW_WV<string intrinsic, string instruction,
}
multiclass VPatBinaryW_WV_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist> {
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0> {
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
+ defvar name = !if(isSEWAware,
+ instruction # "_WV_" # Vti.LMul.MX # "_E" # Vti.SEW,
+ instruction # "_WV_" # Vti.LMul.MX);
let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
GetVTypePredicates<Wti>.Predicates) in {
- def : VPatTiedBinaryNoMaskRoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ def : VPatTiedBinaryNoMaskRoundingMode<intrinsic, name,
Wti.Vector, Vti.Vector,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- def : VPatBinaryNoMaskTURoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ def : VPatBinaryNoMaskTURoundingMode<intrinsic, name,
Wti.Vector, Wti.Vector, Vti.Vector, Vti.Log2SEW,
Wti.RegClass, Wti.RegClass, Vti.RegClass>;
let AddedComplexity = 1 in {
- def : VPatTiedBinaryNoMaskTURoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ def : VPatTiedBinaryNoMaskTURoundingMode<intrinsic, name,
Wti.Vector, Vti.Vector,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- def : VPatTiedBinaryMaskRoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ def : VPatTiedBinaryMaskRoundingMode<intrinsic, name,
Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
}
- def : VPatBinaryMaskTARoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ def : VPatBinaryMaskTARoundingMode<intrinsic, name,
Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass,
Wti.RegClass, Vti.RegClass>;
@@ -5140,14 +5156,17 @@ multiclass VPatBinaryW_WX<string intrinsic, string instruction,
}
multiclass VPatBinaryW_WX_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist> {
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0> {
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "W"#Vti.ScalarSuffix;
+ defvar name = !if(isSEWAware,
+ instruction#"_"#kind#"_"#Vti.LMul.MX#"_E"#Vti.SEW,
+ instruction#"_"#kind#"_"#Vti.LMul.MX);
let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
GetVTypePredicates<Wti>.Predicates) in
- defm : VPatBinaryRoundingMode<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ defm : VPatBinaryRoundingMode<intrinsic, name,
Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
Vti.Log2SEW, Wti.RegClass,
Wti.RegClass, Vti.ScalarRegClass>;
@@ -5412,20 +5431,22 @@ multiclass VPatBinaryW_VV_VX<string intrinsic, string instruction,
: VPatBinaryW_VV<intrinsic, instruction, vtilist>,
VPatBinaryW_VX<intrinsic, instruction, vtilist>;
-multiclass VPatBinaryW_VV_VX_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist>
- : VPatBinaryW_VV_RM<intrinsic, instruction, vtilist>,
- VPatBinaryW_VX_RM<intrinsic, instruction, vtilist>;
+multiclass
+ VPatBinaryW_VV_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0>
+ : VPatBinaryW_VV_RM<intrinsic, instruction, vtilist, isSEWAware>,
+ VPatBinaryW_VX_RM<intrinsic, instruction, vtilist, isSEWAware>;
multiclass VPatBinaryW_WV_WX<string intrinsic, string instruction,
list<VTypeInfoToWide> vtilist>
: VPatBinaryW_WV<intrinsic, instruction, vtilist>,
VPatBinaryW_WX<intrinsic, instruction, vtilist>;
-multiclass VPatBinaryW_WV_WX_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist>
- : VPatBinaryW_WV_RM<intrinsic, instruction, vtilist>,
- VPatBinaryW_WX_RM<intrinsic, instruction, vtilist>;
+multiclass
+ VPatBinaryW_WV_WX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0>
+ : VPatBinaryW_WV_RM<intrinsic, instruction, vtilist, isSEWAware>,
+ VPatBinaryW_WX_RM<intrinsic, instruction, vtilist, isSEWAware>;
multiclass VPatBinaryV_WV_WX_WI<string intrinsic, string instruction,
list<VTypeInfoToWide> vtilist>
@@ -7068,13 +7089,13 @@ defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB", AllFloatVectors,
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwadd", "PseudoVFWADD",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwsub", "PseudoVFWSUB",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwadd_w", "PseudoVFWADD",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
@@ -7090,7 +7111,7 @@ defm : VPatBinaryV_VX_RM<"int_riscv_vfrdiv", "PseudoVFRDIV",
// 13.5. Vector Widening Floating-Point Multiply
//===----------------------------------------------------------------------===//
defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 35ff98c506bd40..e79af17d3e0586 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -621,7 +621,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
(wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs1),
(vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_E"#vti.SEW)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
vti.RegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -634,7 +634,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
(wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector (SplatFPOp (vti.Scalar vti.ScalarRegClass:$rs1))),
(vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
vti.ScalarRegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -645,7 +645,7 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name>
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
(wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
vti.ScalarRegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -666,7 +666,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name>
(wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector vti.RegClass:$rs1),
(vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_TIED")
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_E"#vti.SEW#"_TIED")
wti.RegClass:$rs2, vti.RegClass:$rs1,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -677,7 +677,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name>
(wti.Vector (riscv_fpextend_vl_oneuse
(vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
(vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2,
vti.ScalarRegClass:$rs1,
// Value to indicate no rounding mode change in
@@ -686,7 +686,7 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name>
vti.AVL, vti.Log2SEW, TA_MA)>;
def : Pat<(op (wti.Vector wti.RegClass:$rs2),
(wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
- (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
(wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2,
vti.ScalarRegClass:$rs1,
// Value to indicate no rounding mode change in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index fc6f68f5e14c94..3c92620cd555b9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -725,23 +725,27 @@ multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop,
string suffix,
ValueType result_type,
ValueType op2_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class> {
+ VReg op2_reg_class,
+ bit isSEWAware = 0> {
+ defvar name = !if(isSEWAware,
+ instruction_name#"_"#suffix#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_TIED",
+ instruction_name#"_"#suffix#"_"#vlmul.MX#"_TIED");
def : Pat<(result_type (vop
(result_type result_reg_class:$rs1),
(op2_type op2_reg_class:$rs2),
srcvalue,
true_mask,
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
+ (!cast<Instruction>(name)
result_reg_class:$rs1,
op2_reg_class:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- GPR:$vl, sew, TAIL_AGNOSTIC)>;
+ GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
// Tail undisturbed
def : Pat<(riscv_vmerge_vl true_mask,
(result_type (vop
@@ -751,13 +755,13 @@ multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop,
true_mask,
VLOpFrag)),
result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
+ (!cast<Instruction>(name)
result_reg_class:$rs1,
op2_reg_class:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
FRM_DYN,
- GPR:$vl, sew, TU_MU)>;
+ GPR:$vl, log2sew, TU_MU)>;
}
class VPatBinaryVL_XI<SDPatternOperator vop,
@@ -1591,7 +1595,8 @@ multiclass VPatBinaryFPWVL_VV_VF<SDNode vop, string instruction_name> {
}
}
-multiclass VPatBinaryFPWVL_VV_VF_RM<SDNode vop, string instruction_name> {
+multiclass VPatBinaryFPWVL_VV_VF_RM<SDNode vop, string instruction_name,
+ bit isSEWAware = 0> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar vti = fvtiToFWti.Vti;
defvar wti = fvtiToFWti.Wti;
@@ -1600,11 +1605,11 @@ multiclass VPatBinaryFPWVL_VV_VF_RM<SDNode vop, string instruction_name> {
def : VPatBinaryVL_V_RM<vop, instruction_name, "VV",
wti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
- vti.RegClass>;
+ vti.RegClass, isSEWAware>;
def : VPatBinaryVL_VF_RM<vop, instruction_name#"_V"#vti.ScalarSuffix,
wti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
- vti.ScalarRegClass>;
+ vti.ScalarRegClass, isSEWAware>;
}
}
}
@@ -1631,8 +1636,9 @@ multiclass VPatBinaryFPWVL_VV_VF_WV_WF<SDNode vop, SDNode vop_w, string instruct
}
}
-multiclass VPatBinaryFPWVL_VV_VF_WV_WF_RM<SDNode vop, SDNode vop_w, string instruction_name>
- : VPatBinaryFPWVL_VV_VF_RM<vop, instruction_name> {
+multiclass VPatBinaryFPWVL_VV_VF_WV_WF_RM<
+ SDNode vop, SDNode vop_w, string instruction_name, bit isSEWAware = 0>
+ : VPatBinaryFPWVL_VV_VF_RM<vop, instruction_name, isSEWAware> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar vti = fvtiToFWti.Vti;
defvar wti = fvtiToFWti.Wti;
@@ -1640,15 +1646,16 @@ multiclass VPatBinaryFPWVL_VV_VF_WV_WF_RM<SDNode vop, SDNode vop_w, string instr
GetVTypePredicates<wti>.Predicates) in {
defm : VPatTiedBinaryNoMaskVL_V_RM<vop_w, instruction_name, "WV",
wti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, wti.RegClass, vti.RegClass>;
+ vti.LMul, wti.RegClass, vti.RegClass,
+ isSEWAware>;
def : VPatBinaryVL_V_RM<vop_w, instruction_name, "WV",
wti.Vector, wti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
- vti.RegClass>;
+ vti.RegClass, isSEWAware>;
def : VPatBinaryVL_VF_RM<vop_w, instruction_name#"_W"#vti.ScalarSuffix,
wti.Vector, wti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
- vti.ScalarRegClass>;
+ vti.ScalarRegClass, isSEWAware>;
}
}
}
@@ -2430,8 +2437,10 @@ defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fsub_vl, "PseudoVFSUB", isSEWAware=1>;
defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fsub_vl, "PseudoVFRSUB", isSEWAware=1>;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
-defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwadd_vl, riscv_vfwadd_w_vl, "PseudoVFWADD">;
-defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwsub_vl, riscv_vfwsub_w_vl, "PseudoVFWSUB">;
+defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwadd_vl, riscv_vfwadd_w_vl,
+ "PseudoVFWADD", isSEWAware=1>;
+defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwsub_vl, riscv_vfwsub_w_vl,
+ "PseudoVFWSUB", isSEWAware=1>;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fmul_vl, "PseudoVFMUL">;
@@ -2439,7 +2448,7 @@ defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fdiv_vl, "PseudoVFDIV", isSEWAware=1>;
defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fdiv_vl, "PseudoVFRDIV", isSEWAware=1>;
// 13.5. Vector Widening Floating-Point Multiply Instructions
-defm : VPatBinaryFPWVL_VV_VF_RM<riscv_vfwmul_vl, "PseudoVFWMUL">;
+defm : VPatBinaryFPWVL_VV_VF_RM<riscv_vfwmul_vl, "PseudoVFWMUL", isSEWAware=1>;
// 13.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions.
defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfmadd_vl, "PseudoVFMADD">;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index f14766c984391f..2b6fc5e59f803a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -790,18 +790,26 @@ foreach mx = SchedMxListW in {
defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
+foreach mx = SchedMxListFW in {
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ }
+ }
+}
foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
@@ -1147,14 +1155,14 @@ defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
// 14. Vector Floating-Point Instructions
defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 4d81bafdf88b8d..00e92cde852272 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -541,16 +541,24 @@ foreach mx = SchedMxListFW in {
defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
}
+foreach mx = SchedMxListFW in {
+ foreach sew = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListFW, isF=1>.c;
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
+ }
+}
foreach mx = SchedMxListFW in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c;
let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
}
// Narrowing
@@ -935,14 +943,14 @@ defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
// 14. Vector Floating-Point Instructions
defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 9b9c72e0767afe..8b5bd7a0c28a6c 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -253,6 +253,18 @@ multiclass LMULReadAdvanceFW<string name, int val, list<SchedWrite> writes = []>
: LMULReadAdvanceImpl<name, val, writes>;
class LMULSchedWriteListFW<list<string> names> : LMULSchedWriteListImpl<names, SchedMxListFW>;
+multiclass LMULSEWSchedWritesFW<string name>
+ : LMULSEWSchedWritesImpl<name, SchedMxListFW, isF = 1, isWidening = 1>;
+multiclass LMULSEWSchedReadsFW<string name>
+ : LMULSEWSchedReadsImpl<name, SchedMxListFW, isF = 1, isWidening = 1>;
+multiclass LMULSEWWriteResFW<string name, list<ProcResourceKind> resources>
+ : LMULSEWWriteResImpl<name, resources, SchedMxListFW, isF = 1,
+ isWidening = 1>;
+multiclass
+ LMULSEWReadAdvanceFW<string name, int val, list<SchedWrite> writes = []>
+ : LMULSEWReadAdvanceImpl<name, val, writes, SchedMxListFW, isF = 1,
+ isWidening = 1>;
+
// 3.6 Vector Byte Length vlenb
def WriteRdVLENB : SchedWrite;
@@ -400,16 +412,16 @@ defm "" : LMULSchedWritesW<"WriteVNClipI">;
defm "" : LMULSEWSchedWritesF<"WriteVFALUV">;
defm "" : LMULSEWSchedWritesF<"WriteVFALUF">;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
-defm "" : LMULSchedWritesFW<"WriteVFWALUV">;
-defm "" : LMULSchedWritesFW<"WriteVFWALUF">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWALUV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWALUF">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm "" : LMULSchedWrites<"WriteVFMulV">;
defm "" : LMULSchedWrites<"WriteVFMulF">;
defm "" : LMULSEWSchedWritesF<"WriteVFDivV">;
defm "" : LMULSEWSchedWritesF<"WriteVFDivF">;
// 13.5. Vector Widening Floating-Point Multiply
-defm "" : LMULSchedWritesFW<"WriteVFWMulV">;
-defm "" : LMULSchedWritesFW<"WriteVFWMulF">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWMulV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWMulF">;
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
defm "" : LMULSchedWrites<"WriteVFMulAddV">;
defm "" : LMULSchedWrites<"WriteVFMulAddF">;
@@ -625,16 +637,16 @@ defm "" : LMULSchedReadsW<"ReadVNClipX">;
defm "" : LMULSEWSchedReadsF<"ReadVFALUV">;
defm "" : LMULSEWSchedReadsF<"ReadVFALUF">;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
-defm "" : LMULSchedReadsFW<"ReadVFWALUV">;
-defm "" : LMULSchedReadsFW<"ReadVFWALUF">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWALUV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWALUF">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm "" : LMULSchedReads<"ReadVFMulV">;
defm "" : LMULSchedReads<"ReadVFMulF">;
defm "" : LMULSEWSchedReadsF<"ReadVFDivV">;
defm "" : LMULSEWSchedReadsF<"ReadVFDivF">;
// 13.5. Vector Widening Floating-Point Multiply
-defm "" : LMULSchedReadsFW<"ReadVFWMulV">;
-defm "" : LMULSchedReadsFW<"ReadVFWMulF">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWMulV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWMulF">;
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
defm "" : LMULSchedReads<"ReadVFMulAddV">;
defm "" : LMULSchedReads<"ReadVFMulAddF">;
@@ -870,14 +882,14 @@ defm "" : LMULWriteResW<"WriteVNClipI", []>;
// 13. Vector Floating-Point Instructions
defm "" : LMULSEWWriteResF<"WriteVFALUV", []>;
defm "" : LMULSEWWriteResF<"WriteVFALUF", []>;
-defm "" : LMULWriteResFW<"WriteVFWALUV", []>;
-defm "" : LMULWriteResFW<"WriteVFWALUF", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWALUV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWALUF", []>;
defm "" : LMULWriteRes<"WriteVFMulV", []>;
defm "" : LMULWriteRes<"WriteVFMulF", []>;
defm "" : LMULSEWWriteResF<"WriteVFDivV", []>;
defm "" : LMULSEWWriteResF<"WriteVFDivF", []>;
-defm "" : LMULWriteResFW<"WriteVFWMulV", []>;
-defm "" : LMULWriteResFW<"WriteVFWMulF", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWMulV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWMulF", []>;
defm "" : LMULWriteRes<"WriteVFMulAddV", []>;
defm "" : LMULWriteRes<"WriteVFMulAddF", []>;
defm "" : LMULWriteResFW<"WriteVFWMulAddV", []>;
@@ -1026,14 +1038,14 @@ defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
// 13. Vector Floating-Point Instructions
defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
>From 70cb0ee53c1fa5f4a0ac28f9056c9067fea8a59a Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 4 Apr 2024 08:57:15 -0700
Subject: [PATCH 3/6] [RISCV] Split PseudoVFMUL by SEW
Co-authored-by: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
---
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 13 +++++++------
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2 +-
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 2 +-
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 8 ++++----
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 9 +++++----
llvm/lib/Target/RISCV/RISCVScheduleV.td | 16 ++++++++--------
6 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 27ded64cfb3d24..de72767db74b93 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2827,16 +2827,17 @@ multiclass VPseudoVDIV_VV_VX {
multiclass VPseudoVFMUL_VV_VF_RM {
foreach m = MxListF in {
- defm "" : VPseudoBinaryFV_VV_RM<m>,
- SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX,
- forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm "" : VPseudoBinaryFV_VV_RM<m, "", sew=e>,
+ SchedBinary<"WriteVFMulV", "ReadVFMulV", "ReadVFMulV", m.MX, e,
+ forceMergeOpRead=true>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoBinaryV_VF_RM<m, f>,
+ defm "" : VPseudoBinaryV_VF_RM<m, f, "", sew=f.SEW>,
SchedBinary<"WriteVFMulF", "ReadVFMulV", "ReadVFMulF", m.MX,
- forceMergeOpRead=true>;
+ f.SEW, forceMergeOpRead=true>;
}
}
}
@@ -7101,7 +7102,7 @@ defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB",
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
//===----------------------------------------------------------------------===//
defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL",
- AllFloatVectors>;
+ AllFloatVectors, isSEWAware=1>;
defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfdiv", "PseudoVFDIV",
AllFloatVectors, isSEWAware=1>;
defm : VPatBinaryV_VX_RM<"int_riscv_vfrdiv", "PseudoVFRDIV",
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index e79af17d3e0586..b90608c7e23cc7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1213,7 +1213,7 @@ defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM<fadd, "PseudoVFWADD">;
defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM<fsub, "PseudoVFWSUB">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm : VPatBinaryFPSDNode_VV_VF_RM<any_fmul, "PseudoVFMUL">;
+defm : VPatBinaryFPSDNode_VV_VF_RM<any_fmul, "PseudoVFMUL", isSEWAware=1>;
defm : VPatBinaryFPSDNode_VV_VF_RM<any_fdiv, "PseudoVFDIV", isSEWAware=1>;
defm : VPatBinaryFPSDNode_R_VF_RM<any_fdiv, "PseudoVFRDIV", isSEWAware=1>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 3c92620cd555b9..6e412cd86990f3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2443,7 +2443,7 @@ defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwsub_vl, riscv_vfwsub_w_vl,
"PseudoVFWSUB", isSEWAware=1>;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fmul_vl, "PseudoVFMUL">;
+defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fmul_vl, "PseudoVFMUL", isSEWAware=1>;
defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fdiv_vl, "PseudoVFDIV", isSEWAware=1>;
defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fdiv_vl, "PseudoVFRDIV", isSEWAware=1>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 2b6fc5e59f803a..af65b706b0e205 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -739,6 +739,8 @@ foreach mx = SchedMxListF in {
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -746,8 +748,6 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
@@ -1157,8 +1157,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 00e92cde852272..5819ce5aa31532 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -493,6 +493,9 @@ foreach mx = SchedMxListF in {
let Latency = 6, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+
}
}
}
@@ -500,8 +503,6 @@ foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFMulV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulF", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
@@ -945,8 +946,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 8b5bd7a0c28a6c..9cb3cd1c3804d5 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -415,8 +415,8 @@ defm "" : LMULSEWSchedWritesF<"WriteVFALUF">;
defm "" : LMULSEWSchedWritesFW<"WriteVFWALUV">;
defm "" : LMULSEWSchedWritesFW<"WriteVFWALUF">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm "" : LMULSchedWrites<"WriteVFMulV">;
-defm "" : LMULSchedWrites<"WriteVFMulF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMulV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMulF">;
defm "" : LMULSEWSchedWritesF<"WriteVFDivV">;
defm "" : LMULSEWSchedWritesF<"WriteVFDivF">;
// 13.5. Vector Widening Floating-Point Multiply
@@ -640,8 +640,8 @@ defm "" : LMULSEWSchedReadsF<"ReadVFALUF">;
defm "" : LMULSEWSchedReadsFW<"ReadVFWALUV">;
defm "" : LMULSEWSchedReadsFW<"ReadVFWALUF">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm "" : LMULSchedReads<"ReadVFMulV">;
-defm "" : LMULSchedReads<"ReadVFMulF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMulV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMulF">;
defm "" : LMULSEWSchedReadsF<"ReadVFDivV">;
defm "" : LMULSEWSchedReadsF<"ReadVFDivF">;
// 13.5. Vector Widening Floating-Point Multiply
@@ -884,8 +884,8 @@ defm "" : LMULSEWWriteResF<"WriteVFALUV", []>;
defm "" : LMULSEWWriteResF<"WriteVFALUF", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWALUV", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWALUF", []>;
-defm "" : LMULWriteRes<"WriteVFMulV", []>;
-defm "" : LMULWriteRes<"WriteVFMulF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMulV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMulF", []>;
defm "" : LMULSEWWriteResF<"WriteVFDivV", []>;
defm "" : LMULSEWWriteResF<"WriteVFDivF", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulV", []>;
@@ -1040,8 +1040,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFALUV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFALUF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWALUF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
>From 1c274d9f0d6d723d20985c4f2c86cbde396e7b10 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 4 Apr 2024 10:52:54 -0700
Subject: [PATCH 4/6] [RISCV] Split single width floating point fused
multiply-add pseudo instructions by SEW
Co-authored-by: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 254 +++++++++++-------
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 93 ++++---
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 2 +-
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 4 +-
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 8 +-
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 19 +-
llvm/lib/Target/RISCV/RISCVScheduleV.td | 16 +-
7 files changed, 246 insertions(+), 150 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 26961abd4da130..4d9386b69d0f01 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2603,31 +2603,54 @@ std::string RISCVInstrInfo::createMIROperandComment(
}
// clang-format off
-#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \
+#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
RISCV::PseudoV##OP##_##TYPE##_##LMUL
-#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE) \
- CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1): \
- case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2): \
- case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4): \
- case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8)
+#define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
+ CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
+ case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
+ case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
+ case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
-#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE) \
- CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2): \
- case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE)
+#define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
+ CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
+ case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
-#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE) \
- CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4): \
- case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE)
+#define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
+ CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
+ case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
-#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \
- CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \
- case CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE)
+#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
+ CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
+ case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
+
+// VFMA instructions are SEW specific.
+#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
+ RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
+
+#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
+ CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
+ case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
+ case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
+ case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
+
+#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
+ CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
+ case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
+
+#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
+ CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
+ case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
+
+#define CASE_VFMA_OPCODE_VV(OP) \
+ CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
+ case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
#define CASE_VFMA_SPLATS(OP) \
- CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16): \
- case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32): \
- case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64)
+ CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
+ case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
+ case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
// clang-format on
bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
@@ -2667,16 +2690,16 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case CASE_VFMA_SPLATS(FNMSUB):
case CASE_VFMA_SPLATS(FNMACC):
case CASE_VFMA_SPLATS(FNMSAC):
- case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
- case CASE_VFMA_OPCODE_LMULS(MADD, VX):
- case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
- case CASE_VFMA_OPCODE_LMULS(MACC, VX):
- case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
- case CASE_VFMA_OPCODE_LMULS(MACC, VV):
- case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
+ case CASE_VFMA_OPCODE_VV(FMACC):
+ case CASE_VFMA_OPCODE_VV(FMSAC):
+ case CASE_VFMA_OPCODE_VV(FNMACC):
+ case CASE_VFMA_OPCODE_VV(FNMSAC):
+ case CASE_VMA_OPCODE_LMULS(MADD, VX):
+ case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
+ case CASE_VMA_OPCODE_LMULS(MACC, VX):
+ case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
+ case CASE_VMA_OPCODE_LMULS(MACC, VV):
+ case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
// If the tail policy is undisturbed we can't commute.
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
@@ -2691,12 +2714,12 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
return true;
}
- case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
- case CASE_VFMA_OPCODE_LMULS(MADD, VV):
- case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
+ case CASE_VFMA_OPCODE_VV(FMADD):
+ case CASE_VFMA_OPCODE_VV(FMSUB):
+ case CASE_VFMA_OPCODE_VV(FNMADD):
+ case CASE_VFMA_OPCODE_VV(FNMSUB):
+ case CASE_VMA_OPCODE_LMULS(MADD, VV):
+ case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
// If the tail policy is undisturbed we can't commute.
assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
@@ -2765,33 +2788,68 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
-#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
+// clang-format off
+#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
break;
-#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
+#define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
+ CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
+ CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
+ CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
+ CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
+
+#define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
+ CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
+ CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
+
+#define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
+ CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
+ CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
+
+#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
+ CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
+ CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
+
+#define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
+ CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
+ CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
+ CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
+
+// VFMA depends on SEW.
+#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
+ case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
+ Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
+ break;
+
+#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
+
+#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
-#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
+#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, VV, MF4, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32)
-#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
+#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
-#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
+#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
@@ -2830,16 +2888,16 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
case CASE_VFMA_SPLATS(FNMADD):
case CASE_VFMA_SPLATS(FNMSAC):
case CASE_VFMA_SPLATS(FNMSUB):
- case CASE_VFMA_OPCODE_LMULS_MF4(FMACC, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FMSAC, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMACC, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMSAC, VV):
- case CASE_VFMA_OPCODE_LMULS(MADD, VX):
- case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
- case CASE_VFMA_OPCODE_LMULS(MACC, VX):
- case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
- case CASE_VFMA_OPCODE_LMULS(MACC, VV):
- case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
+ case CASE_VFMA_OPCODE_VV(FMACC):
+ case CASE_VFMA_OPCODE_VV(FMSAC):
+ case CASE_VFMA_OPCODE_VV(FNMACC):
+ case CASE_VFMA_OPCODE_VV(FNMSAC):
+ case CASE_VMA_OPCODE_LMULS(MADD, VX):
+ case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
+ case CASE_VMA_OPCODE_LMULS(MACC, VX):
+ case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
+ case CASE_VMA_OPCODE_LMULS(MACC, VV):
+ case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
// It only make sense to toggle these between clobbering the
// addend/subtrahend/minuend one of the multiplicands.
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
@@ -2856,16 +2914,16 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMACC, FMADD, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSAC, FMSUB, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMACC, FNMADD, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSAC, FNMSUB, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
- CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
- CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
- CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
- CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
+ CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
+ CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
+ CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
+ CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
+ CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
+ CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
+ CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
+ CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
+ CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
+ CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
}
auto &WorkingMI = cloneIfNew(MI);
@@ -2873,12 +2931,12 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- case CASE_VFMA_OPCODE_LMULS_MF4(FMADD, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FMSUB, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMADD, VV):
- case CASE_VFMA_OPCODE_LMULS_MF4(FNMSUB, VV):
- case CASE_VFMA_OPCODE_LMULS(MADD, VV):
- case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
+ case CASE_VFMA_OPCODE_VV(FMADD):
+ case CASE_VFMA_OPCODE_VV(FMSUB):
+ case CASE_VFMA_OPCODE_VV(FNMADD):
+ case CASE_VFMA_OPCODE_VV(FNMSUB):
+ case CASE_VMA_OPCODE_LMULS(MADD, VV):
+ case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
// If one of the operands, is the addend we need to change opcode.
// Otherwise we're just swapping 2 of the multiplicands.
@@ -2887,12 +2945,12 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMADD, FMACC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FMSUB, FMSAC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMADD, FNMACC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(FNMSUB, FNMSAC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
- CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
+ CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
+ CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
+ CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
+ CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
+ CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
+ CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
}
auto &WorkingMI = cloneIfNew(MI);
@@ -2908,12 +2966,17 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
-#undef CASE_VFMA_CHANGE_OPCODE_SPLATS
-#undef CASE_VFMA_CHANGE_OPCODE_LMULS
-#undef CASE_VFMA_CHANGE_OPCODE_COMMON
-#undef CASE_VFMA_SPLATS
-#undef CASE_VFMA_OPCODE_LMULS
+#undef CASE_VMA_OPCODE_COMMON
+#undef CASE_VMA_OPCODE_LMULS_M1
+#undef CASE_VMA_OPCODE_LMULS_MF2
+#undef CASE_VMA_OPCODE_LMULS_MF4
+#undef CASE_VMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
+#undef CASE_VFMA_OPCODE_LMULS_M1
+#undef CASE_VFMA_OPCODE_LMULS_MF2
+#undef CASE_VFMA_OPCODE_LMULS_MF4
+#undef CASE_VFMA_OPCODE_VV
+#undef CASE_VFMA_SPLATS
// clang-format off
#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
@@ -3081,10 +3144,17 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
return MIB;
}
-#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
-#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
-#undef CASE_WIDEOP_OPCODE_LMULS
#undef CASE_WIDEOP_OPCODE_COMMON
+#undef CASE_WIDEOP_OPCODE_LMULS_MF4
+#undef CASE_WIDEOP_OPCODE_LMULS
+#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
+#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
+#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
+#undef CASE_FP_WIDEOP_OPCODE_COMMON
+#undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
+#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
+#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
+#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index de72767db74b93..f9e69c60fdd8c0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3229,15 +3229,17 @@ multiclass VPseudoTernaryWithPolicyRoundingMode<VReg RetClass,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = "",
+ int sew = 0,
bit Commutable = 0,
int TargetConstraintType = 1> {
let VLMul = MInfo.value in {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
let isCommutable = Commutable in
- def "_" # MInfo.MX :
+ def suffix :
VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
Op2Class, Constraint,
TargetConstraintType>;
- def "_" # MInfo.MX # "_MASK" :
+ def suffix # "_MASK" :
VPseudoBinaryMaskPolicyRoundingMode<RetClass, Op1Class,
Op2Class, Constraint,
UsesVXRM_=0,
@@ -3251,9 +3253,9 @@ multiclass VPseudoTernaryV_VV_AAXA<LMULInfo m, string Constraint = ""> {
Constraint, Commutable=1>;
}
-multiclass VPseudoTernaryV_VV_AAXA_RM<LMULInfo m, string Constraint = ""> {
+multiclass VPseudoTernaryV_VV_AAXA_RM<LMULInfo m, string Constraint = "", int sew = 0> {
defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.vrclass, m.vrclass, m.vrclass, m,
- Constraint, Commutable=1>;
+ Constraint, sew, Commutable=1>;
}
multiclass VPseudoTernaryV_VX_AAXA<LMULInfo m, string Constraint = ""> {
@@ -3267,10 +3269,11 @@ multiclass VPseudoTernaryV_VF_AAXA<LMULInfo m, FPR_Info f, string Constraint = "
Commutable=1>;
}
-multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, string Constraint = ""> {
+multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f,
+ string Constraint = "", int sew = 0> {
defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.vrclass, f.fprclass,
m.vrclass, m, Constraint,
- Commutable=1>;
+ sew, Commutable=1>;
}
multiclass VPseudoTernaryW_VV<LMULInfo m> {
@@ -3340,16 +3343,17 @@ multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
foreach m = MxListF in {
- defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint>,
- SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
- "ReadVFMulAddV", m.MX>;
+ foreach e = SchedSEWSet<m.MX, isF=1>.val in
+ defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint, sew=e>,
+ SchedTernary<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
+ "ReadVFMulAddV", m.MX, e>;
}
foreach f = FPList in {
foreach m = f.MxList in {
- defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint>,
+ defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint, sew=f.SEW>,
SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
- "ReadVFMulAddV", m.MX>;
+ "ReadVFMulAddV", m.MX, f.SEW>;
}
}
}
@@ -4456,23 +4460,26 @@ class VPatTernaryNoMaskWithPolicyRoundingMode<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType op2_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
RegisterClass op1_reg_class,
- DAGOperand op2_kind> :
+ DAGOperand op2_kind,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic)
(result_type result_reg_class:$rs3),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
(XLenVT timm:$round),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (!cast<Instruction>(!if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
+ inst#"_"#kind#"_"#vlmul.MX))
result_reg_class:$rs3,
(op1_type op1_reg_class:$rs1),
op2_kind:$rs2,
(XLenVT timm:$round),
- GPR:$vl, sew, (XLenVT timm:$policy))>;
+ GPR:$vl, log2sew, (XLenVT timm:$policy))>;
class VPatTernaryMask<string intrinsic,
string inst,
@@ -4531,11 +4538,12 @@ class VPatTernaryMaskPolicyRoundingMode<string intrinsic,
ValueType op1_type,
ValueType op2_type,
ValueType mask_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
RegisterClass op1_reg_class,
- DAGOperand op2_kind> :
+ DAGOperand op2_kind,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
(result_type result_reg_class:$rs3),
(op1_type op1_reg_class:$rs1),
@@ -4543,13 +4551,15 @@ class VPatTernaryMaskPolicyRoundingMode<string intrinsic,
(mask_type V0),
(XLenVT timm:$round),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
+ (!cast<Instruction>(!if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew) # "_MASK",
+ inst#"_"#kind#"_"#vlmul.MX # "_MASK"))
result_reg_class:$rs3,
(op1_type op1_reg_class:$rs1),
op2_kind:$rs2,
(mask_type V0),
(XLenVT timm:$round),
- GPR:$vl, sew, (XLenVT timm:$policy))>;
+ GPR:$vl, log2sew, (XLenVT timm:$policy))>;
class VPatTernaryMaskTA<string intrinsic,
string inst,
@@ -5559,15 +5569,16 @@ multiclass VPatTernaryWithPolicyRoundingMode<string intrinsic,
LMULInfo vlmul,
VReg result_reg_class,
RegisterClass op1_reg_class,
- DAGOperand op2_kind> {
+ DAGOperand op2_kind,
+ bit isSEWAware = 0> {
def : VPatTernaryNoMaskWithPolicyRoundingMode<intrinsic, inst, kind, result_type,
op1_type, op2_type, sew, vlmul,
result_reg_class, op1_reg_class,
- op2_kind>;
+ op2_kind, isSEWAware>;
def : VPatTernaryMaskPolicyRoundingMode<intrinsic, inst, kind, result_type, op1_type,
op2_type, mask_type, sew, vlmul,
result_reg_class, op1_reg_class,
- op2_kind>;
+ op2_kind, isSEWAware>;
}
multiclass VPatTernaryTA<string intrinsic,
@@ -5621,13 +5632,13 @@ multiclass VPatTernaryV_VV_AAXA<string intrinsic, string instruction,
}
multiclass VPatTernaryV_VV_AAXA_RM<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
foreach vti = vtilist in
let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction, "VV",
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ vti.RegClass, vti.RegClass, isSEWAware>;
}
multiclass VPatTernaryV_VX<string intrinsic, string instruction,
@@ -5652,14 +5663,14 @@ multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
}
multiclass VPatTernaryV_VX_AAXA_RM<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
foreach vti = vtilist in
let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction,
"V"#vti.ScalarSuffix,
vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.ScalarRegClass, vti.RegClass>;
+ vti.ScalarRegClass, vti.RegClass, isSEWAware>;
}
multiclass VPatTernaryV_VI<string intrinsic, string instruction,
@@ -5737,9 +5748,9 @@ multiclass VPatTernaryV_VV_VX_AAXA<string intrinsic, string instruction,
VPatTernaryV_VX_AAXA<intrinsic, instruction, vtilist>;
multiclass VPatTernaryV_VV_VX_AAXA_RM<string intrinsic, string instruction,
- list<VTypeInfo> vtilist>
- : VPatTernaryV_VV_AAXA_RM<intrinsic, instruction, vtilist>,
- VPatTernaryV_VX_AAXA_RM<intrinsic, instruction, vtilist>;
+ list<VTypeInfo> vtilist, bit isSEWAware = 0>
+ : VPatTernaryV_VV_AAXA_RM<intrinsic, instruction, vtilist, isSEWAware>,
+ VPatTernaryV_VX_AAXA_RM<intrinsic, instruction, vtilist, isSEWAware>;
multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist, Operand Imm_type = simm5>
@@ -7117,14 +7128,22 @@ defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL",
//===----------------------------------------------------------------------===//
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB",
+ AllFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index b90608c7e23cc7..009cfd1599cd5f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1224,7 +1224,7 @@ defm : VPatWidenBinaryFPSDNode_VV_VF_RM<fmul, "PseudoVFWMUL">;
foreach fvti = AllFloatVectors in {
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = fvti.LMul.MX;
+ defvar suffix = fvti.LMul.MX # "_E" # fvti.SEW;
let Predicates = GetVTypePredicates<fvti>.Predicates in {
def : Pat<(fvti.Vector (any_fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
fvti.RegClass:$rs2)),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 6e412cd86990f3..d00608d0342cc1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1850,7 +1850,7 @@ multiclass VPatFPMulAddVL_VV_VF<SDPatternOperator vop, string instruction_name>
multiclass VPatFPMulAddVL_VV_VF_RM<SDPatternOperator vop, string instruction_name> {
foreach vti = AllFloatVectors in {
- defvar suffix = vti.LMul.MX;
+ defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
vti.RegClass:$rs2, (vti.Mask V0),
@@ -1916,7 +1916,7 @@ multiclass VPatFPMulAccVL_VV_VF<PatFrag vop, string instruction_name> {
multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> {
foreach vti = AllFloatVectors in {
- defvar suffix = vti.LMul.MX;
+ defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : Pat<(riscv_vmerge_vl (vti.Mask V0),
(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index af65b706b0e205..16bfb509e08565 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -741,6 +741,8 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -748,8 +750,6 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
@@ -1163,8 +1163,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 5819ce5aa31532..0896f3358d230b 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -499,13 +499,20 @@ foreach mx = SchedMxListF in {
}
}
}
+
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = 6, ReleaseAtCycles = [LMulLat] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
+ }
+}
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFiveP600VectorArith], mx, IsWorstCase>;
- }
let Latency = 3, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -952,8 +959,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 9cb3cd1c3804d5..40090d1db88748 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -423,8 +423,8 @@ defm "" : LMULSEWSchedWritesF<"WriteVFDivF">;
defm "" : LMULSEWSchedWritesFW<"WriteVFWMulV">;
defm "" : LMULSEWSchedWritesFW<"WriteVFWMulF">;
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
-defm "" : LMULSchedWrites<"WriteVFMulAddV">;
-defm "" : LMULSchedWrites<"WriteVFMulAddF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMulAddV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFMulAddF">;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
defm "" : LMULSchedWritesFW<"WriteVFWMulAddV">;
defm "" : LMULSchedWritesFW<"WriteVFWMulAddF">;
@@ -648,8 +648,8 @@ defm "" : LMULSEWSchedReadsF<"ReadVFDivF">;
defm "" : LMULSEWSchedReadsFW<"ReadVFWMulV">;
defm "" : LMULSEWSchedReadsFW<"ReadVFWMulF">;
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
-defm "" : LMULSchedReads<"ReadVFMulAddV">;
-defm "" : LMULSchedReads<"ReadVFMulAddF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMulAddV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFMulAddF">;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
defm "" : LMULSchedReadsFW<"ReadVFWMulAddV">;
defm "" : LMULSchedReadsFW<"ReadVFWMulAddF">;
@@ -890,8 +890,8 @@ defm "" : LMULSEWWriteResF<"WriteVFDivV", []>;
defm "" : LMULSEWWriteResF<"WriteVFDivF", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulV", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulF", []>;
-defm "" : LMULWriteRes<"WriteVFMulAddV", []>;
-defm "" : LMULWriteRes<"WriteVFMulAddF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMulAddV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFMulAddF", []>;
defm "" : LMULWriteResFW<"WriteVFWMulAddV", []>;
defm "" : LMULWriteResFW<"WriteVFWMulAddF", []>;
defm "" : LMULSEWWriteResF<"WriteVFSqrtV", []>;
@@ -1046,8 +1046,8 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
>From e7ca41ec188df5a2f901093464f3ce76e30d5bb5 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 4 Apr 2024 11:58:29 -0700
Subject: [PATCH 5/6] [RISCV] Split widening floating point fused multiply-add
pseudo instructions by SEW
Co-authored-by: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4 +-
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 68 ++++++++++---------
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 24 ++++---
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 5 +-
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 8 +--
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 14 ++--
llvm/lib/Target/RISCV/RISCVScheduleV.td | 16 ++---
.../CodeGen/RISCV/rvv/fixed-vectors-fmf.ll | 4 +-
.../RISCV/rvv/pass-fast-math-flags-sdnode.ll | 4 +-
9 files changed, 74 insertions(+), 73 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 4d9386b69d0f01..8ac0e4a44c3881 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2834,9 +2834,9 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
- CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, VV, MF4, E16) \
CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E16) \
- CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32)
+ CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
+ CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index f9e69c60fdd8c0..e73e341419e758 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3282,10 +3282,10 @@ multiclass VPseudoTernaryW_VV<LMULInfo m> {
constraint, /*Commutable*/ 0, TargetConstraintType=3>;
}
-multiclass VPseudoTernaryW_VV_RM<LMULInfo m> {
+multiclass VPseudoTernaryW_VV_RM<LMULInfo m, int sew = 0> {
defvar constraint = "@earlyclobber $rd";
defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
- constraint, /* Commutable */ 0,
+ constraint, sew, /* Commutable */ 0,
TargetConstraintType=3>;
}
@@ -3301,11 +3301,11 @@ multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f, int TargetConstraintType =
m.vrclass, m, constraint, /*Commutable*/ 0, TargetConstraintType>;
}
-multiclass VPseudoTernaryW_VF_RM<LMULInfo m, FPR_Info f> {
+multiclass VPseudoTernaryW_VF_RM<LMULInfo m, FPR_Info f, int sew = 0> {
defvar constraint = "@earlyclobber $rd";
defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, f.fprclass,
m.vrclass, m, constraint,
- /* Commutable */ 0,
+ sew, /* Commutable */ 0,
TargetConstraintType=3>;
}
@@ -3391,16 +3391,17 @@ multiclass VPseudoVWMAC_VX {
multiclass VPseudoVWMAC_VV_VF_RM {
foreach m = MxListFW in {
- defm "" : VPseudoTernaryW_VV_RM<m>,
- SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
- "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX>;
+ foreach e = SchedSEWSet<m.MX, isF=1, isWidening=1>.val in
+ defm "" : VPseudoTernaryW_VV_RM<m, sew=e>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", m.MX, e>;
}
foreach f = FPListW in {
foreach m = f.MxListFW in {
- defm "" : VPseudoTernaryW_VF_RM<m, f>,
+ defm "" : VPseudoTernaryW_VF_RM<m, f, sew=f.SEW>,
SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
- "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX>;
+ "ReadVFWMulAddF", "ReadVFWMulAddV", m.MX, f.SEW>;
}
}
}
@@ -3408,17 +3409,18 @@ multiclass VPseudoVWMAC_VV_VF_RM {
multiclass VPseudoVWMAC_VV_VF_BF_RM {
foreach m = MxListFW in {
defvar mx = m.MX;
- defm "" : VPseudoTernaryW_VV_RM<m>,
- SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
- "ReadVFWMulAddV", "ReadVFWMulAddV", mx>;
+ foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in
+ defm "" : VPseudoTernaryW_VV_RM<m, sew=e>,
+ SchedTernary<"WriteVFWMulAddV", "ReadVFWMulAddV",
+ "ReadVFWMulAddV", "ReadVFWMulAddV", mx, e>;
}
foreach f = BFPListW in {
foreach m = f.MxListFW in {
defvar mx = m.MX;
- defm "" : VPseudoTernaryW_VF_RM<m, f>,
+ defm "" : VPseudoTernaryW_VF_RM<m, f, sew=f.SEW>,
SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
- "ReadVFWMulAddF", "ReadVFWMulAddV", mx>;
+ "ReadVFWMulAddF", "ReadVFWMulAddV", mx, f.SEW>;
}
}
}
@@ -5698,7 +5700,7 @@ multiclass VPatTernaryW_VV<string intrinsic, string instruction,
}
multiclass VPatTernaryW_VV_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist> {
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0> {
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
@@ -5707,7 +5709,8 @@ multiclass VPatTernaryW_VV_RM<string intrinsic, string instruction,
defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction, "VV",
wti.Vector, vti.Vector, vti.Vector,
vti.Mask, vti.Log2SEW, vti.LMul,
- wti.RegClass, vti.RegClass, vti.RegClass>;
+ wti.RegClass, vti.RegClass,
+ vti.RegClass, isSEWAware>;
}
}
@@ -5726,19 +5729,18 @@ multiclass VPatTernaryW_VX<string intrinsic, string instruction,
}
}
-multiclass VPatTernaryW_VX_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist> {
+multiclass
+ VPatTernaryW_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 0> {
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
- GetVTypePredicates<wti>.Predicates) in
- defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction,
- "V"#vti.ScalarSuffix,
- wti.Vector, vti.Scalar, vti.Vector,
- vti.Mask, vti.Log2SEW, vti.LMul,
- wti.RegClass, vti.ScalarRegClass,
- vti.RegClass>;
+ GetVTypePredicates<wti>.Predicates) in defm
+ : VPatTernaryWithPolicyRoundingMode<
+ intrinsic, instruction, "V" #vti.ScalarSuffix, wti.Vector,
+ vti.Scalar, vti.Vector, vti.Mask, vti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.ScalarRegClass, vti.RegClass, isSEWAware>;
}
}
@@ -5770,9 +5772,9 @@ multiclass VPatTernaryW_VV_VX<string intrinsic, string instruction,
VPatTernaryW_VX<intrinsic, instruction, vtilist>;
multiclass VPatTernaryW_VV_VX_RM<string intrinsic, string instruction,
- list<VTypeInfoToWide> vtilist>
- : VPatTernaryW_VV_RM<intrinsic, instruction, vtilist>,
- VPatTernaryW_VX_RM<intrinsic, instruction, vtilist>;
+ list<VTypeInfoToWide> vtilist, bit isSEWAware = 1>
+ : VPatTernaryW_VV_RM<intrinsic, instruction, vtilist, isSEWAware>,
+ VPatTernaryW_VX_RM<intrinsic, instruction, vtilist, isSEWAware>;
multiclass VPatBinaryM_VV_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist>
@@ -7149,16 +7151,16 @@ defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB",
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmacc", "PseudoVFWMACC",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC",
- AllWidenableFloatVectors>;
+ AllWidenableFloatVectors, isSEWAware=1>;
let Predicates = [HasStdExtZvfbfwma] in
defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmaccbf16", "PseudoVFWMACCBF16",
- AllWidenableBFloatToFloatVectors>;
+ AllWidenableBFloatToFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.8. Vector Floating-Point Square-Root Instruction
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 009cfd1599cd5f..6855647a50e132 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -706,6 +706,7 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<wti>.Predicates) in {
def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
@@ -715,7 +716,7 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF_RM<string instruction_name> {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
(wti.Vector wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_VV_"#suffix)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -727,7 +728,7 @@ multiclass VPatWidenFPMulAccSDNode_VV_VF_RM<string instruction_name> {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue))),
(wti.Vector wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -741,6 +742,7 @@ multiclass VPatWidenFPNegMulAccSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<wti>.Predicates) in {
def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
@@ -749,7 +751,7 @@ multiclass VPatWidenFPNegMulAccSDNode_VV_VF_RM<string instruction_name> {
(riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)),
(fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_VV_"#suffix)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -760,7 +762,7 @@ multiclass VPatWidenFPNegMulAccSDNode_VV_VF_RM<string instruction_name> {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)))),
(fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -770,7 +772,7 @@ multiclass VPatWidenFPNegMulAccSDNode_VV_VF_RM<string instruction_name> {
(riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)),
(fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -784,6 +786,7 @@ multiclass VPatWidenFPMulSacSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<wti>.Predicates) in {
def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
@@ -792,7 +795,7 @@ multiclass VPatWidenFPMulSacSDNode_VV_VF_RM<string instruction_name> {
(riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)),
(fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_VV_"#suffix)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -802,7 +805,7 @@ multiclass VPatWidenFPMulSacSDNode_VV_VF_RM<string instruction_name> {
(riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)),
(fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -816,6 +819,7 @@ multiclass VPatWidenFPNegMulSacSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<wti>.Predicates) in {
def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
@@ -824,7 +828,7 @@ multiclass VPatWidenFPNegMulSacSDNode_VV_VF_RM<string instruction_name> {
(riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)),
wti.RegClass:$rd),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_VV_"#suffix)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -835,7 +839,7 @@ multiclass VPatWidenFPNegMulSacSDNode_VV_VF_RM<string instruction_name> {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)))),
wti.RegClass:$rd),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
@@ -845,7 +849,7 @@ multiclass VPatWidenFPNegMulSacSDNode_VV_VF_RM<string instruction_name> {
(riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), (XLenVT srcvalue)),
wti.RegClass:$rd),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
// Value to indicate no rounding mode change in
// RISCVInsertReadWriteCSR
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index d00608d0342cc1..d76c3a5857c6fc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1994,13 +1994,14 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
GetVTypePredicates<wti>.Predicates) in {
def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2),
(wti.Vector wti.RegClass:$rd), (vti.Mask V0),
VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX #"_MASK")
+ (!cast<Instruction>(instruction_name#"_VV_"#suffix#"_MASK")
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0),
// Value to indicate no rounding mode change in
@@ -2011,7 +2012,7 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> {
(vti.Vector vti.RegClass:$rs2),
(wti.Vector wti.RegClass:$rd), (vti.Mask V0),
VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX #"_MASK")
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix#"_MASK")
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
(vti.Mask V0),
// Value to indicate no rounding mode change in
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 16bfb509e08565..1dd4afa4ee48b1 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -799,6 +799,8 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -806,10 +808,8 @@ foreach mx = SchedMxListFW in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
}
// Narrowing
@@ -1165,8 +1165,8 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 0896f3358d230b..f63dcf20e683d9 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -558,17 +558,11 @@ foreach mx = SchedMxListFW in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFWALUF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
}
}
}
-foreach mx = SchedMxListFW in {
- defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
- defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxListFW>.c;
- let Latency = 6, ReleaseAtCycles = [LMulLat] in {
- defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFiveP600VectorArith], mx, IsWorstCase>;
- }
-}
// Narrowing
foreach mx = SchedMxListW in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
@@ -961,8 +955,8 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 40090d1db88748..b82e86e9fcd583 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -426,8 +426,8 @@ defm "" : LMULSEWSchedWritesFW<"WriteVFWMulF">;
defm "" : LMULSEWSchedWritesF<"WriteVFMulAddV">;
defm "" : LMULSEWSchedWritesF<"WriteVFMulAddF">;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
-defm "" : LMULSchedWritesFW<"WriteVFWMulAddV">;
-defm "" : LMULSchedWritesFW<"WriteVFWMulAddF">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWMulAddV">;
+defm "" : LMULSEWSchedWritesFW<"WriteVFWMulAddF">;
// 13.8. Vector Floating-Point Square-Root Instruction
defm "" : LMULSEWSchedWritesF<"WriteVFSqrtV">;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -651,8 +651,8 @@ defm "" : LMULSEWSchedReadsFW<"ReadVFWMulF">;
defm "" : LMULSEWSchedReadsF<"ReadVFMulAddV">;
defm "" : LMULSEWSchedReadsF<"ReadVFMulAddF">;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
-defm "" : LMULSchedReadsFW<"ReadVFWMulAddV">;
-defm "" : LMULSchedReadsFW<"ReadVFWMulAddF">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWMulAddV">;
+defm "" : LMULSEWSchedReadsFW<"ReadVFWMulAddF">;
// 13.8. Vector Floating-Point Square-Root Instruction
defm "" : LMULSEWSchedReadsF<"ReadVFSqrtV">;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -892,8 +892,8 @@ defm "" : LMULSEWWriteResFW<"WriteVFWMulV", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulF", []>;
defm "" : LMULSEWWriteResF<"WriteVFMulAddV", []>;
defm "" : LMULSEWWriteResF<"WriteVFMulAddF", []>;
-defm "" : LMULWriteResFW<"WriteVFWMulAddV", []>;
-defm "" : LMULWriteResFW<"WriteVFWMulAddF", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWMulAddV", []>;
+defm "" : LMULSEWWriteResFW<"WriteVFWMulAddF", []>;
defm "" : LMULSEWWriteResF<"WriteVFSqrtV", []>;
defm "" : LMULWriteRes<"WriteVFRecpV", []>;
defm "" : LMULWriteRes<"WriteVFMinMaxV", []>;
@@ -1048,8 +1048,8 @@ defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddV", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
-defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
+defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll
index 1c1440c3e6d0ed..a4851e9838fbfb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll
@@ -9,8 +9,8 @@ define <2 x double> @foo(<2 x double> %x, <2 x double> %y) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v9
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8
- ; CHECK-NEXT: [[PseudoVFADD_VV_M1_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1 $noreg, [[COPY1]], [[COPY]], 7, 2, 6 /* e64 */, 1 /* ta, mu */, implicit $frm
- ; CHECK-NEXT: $v8 = COPY [[PseudoVFADD_VV_M1_]]
+ ; CHECK-NEXT: [[PseudoVFADD_VV_M1_E64_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1_E64 $noreg, [[COPY1]], [[COPY]], 7, 2, 6 /* e64 */, 1 /* ta, mu */, implicit $frm
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVFADD_VV_M1_E64_]]
; CHECK-NEXT: PseudoRET implicit $v8
%1 = fadd fast <2 x double> %x, %y
ret <2 x double> %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll
index 1ad8e2d66392a4..8457f3d2c149c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll
@@ -15,8 +15,8 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %x, <vscale x 1 x double
; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 32
; CHECK-NEXT: [[SRLI:%[0-9]+]]:gprnox0 = SRLI killed [[SLLI]], 32
; CHECK-NEXT: $v0 = COPY [[COPY1]]
- ; CHECK-NEXT: [[PseudoVFMUL_VV_M1_MASK:%[0-9]+]]:vrnov0 = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFMUL_VV_M1_MASK $noreg, [[COPY3]], [[COPY2]], $v0, 7, killed [[SRLI]], 6 /* e64 */, 1 /* ta, mu */, implicit $frm
- ; CHECK-NEXT: $v8 = COPY [[PseudoVFMUL_VV_M1_MASK]]
+ ; CHECK-NEXT: [[PseudoVFMUL_VV_M1_E64_MASK:%[0-9]+]]:vrnov0 = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFMUL_VV_M1_E64_MASK $noreg, [[COPY3]], [[COPY2]], $v0, 7, killed [[SRLI]], 6 /* e64 */, 1 /* ta, mu */, implicit $frm
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVFMUL_VV_M1_E64_MASK]]
; CHECK-NEXT: PseudoRET implicit $v8
%1 = call fast <vscale x 1 x double> @llvm.vp.fmul.nxv1f64(<vscale x 1 x double> %x, <vscale x 1 x double> %y, <vscale x 1 x i1> %m, i32 %vl)
ret <vscale x 1 x double> %1
>From 6bc375a96fc071cf76210fa65b2cd548f07c8d64 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 4 Apr 2024 12:16:39 -0700
Subject: [PATCH 6/6] [RISCV] Split PseudoVFRSQRT7 and PseudoVFREC7 by SEW
Co-authored-by: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
---
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 62 +++++++++++--------
llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 4 +-
llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 11 +++-
llvm/lib/Target/RISCV/RISCVScheduleV.td | 8 +--
4 files changed, 52 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index e73e341419e758..ab75cd4ae18e12 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2572,14 +2572,16 @@ multiclass VPseudoVSQR_V_RM {
multiclass VPseudoVRCP_V {
foreach m = MxListF in {
defvar mx = m.MX;
- let VLMul = m.value in {
- def "_V_" # mx
- : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
- def "_V_" # mx # "_MASK"
- : VPseudoUnaryMask<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx = 2>,
- SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<mx, isF=1>.val in {
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E" # e
+ : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, e, forceMergeOpRead=true>;
+ def "_V_" # mx # "_E" # e # "_MASK"
+ : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, e, forceMergeOpRead=true>;
+ }
}
}
}
@@ -2587,14 +2589,16 @@ multiclass VPseudoVRCP_V {
multiclass VPseudoVRCP_V_RM {
foreach m = MxListF in {
defvar mx = m.MX;
- let VLMul = m.value in {
- def "_V_" # mx
- : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
- SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
- def "_V_" # mx # "_MASK"
- : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo<MaskIdx = 2>,
- SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, forceMergeOpRead=true>;
+ foreach e = SchedSEWSet<mx, isF=1>.val in {
+ let VLMul = m.value in {
+ def "_V_" # mx # "_E" # e
+ : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, e, forceMergeOpRead=true>;
+ def "_V_" # mx # "_E" # e # "_MASK"
+ : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx = 2>,
+ SchedUnary<"WriteVFRecpV", "ReadVFRecpV", mx, e, forceMergeOpRead=true>;
+ }
}
}
}
@@ -3934,12 +3938,16 @@ class VPatUnaryNoMask<string intrinsic_name,
int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class> :
+ VReg op2_reg_class,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
VLOpFrag)),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
+ inst#"_"#kind#"_"#vlmul.MX))
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
GPR:$vl, log2sew, TU_MU)>;
@@ -3978,13 +3986,17 @@ class VPatUnaryMask<string intrinsic_name,
int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class> :
+ VReg op2_reg_class,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ inst#"_"#kind#"_"#vlmul.MX#"_MASK"))
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0), GPR:$vl, log2sew, (XLenVT timm:$policy))>;
@@ -4676,15 +4688,15 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
}
multiclass VPatUnaryV_V<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
foreach vti = vtilist in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : VPatUnaryNoMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
def : VPatUnaryMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
}
}
}
@@ -7170,12 +7182,12 @@ defm : VPatUnaryV_V_RM<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors, isSE
//===----------------------------------------------------------------------===//
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
//===----------------------------------------------------------------------===//
-defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>;
+defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>;
+defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.11. Vector Floating-Point Min/Max Instructions
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 1dd4afa4ee48b1..2a13cb4beac14e 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -743,6 +743,7 @@ foreach mx = SchedMxListF in {
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
defm "" : LMULSEWWriteResMXSEW<"WriteVFMulAddF", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, sew, IsWorstCase>;
}
}
}
@@ -750,7 +751,6 @@ foreach mx = SchedMxList in {
defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
let Latency = 8, AcquireAtCycles = [0, 1], ReleaseAtCycles = [1, !add(1, Cycles)] in {
- defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
}
@@ -1168,7 +1168,7 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
-defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index f63dcf20e683d9..80090a0027149b 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -510,6 +510,14 @@ foreach mx = SchedMxListF in {
}
}
}
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
+ defvar IsWorstCase = SiFiveP600IsWorstCaseMXSEW<mx, sew, SchedMxListF, isF=1>.c;
+ let Latency = 2, ReleaseAtCycles = [LMulLat] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRecpV", [SiFiveP600VectorArith], mx, sew, IsWorstCase>;
+ }
+}
foreach mx = SchedMxList in {
defvar LMulLat = SiFiveP600GetLMulCycles<mx>.c;
defvar IsWorstCase = SiFiveP600IsWorstCaseMX<mx, SchedMxList>.c;
@@ -520,7 +528,6 @@ foreach mx = SchedMxList in {
let Latency = 2, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFiveP600VectorArith], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFiveP600VectorArith], mx, IsWorstCase>;
}
let Latency = 1, ReleaseAtCycles = [LMulLat] in {
defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFiveP600VectorArith], mx, IsWorstCase>;
@@ -958,7 +965,7 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
-defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index b82e86e9fcd583..6070482ea17576 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -432,7 +432,7 @@ defm "" : LMULSEWSchedWritesFW<"WriteVFWMulAddF">;
defm "" : LMULSEWSchedWritesF<"WriteVFSqrtV">;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
-defm "" : LMULSchedWrites<"WriteVFRecpV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
defm "" : LMULSchedWrites<"WriteVFMinMaxV">;
defm "" : LMULSchedWrites<"WriteVFMinMaxF">;
@@ -657,7 +657,7 @@ defm "" : LMULSEWSchedReadsFW<"ReadVFWMulAddF">;
defm "" : LMULSEWSchedReadsF<"ReadVFSqrtV">;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
-defm "" : LMULSchedReads<"ReadVFRecpV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
defm "" : LMULSchedReads<"ReadVFMinMaxV">;
defm "" : LMULSchedReads<"ReadVFMinMaxF">;
@@ -895,7 +895,7 @@ defm "" : LMULSEWWriteResF<"WriteVFMulAddF", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulAddV", []>;
defm "" : LMULSEWWriteResFW<"WriteVFWMulAddF", []>;
defm "" : LMULSEWWriteResF<"WriteVFSqrtV", []>;
-defm "" : LMULWriteRes<"WriteVFRecpV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFRecpV", []>;
defm "" : LMULWriteRes<"WriteVFMinMaxV", []>;
defm "" : LMULWriteRes<"WriteVFMinMaxF", []>;
defm "" : LMULWriteRes<"WriteVFSgnjV", []>;
@@ -1051,7 +1051,7 @@ defm "" : LMULSEWReadAdvanceF<"ReadVFMulAddF", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULSEWReadAdvanceFW<"ReadVFWMulAddF", 0>;
defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
-defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFRecpV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
More information about the llvm-commits
mailing list