[llvm] [RISCV][VLOPT] Add support for Single-Width Floating-Point Fused Multiply-Add Instructions (PR #125652)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 4 00:50:59 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
These instructions have EEW=SEW for all operands.
~Strangely enough we already had support for widening vfwmadd and friends, but not vfmadd yet.~
Never mind, we only had operand info support for those!
---
Full diff: https://github.com/llvm/llvm-project/pull/125652.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp (+34)
- (modified) llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll (+400)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0960245b8362d8..d4829bced24709 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -451,6 +451,23 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VFDIV_VF:
case RISCV::VFDIV_VV:
case RISCV::VFRDIV_VF:
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFMACC_VV:
+ case RISCV::VFMACC_VF:
+ case RISCV::VFNMACC_VV:
+ case RISCV::VFNMACC_VF:
+ case RISCV::VFMSAC_VV:
+ case RISCV::VFMSAC_VF:
+ case RISCV::VFNMSAC_VV:
+ case RISCV::VFNMSAC_VF:
+ case RISCV::VFMADD_VV:
+ case RISCV::VFMADD_VF:
+ case RISCV::VFNMADD_VV:
+ case RISCV::VFNMADD_VF:
+ case RISCV::VFMSUB_VV:
+ case RISCV::VFMSUB_VF:
+ case RISCV::VFNMSUB_VV:
+ case RISCV::VFNMSUB_VF:
// Vector Floating-Point Square-Root Instruction
case RISCV::VFSQRT_V:
// Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -1016,6 +1033,23 @@ static bool isSupportedInstr(const MachineInstr &MI) {
// Vector Widening Floating-Point Multiply
case RISCV::VFWMUL_VF:
case RISCV::VFWMUL_VV:
+ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+ case RISCV::VFMACC_VV:
+ case RISCV::VFMACC_VF:
+ case RISCV::VFNMACC_VV:
+ case RISCV::VFNMACC_VF:
+ case RISCV::VFMSAC_VV:
+ case RISCV::VFMSAC_VF:
+ case RISCV::VFNMSAC_VV:
+ case RISCV::VFNMSAC_VF:
+ case RISCV::VFMADD_VV:
+ case RISCV::VFMADD_VF:
+ case RISCV::VFNMADD_VV:
+ case RISCV::VFNMADD_VF:
+ case RISCV::VFMSUB_VV:
+ case RISCV::VFMSUB_VF:
+ case RISCV::VFNMSUB_VV:
+ case RISCV::VFNMSUB_VF:
// Vector Floating-Point MIN/MAX Instructions
case RISCV::VFMIN_VF:
case RISCV::VFMIN_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 163166c1df7434..b2166da42a5c92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -4031,3 +4031,403 @@ define <vscale x 4 x float> @vfsgnjx_vf(<vscale x 4 x float> %a, float %b, iXLen
%2 = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %a, iXLen 7, iXLen %vl)
ret <vscale x 4 x float> %2
}
+
+define <vscale x 4 x float> @vfmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmacc.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmacc.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmacc.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmacc.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmacc.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmacc_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmacc_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmacc.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmacc_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmacc.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmacc(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsac.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsac.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsac.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsac.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsac.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsac_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsac_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsac.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsac_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsac.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsac(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmadd.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmadd.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmadd_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmadd_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmadd.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmadd.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmadd.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmadd_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmadd_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmadd.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmadd_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmadd.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmadd(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsub.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsub.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfmsub_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfmsub_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfmsub.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsub.vv v8, v10, v12
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v12
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsub.vv v8, v10, v12
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v12
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
+
+define <vscale x 4 x float> @vfnmsub_vf(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vfnmsub_vf:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: fsrmi a1, 3
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vfnmsub.vf v8, fa0, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v10
+; NOVLOPT-NEXT: fsrm a1
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vfnmsub_vf:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: fsrmi a1, 3
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vfnmsub.vf v8, fa0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v10
+; VLOPT-NEXT: fsrm a1
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x float> @llvm.riscv.vfnmsub(<vscale x 4 x float> %a, float %b, <vscale x 4 x float> %c, iXLen 3, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x float> @llvm.riscv.vfadd(<vscale x 4 x float> poison, <vscale x 4 x float> %1, <vscale x 4 x float> %c, iXLen 3, iXLen %vl)
+ ret <vscale x 4 x float> %2
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/125652
More information about the llvm-commits mailing list