[llvm] 3a573dc - [RISCV][VLOPT] Add support for integer multiply-add instructions (#112216)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 08:53:06 PST 2024
Author: Michael Maitland
Date: 2024-12-10T11:53:03-05:00
New Revision: 3a573dcdc88d4571496dc26e514c5708e1e2c786
URL: https://github.com/llvm/llvm-project/commit/3a573dcdc88d4571496dc26e514c5708e1e2c786
DIFF: https://github.com/llvm/llvm-project/commit/3a573dcdc88d4571496dc26e514c5708e1e2c786.diff
LOG: [RISCV][VLOPT] Add support for integer multiply-add instructions (#112216)
This adds support for these instructions.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e75eddb423e4b9..62c21fce61c373 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -570,7 +570,14 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VWMULU_VV:
case RISCV::VWMULU_VX:
// Vector Single-Width Integer Multiply-Add Instructions
- // FIXME: Add support
+ case RISCV::VMACC_VV:
+ case RISCV::VMACC_VX:
+ case RISCV::VNMSAC_VV:
+ case RISCV::VNMSAC_VX:
+ case RISCV::VMADD_VV:
+ case RISCV::VMADD_VX:
+ case RISCV::VNMSUB_VV:
+ case RISCV::VNMSUB_VX:
// Vector Widening Integer Multiply-Add Instructions
case RISCV::VWMACCU_VV:
case RISCV::VWMACCU_VX:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 39cc90b812f99e..a6d0e806228f7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1269,6 +1269,182 @@ define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %
ret <vscale x 4 x i32> %2
}
+define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmacc.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmacc.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vmacc.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vmacc.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmadd.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmadd.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vmadd.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vmadd.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsac.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsac.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vnmsac.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vnmsac.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsub.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsub.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vnmsub.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vnmsub.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
; NOVLOPT-LABEL: vwmacc_vx:
; NOVLOPT: # %bb.0:
More information about the llvm-commits
mailing list