[llvm] [RISCV][VLOPT] Add support for integer multiply-add instructions (PR #112216)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 10 08:52:52 PST 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/112216
>From 42cfde59092f9605109cb0dedc050c806748aab0 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 14 Oct 2024 07:25:20 -0700
Subject: [PATCH 1/5] [RISCV][VLOPT] Add support for integer multiply-add
instructions
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 9 +-
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 176 +++++++++++++++++++
2 files changed, 184 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e75eddb423e4b9..62c21fce61c373 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -570,7 +570,14 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VWMULU_VV:
case RISCV::VWMULU_VX:
// Vector Single-Width Integer Multiply-Add Instructions
- // FIXME: Add support
+ case RISCV::VMACC_VV:
+ case RISCV::VMACC_VX:
+ case RISCV::VNMSAC_VV:
+ case RISCV::VNMSAC_VX:
+ case RISCV::VMADD_VV:
+ case RISCV::VMADD_VX:
+ case RISCV::VNMSUB_VV:
+ case RISCV::VNMSUB_VX:
// Vector Widening Integer Multiply-Add Instructions
case RISCV::VWMACCU_VV:
case RISCV::VWMACCU_VX:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 39cc90b812f99e..6653b02f624bbc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1269,6 +1269,182 @@ define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %
ret <vscale x 4 x i32> %2
}
+define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmacc.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmacc.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmacc.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmacc.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmadd.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmadd.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmadd.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmadd.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsac.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsac.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsac.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsac.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsub.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsub.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsub.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsub.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
; NOVLOPT-LABEL: vwmacc_vx:
; NOVLOPT: # %bb.0:
>From f6cb23e2e9bda729cfd6cd5d1b79daa0e5f4a50b Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Wed, 16 Oct 2024 06:45:26 -0700
Subject: [PATCH 2/5] fixup! add test for source operand
---
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 21 ++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 6653b02f624bbc..6c9dd8f424d720 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1290,6 +1290,27 @@ define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
ret <vscale x 4 x i32> %2
}
+define <vscale x 4 x i32> @vmacc_vv_use(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vv_use:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v12, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmacc.vv v8, v12, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vv_use:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v12, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; VLOPT-NEXT: vmacc.vv v8, v12, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl, iXLen 0)
+ ret <vscale x 4 x i32> %2
+}
+
define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-LABEL: vmacc_vx:
; NOVLOPT: # %bb.0:
>From b2f7095decee08e4ffc98c9dd1770eedaebff3f0 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Fri, 6 Dec 2024 12:44:42 -0800
Subject: [PATCH 3/5] fixup! respond to review
---
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 42 ++++++++++----------
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 6c9dd8f424d720..6de6d96fee7cb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1275,7 +1275,7 @@ define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vmacc.vv v8, v8, v10
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vmacc_vv:
@@ -1283,10 +1283,10 @@ define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vmacc.vv v8, v8, v10
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
ret <vscale x 4 x i32> %2
}
@@ -1314,23 +1314,23 @@ define <vscale x 4 x i32> @vmacc_vv_use(<vscale x 4 x i32> %a, <vscale x 4 x i32
define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-LABEL: vmacc_vx:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vmacc.vx v10, a0, v8
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vmacc_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vmacc.vx v10, a0, v8
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
ret <vscale x 4 x i32> %2
}
@@ -1351,15 +1351,15 @@ define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; VLOPT-NEXT: vmul.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-LABEL: vmadd_vx:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vmadd.vx v10, a0, v8
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; NOVLOPT-NEXT: vmul.vv v8, v10, v8
@@ -1367,14 +1367,14 @@ define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
;
; VLOPT-LABEL: vmadd_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vmadd.vx v10, a0, v8
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; VLOPT-NEXT: vmul.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
ret <vscale x 4 x i32> %2
}
@@ -1395,15 +1395,15 @@ define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; VLOPT-NEXT: vmul.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-LABEL: vnmsac_vx:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vnmsac.vx v10, a0, v8
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; NOVLOPT-NEXT: vmul.vv v8, v10, v8
@@ -1411,14 +1411,14 @@ define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
;
; VLOPT-LABEL: vnmsac_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vnmsac.vx v10, a0, v8
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; VLOPT-NEXT: vmul.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
ret <vscale x 4 x i32> %2
}
@@ -1439,15 +1439,15 @@ define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; VLOPT-NEXT: vmul.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
ret <vscale x 4 x i32> %2
}
define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-LABEL: vnmsub_vx:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vnmsub.vx v10, a0, v8
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; NOVLOPT-NEXT: vmul.vv v8, v10, v8
@@ -1455,14 +1455,14 @@ define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
;
; VLOPT-LABEL: vnmsub_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vnmsub.vx v10, a0, v8
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; VLOPT-NEXT: vmul.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
ret <vscale x 4 x i32> %2
}
>From 7f9d14a29bff39de1a0a9c7caa91f69b7baa2e3d Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 10 Dec 2024 08:50:56 -0800
Subject: [PATCH 4/5] fixup! update test checks
---
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 24 ++++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 6de6d96fee7cb0..5ce79bcc6a9d82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1340,7 +1340,7 @@ define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vmadd.vv v8, v8, v10
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vmadd_vv:
@@ -1348,7 +1348,7 @@ define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vmadd.vv v8, v8, v10
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
@@ -1362,7 +1362,7 @@ define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vmadd.vx v10, a0, v8
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vmadd_vx:
@@ -1371,7 +1371,7 @@ define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vmadd.vx v10, a0, v8
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
@@ -1384,7 +1384,7 @@ define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vnmsac.vv v8, v8, v10
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vnmsac_vv:
@@ -1392,7 +1392,7 @@ define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vnmsac.vv v8, v8, v10
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
@@ -1406,7 +1406,7 @@ define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vnmsac.vx v10, a0, v8
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vnmsac_vx:
@@ -1415,7 +1415,7 @@ define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vnmsac.vx v10, a0, v8
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
@@ -1428,7 +1428,7 @@ define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
; NOVLOPT-NEXT: vnmsub.vv v8, v8, v10
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vnmsub_vv:
@@ -1436,7 +1436,7 @@ define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %
; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
; VLOPT-NEXT: vnmsub.vv v8, v8, v10
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v10
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
@@ -1450,7 +1450,7 @@ define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-NEXT: vmv2r.v v10, v8
; NOVLOPT-NEXT: vnmsub.vx v10, a0, v8
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vnmsub_vx:
@@ -1459,7 +1459,7 @@ define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; VLOPT-NEXT: vmv2r.v v10, v8
; VLOPT-NEXT: vnmsub.vx v10, a0, v8
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: vadd.vv v8, v10, v8
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
>From 2c25c6bccf96d38649e065dd3c9a645b6587a112 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 10 Dec 2024 08:52:26 -0800
Subject: [PATCH 5/5] fixup! drop test case that is in vop_vv
---
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 21 --------------------
1 file changed, 21 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 5ce79bcc6a9d82..a6d0e806228f7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1290,27 +1290,6 @@ define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vmacc_vv_use(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vmacc_vv_use:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vmul.vv v12, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; NOVLOPT-NEXT: vmacc.vv v8, v12, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vmacc_vv_use:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vmul.vv v12, v8, v10
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
-; VLOPT-NEXT: vmacc.vv v8, v12, v10
-; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl, iXLen 0)
- ret <vscale x 4 x i32> %2
-}
-
define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
; NOVLOPT-LABEL: vmacc_vx:
; NOVLOPT: # %bb.0:
More information about the llvm-commits
mailing list