[llvm] [RISCV][VLOPT] Add support for widening integer mul-add instructions (PR #112219)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 15 15:42:41 PDT 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/112219
>From 53726c479520855aa4a2a33f56a70741ee0598c1 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 14 Oct 2024 07:46:51 -0700
Subject: [PATCH 1/6] [RISCV][VLOPT] Add support for 11.14 widening integer
mul-add instructions
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 12 +-
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 137 ++++++++++++++++---
2 files changed, 129 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 088f6d62dcbe78..fbfbc724dc1061 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -422,8 +422,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
case RISCV::VWSUB_WX:
// Vector Widening Integer Multiply-Add Instructions
// Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
- // Even though the add is a 2*SEW addition, the operands of the add are the
- // Dest which is 2*SEW and the result of the multiply which is 2*SEW.
+ // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
+ // is then added to the 2*SEW-bit Dest.
case RISCV::VWMACCU_VV:
case RISCV::VWMACCU_VX:
case RISCV::VWMACC_VV:
@@ -567,9 +567,13 @@ static bool isSupportedInstr(const MachineInstr &MI) {
// Vector Single-Width Integer Multiply-Add Instructions
// FIXME: Add support
// Vector Widening Integer Multiply-Add Instructions
- // FIXME: Add support
- case RISCV::VWMACC_VX:
+ case RISCV::VWMACCU_VV:
case RISCV::VWMACCU_VX:
+ case RISCV::VWMACC_VV:
+ case RISCV::VWMACC_VX:
+ case RISCV::VWMACCSU_VV:
+ case RISCV::VWMACCSU_VX:
+ case RISCV::VWMACCUS_VX:
// Vector Integer Merge Instructions
// FIXME: Add support
// Vector Integer Move Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index a360ae1998f77a..6c7abee4069c1e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1122,44 +1122,149 @@ define <vscale x 4 x i32> @vrem_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
+define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmacc.vv v8, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vv v8, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
; NOVLOPT-LABEL: vwmacc_vx:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vwmacc.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vwmacc_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT: vwmacc.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vx v8, a0, v10
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v8
; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
+define <vscale x 4 x i32> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vv v8, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccu_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccu.vv v8, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
; NOVLOPT-LABEL: vwmaccu_vx:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vwmaccu.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
; NOVLOPT-NEXT: ret
;
; VLOPT-LABEL: vwmaccu_vx:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT: vwmaccu.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccsu_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccsu.vv v8, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccsu_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccsu.vv v8, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccsu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccsu_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccsu.vx v8, a0, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccsu_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccsu.vx v8, a0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccus_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccus_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccus.vx v8, a0, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccus_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccus.vx v8, a0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v8
; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> poison, i16 %b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
ret <vscale x 4 x i32> %2
}
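
To make the EEW/EMUL rule in this patch concrete, here is a minimal standalone sketch in plain C++. The types and names below are hypothetical stand-ins, not LLVM's OperandInfo or RISCVII enums; operand 0 models the tied accumulator/destination, operands 1 and 2 the multiply sources.

#include <cassert>

// Widths and group sizes are tracked in log2 space, as in the pass.
struct OperandInfoModel {
  unsigned Log2EEW; // log2 of the effective element width in bits
  int Log2EMUL;     // log2 of the effective register-group size
};

// For vwmacc-family instructions, the accumulator/destination is read and
// written at 2*SEW with 2*LMUL, while both multiply sources stay SEW/LMUL.
OperandInfoModel widenMACOperandInfo(unsigned MILog2SEW, int MILog2LMUL,
                                     unsigned OperandNo) {
  bool TwoTimes = OperandNo == 0;
  return {TwoTimes ? MILog2SEW + 1 : MILog2SEW,
          TwoTimes ? MILog2LMUL + 1 : MILog2LMUL};
}

int main() {
  // vwmacc.vv at SEW=16 (log2 4), LMUL=1 (log2 0), as in the tests above:
  assert(widenMACOperandInfo(4, 0, 0).Log2EEW == 5);  // dest EEW = 32
  assert(widenMACOperandInfo(4, 0, 0).Log2EMUL == 1); // dest EMUL = m2
  assert(widenMACOperandInfo(4, 0, 1).Log2EEW == 4);  // source EEW = 16
  assert(widenMACOperandInfo(4, 0, 2).Log2EMUL == 0); // source EMUL = m1
  return 0;
}
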
>From fb347ac530c5a6b40640030ef543b7d2c72abe36 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 14 Oct 2024 12:40:48 -0700
Subject: [PATCH 2/6] fixup! test getOperandInfo for sources
---
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 94 +++++++++++++++-----
1 file changed, 73 insertions(+), 21 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 6c7abee4069c1e..bbaf70eae10fa7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1164,48 +1164,100 @@ define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vv:
+define <vscale x 4 x i64> @vwmaccu_vv_nopropagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vv_nopropagate:
; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v16, v8
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccu.vv v8, v10, v11
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: vwmaccu.vv v16, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vv v12, v16, v8
+; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccu_vv_nopropagate:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v16, v8
+; VLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccu.vv v16, v10, v11
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vwmaccu.vv v12, v16, v8
+; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32(<vscale x 4 x i64> %d, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl, iXLen 0)
+ ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwmaccu_vv_propagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vv_propagate:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v16, v8
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vv v16, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vv v12, v8, v16
+; NOVLOPT-NEXT: vmv4r.v v8, v12
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmaccu_vv:
+; VLOPT-LABEL: vwmaccu_vv_propagate:
; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v16, v8
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccu.vv v8, v10, v11
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: vwmaccu.vv v16, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; VLOPT-NEXT: vwmaccu.vv v12, v8, v16
+; VLOPT-NEXT: vmv4r.v v8, v12
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
- ret <vscale x 4 x i32> %2
+ %2 = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32(<vscale x 4 x i64> %d, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1, iXLen %vl, iXLen 0)
+ ret <vscale x 4 x i64> %2
}
-define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vx:
+define <vscale x 4 x i32> @vwmaccu_vx_nopropagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i16> %d, i16 %e, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vx_nopropagate:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vsetvli a3, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vx v8, a1, v11
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmaccu_vx:
+; VLOPT-LABEL: vwmaccu_vx_nopropagate:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vsetvli a3, zero, e16, m1, tu, ma
; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccu.vx v8, a1, v11
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %1, i16 %e, <vscale x 4 x i16> %d, iXLen %vl, iXLen 0)
ret <vscale x 4 x i32> %2
}
+define <vscale x 4 x i64> @vwmaccu_vx_propagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, i32 %e, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vx_propagate:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a3, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
+; NOVLOPT-NEXT: vsetvli zero, a2, e32, m2, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vx v12, a1, v8
+; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccu_vx_propagate:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; VLOPT-NEXT: vwmaccu.vx v12, a1, v8
+; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.i32(<vscale x 4 x i64> %d, i32 %e, <vscale x 4 x i32> %1, iXLen %vl, iXLen 0)
+ ret <vscale x 4 x i64> %2
+}
+
define <vscale x 4 x i32> @vwmaccsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
; NOVLOPT-LABEL: vwmaccsu_vv:
; NOVLOPT: # %bb.0:
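
The propagate/nopropagate pairs above exercise the core legality check: as I read the pass, a producer's VL may be replaced by a user's VL only when the user reads the value with the same EEW and EMUL that the producer wrote it with. A rough standalone model in plain C++ (hypothetical types, not the pass's actual OperandInfo comparison):

#include <cassert>

struct OperandInfoModel {
  unsigned Log2EEW;
  int Log2EMUL;
};

// Sketch of the legality test: "VL elements" must denote the same bits for
// both the producer's definition and the user's read of it.
bool canPropagateVL(OperandInfoModel ProducerDef, OperandInfoModel Use) {
  return ProducerDef.Log2EEW == Use.Log2EEW &&
         ProducerDef.Log2EMUL == Use.Log2EMUL;
}

int main() {
  // An e16 vwmaccu.vv defines its result at EEW=32/EMUL=m2. A second, e32
  // vwmaccu.vv reading it as a multiply source also sees EEW=32/EMUL=m2,
  // so VL can be propagated (the _propagate tests).
  assert(canPropagateVL({5, 1}, {5, 1}));
  // A use whose operand info differs (e.g. a 2*SEW operand of the wider
  // instruction, EEW=64/EMUL=m4) must block propagation (_nopropagate).
  assert(!canPropagateVL({5, 1}, {6, 2}));
  return 0;
}
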
>From 27379bb153377b4a69e9ef8c951012e318b28146 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 15 Oct 2024 08:55:54 -0700
Subject: [PATCH 3/6] fixup! revise tests
---
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 102 +++++++++----------
1 file changed, 51 insertions(+), 51 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index bbaf70eae10fa7..e4d90c242962f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1122,8 +1122,8 @@ define <vscale x 4 x i32> @vrem_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmacc_vv:
+define <vscale x 4 x i32> @vwmacc_vv_propagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vv_propagate:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vwmacc.vv v8, v10, v11
@@ -1131,7 +1131,7 @@ define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmacc_vv:
+; VLOPT-LABEL: vwmacc_vv_propagate:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
; VLOPT-NEXT: vwmacc.vv v8, v10, v11
@@ -1143,8 +1143,33 @@ define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmacc_vx:
+define <vscale x 4 x i64> @vwmacc_vv_nopropagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vv_nopropagate:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v16, v8
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmacc.vv v16, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; NOVLOPT-NEXT: vwmacc.vv v12, v16, v8
+; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmacc_vv_nopropagate:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v16, v8
+; VLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vv v16, v10, v11
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vwmacc.vv v12, v16, v8
+; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.nxv4i32(<vscale x 4 x i64> %d, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl, iXLen 0)
+ ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i32> @vwmacc_vx_propagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vx_propagate:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
@@ -1152,7 +1177,7 @@ define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmacc_vx:
+; VLOPT-LABEL: vwmacc_vx_propagate:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; VLOPT-NEXT: vwmacc.vx v8, a0, v10
@@ -1164,33 +1189,29 @@ define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i64> @vwmaccu_vv_nopropagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vv_nopropagate:
+define <vscale x 4 x i32> @vwmacc_vx_nopropagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vx_nopropagate:
; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vmv2r.v v16, v8
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccu.vv v16, v10, v11
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; NOVLOPT-NEXT: vwmaccu.vv v12, v16, v8
-; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmaccu_vv_nopropagate:
+; VLOPT-LABEL: vwmacc_vx_nopropagate:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vmv2r.v v16, v8
-; VLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccu.vv v16, v10, v11
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; VLOPT-NEXT: vwmaccu.vv v12, v16, v8
-; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vx v8, a0, v10
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vx v8, a0, v10
; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32(<vscale x 4 x i64> %d, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl, iXLen 0)
- ret <vscale x 4 x i64> %2
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %1, i16 %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
+ ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i64> @vwmaccu_vv_propagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vv_propagate:
+define <vscale x 4 x i64> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vv:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vmv2r.v v16, v8
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
@@ -1200,7 +1221,7 @@ define <vscale x 4 x i64> @vwmaccu_vv_propagate(<vscale x 4 x i32> %a, <vscale x
; NOVLOPT-NEXT: vmv4r.v v8, v12
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmaccu_vv_propagate:
+; VLOPT-LABEL: vwmaccu_vv:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vmv2r.v v16, v8
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
@@ -1214,29 +1235,8 @@ define <vscale x 4 x i64> @vwmaccu_vv_propagate(<vscale x 4 x i32> %a, <vscale x
ret <vscale x 4 x i64> %2
}
-define <vscale x 4 x i32> @vwmaccu_vx_nopropagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i16> %d, i16 %e, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vx_nopropagate:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a3, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
-; NOVLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmaccu.vx v8, a1, v11
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmaccu_vx_nopropagate:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a3, zero, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
-; VLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
-; VLOPT-NEXT: vwmaccu.vx v8, a1, v11
-; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %1, i16 %e, <vscale x 4 x i16> %d, iXLen %vl, iXLen 0)
- ret <vscale x 4 x i32> %2
-}
-
-define <vscale x 4 x i64> @vwmaccu_vx_propagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, i32 %e, iXLen %vl) {
-; NOVLOPT-LABEL: vwmaccu_vx_propagate:
+define <vscale x 4 x i64> @vwmaccu_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, i32 %e, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vx:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a3, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
@@ -1245,7 +1245,7 @@ define <vscale x 4 x i64> @vwmaccu_vx_propagate(<vscale x 4 x i32> %a, i16 %b, <
; NOVLOPT-NEXT: vmv4r.v v8, v12
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmaccu_vx_propagate:
+; VLOPT-LABEL: vwmaccu_vx:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
>From ed89c46aa5b80a09f5d64b6e2dbd4ea1306b57e4 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 15 Oct 2024 11:32:21 -0700
Subject: [PATCH 4/6] fixup! fix nopropagate bug
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 6 +-
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 68 ++++----------------
2 files changed, 15 insertions(+), 59 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index fbfbc724dc1061..23eedf923f8158 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -423,7 +423,8 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
// Vector Widening Integer Multiply-Add Instructions
// Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
// A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
- // is then added to the 2*SEW-bit Dest.
+ // is then added to the 2*SEW-bit Dest. These instructions never have a
+ // passthru operand.
case RISCV::VWMACCU_VV:
case RISCV::VWMACCU_VX:
case RISCV::VWMACC_VV:
@@ -431,8 +432,7 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
case RISCV::VWMACCSU_VV:
case RISCV::VWMACCSU_VX:
case RISCV::VWMACCUS_VX: {
- bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
- bool TwoTimes = IsMODef || IsOp1;
+ bool TwoTimes = IsMODef || MO.getOperandNo() == 1;
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
RISCVII::VLMUL EMUL =
TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index e4d90c242962f4..25cbb95aab6da5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -1122,54 +1122,31 @@ define <vscale x 4 x i32> @vrem_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vwmacc_vv_propagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmacc_vv_propagate:
+define <vscale x 4 x i64> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vv:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vwmacc.vv v8, v10, v11
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmacc_vv_propagate:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
-; VLOPT-NEXT: vwmacc.vv v8, v10, v11
-; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v8, v8
-; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
- ret <vscale x 4 x i32> %2
-}
-
-define <vscale x 4 x i64> @vwmacc_vv_nopropagate(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
-; NOVLOPT-LABEL: vwmacc_vv_nopropagate:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vmv2r.v v16, v8
-; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmacc.vv v16, v10, v11
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; NOVLOPT-NEXT: vwmacc.vv v12, v16, v8
+; NOVLOPT-NEXT: vwmacc.vv v12, v8, v8
; NOVLOPT-NEXT: vmv4r.v v8, v12
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmacc_vv_nopropagate:
+; VLOPT-LABEL: vwmacc_vv:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vmv2r.v v16, v8
-; VLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
-; VLOPT-NEXT: vwmacc.vv v16, v10, v11
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
-; VLOPT-NEXT: vwmacc.vv v12, v16, v8
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vv v8, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; VLOPT-NEXT: vwmacc.vv v12, v8, v8
; VLOPT-NEXT: vmv4r.v v8, v12
; VLOPT-NEXT: ret
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.nxv4i32(<vscale x 4 x i64> %d, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl, iXLen 0)
+ %2 = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i64.nxv4i32(<vscale x 4 x i64> %d, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl, iXLen 0)
ret <vscale x 4 x i64> %2
}
-define <vscale x 4 x i32> @vwmacc_vx_propagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmacc_vx_propagate:
+define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vx:
; NOVLOPT: # %bb.0:
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
@@ -1177,7 +1154,7 @@ define <vscale x 4 x i32> @vwmacc_vx_propagate(<vscale x 4 x i32> %a, i16 %b, <v
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
; NOVLOPT-NEXT: ret
;
-; VLOPT-LABEL: vwmacc_vx_propagate:
+; VLOPT-LABEL: vwmacc_vx:
; VLOPT: # %bb.0:
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
; VLOPT-NEXT: vwmacc.vx v8, a0, v10
@@ -1189,27 +1166,6 @@ define <vscale x 4 x i32> @vwmacc_vx_propagate(<vscale x 4 x i32> %a, i16 %b, <v
ret <vscale x 4 x i32> %2
}
-define <vscale x 4 x i32> @vwmacc_vx_nopropagate(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen %vl) {
-; NOVLOPT-LABEL: vwmacc_vx_nopropagate:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
-; NOVLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vwmacc_vx_nopropagate:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
-; VLOPT-NEXT: vwmacc.vx v8, a0, v10
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
-; VLOPT-NEXT: vwmacc.vx v8, a0, v10
-; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16 %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %1, i16 %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
- ret <vscale x 4 x i32> %2
-}
-
define <vscale x 4 x i64> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
; NOVLOPT-LABEL: vwmaccu_vv:
; NOVLOPT: # %bb.0:
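
The functional change here is the operand numbering. As I read the fix, the old code shifted the index of the 2*SEW source by one whenever the pseudo was treated as having a passthru; that shift is right for the vwadd.w/vwsub.w family, but the widening multiply-adds carry no separate passthru, so the shifted index landed on a SEW-wide multiply source. A hypothetical model (operand indices are MachineInstr slots: 0 = def, 1 = tied accumulator, then the multiply sources):

#include <cassert>

// Old behavior: conditionally shift the "wide operand" index by one.
// New behavior: the 2*SEW source of a vwmacc-family pseudo is always the
// tied accumulator in slot 1.
unsigned wideOperandNoOld(bool TreatedAsHavingPassthru) {
  return TreatedAsHavingPassthru ? 2u : 1u;
}
unsigned wideOperandNoNew() {
  return 1u; // the tied 2*SEW accumulator, unconditionally
}

int main() {
  // A vwmacc pseudo ended up on the "has passthru" path, so the old logic
  // inspected operand 2 (a SEW-wide multiply source) at 2*SEW.
  assert(wideOperandNoOld(true) != wideOperandNoNew());
  assert(wideOperandNoOld(false) == wideOperandNoNew());
  return 0;
}
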
>From 81ce6dabafc5940db6053606061245968a794dc8 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 15 Oct 2024 11:36:00 -0700
Subject: [PATCH 5/6] fixup! add test that we bail out on tied def
---
llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 0b3e67ec895566..677f01de44143c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -111,6 +111,19 @@ define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru
ret <vscale x 4 x i32> %w
}
+define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; CHECK-LABEL: dont_optimize_tied_def:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vwmacc.vv v8, v10, v11
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vwmacc.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %1, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
+ ret <vscale x 4 x i32> %2
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; NOVLOPT: {{.*}}
; VLOPT: {{.*}}
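
Why the bail-out is needed: with a tail-undisturbed policy, the second vwmacc preserves elements past its VL from its tied accumulator, i.e. from %1, so those elements of %1 remain observable and the first vwmacc's VL cannot shrink. A conservative sketch of that check (hypothetical, not the pass's exact predicate):

#include <cassert>

// If a user reads the producer's value through its tied accumulator
// operand, elements past the user's VL stay live (they flow into the
// user's tail under a tail-undisturbed policy), so reducing the producer's
// VL to the user's VL would be unsound.
bool canReduceProducerVL(bool UseIsTiedAccumulator) {
  return !UseIsTiedAccumulator;
}

int main() {
  assert(!canReduceProducerVL(true));  // the dont_optimize_tied_def case
  assert(canReduceProducerVL(false));  // an ordinary source operand is fine
  return 0;
}
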
>From e456af859f485bd3d938aeb426682cb8b6946170 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 15 Oct 2024 15:42:24 -0700
Subject: [PATCH 6/6] fixup! relocate to widening section
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 31 +++++++++++-----------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 23eedf923f8158..8868e3b7395adb 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -404,7 +404,19 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
case RISCV::VWMULSU_VV:
case RISCV::VWMULSU_VX:
case RISCV::VWMULU_VV:
- case RISCV::VWMULU_VX: {
+ case RISCV::VWMULU_VX:
+ // Vector Widening Integer Multiply-Add Instructions
+ // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
+ // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
+ // is then added to the 2*SEW-bit Dest. These instructions never have a
+ // passthru operand.
+ case RISCV::VWMACCU_VV:
+ case RISCV::VWMACCU_VX:
+ case RISCV::VWMACC_VV:
+ case RISCV::VWMACC_VX:
+ case RISCV::VWMACCSU_VV:
+ case RISCV::VWMACCSU_VX:
+ case RISCV::VWMACCUS_VX: {
unsigned Log2EEW = IsMODef ? MILog2SEW + 1 : MILog2SEW;
RISCVII::VLMUL EMUL =
IsMODef ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
@@ -419,20 +431,9 @@ static OperandInfo getOperandInfo(const MachineInstr &MI,
case RISCV::VWADD_WV:
case RISCV::VWADD_WX:
case RISCV::VWSUB_WV:
- case RISCV::VWSUB_WX:
- // Vector Widening Integer Multiply-Add Instructions
- // Destination EEW=2*SEW and EMUL=2*LMUL. Source EEW=SEW and EMUL=LMUL.
- // A SEW-bit*SEW-bit multiply of the sources forms a 2*SEW-bit value, which
- // is then added to the 2*SEW-bit Dest. These instructions never have a
- // passthru operand.
- case RISCV::VWMACCU_VV:
- case RISCV::VWMACCU_VX:
- case RISCV::VWMACC_VV:
- case RISCV::VWMACC_VX:
- case RISCV::VWMACCSU_VV:
- case RISCV::VWMACCSU_VX:
- case RISCV::VWMACCUS_VX: {
- bool TwoTimes = IsMODef || MO.getOperandNo() == 1;
+ case RISCV::VWSUB_WX: {
+ bool IsOp1 = HasPassthru ? MO.getOperandNo() == 2 : MO.getOperandNo() == 1;
+ bool TwoTimes = IsMODef || IsOp1;
unsigned Log2EEW = TwoTimes ? MILog2SEW + 1 : MILog2SEW;
RISCVII::VLMUL EMUL =
TwoTimes ? RISCVVType::twoTimesVLMUL(MIVLMul) : MIVLMul;
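
After the move, the widening multiplies and multiply-adds share the rule shown above: only the instruction's definition is treated as 2*SEW/2*LMUL. The EMUL doubling itself (RISCVVType::twoTimesVLMUL in the real code) is just an increment in log2 space; a standalone model with a hypothetical helper name:

#include <cassert>

// LMUL is tracked as a log2 value (mf2 = -1, m1 = 0, ..., m8 = 3), so
// doubling the effective group size is +1, and an m8 operand cannot be
// widened further.
int twoTimesLog2LMUL(int Log2LMUL) {
  assert(Log2LMUL < 3 && "widened EMUL cannot exceed m8");
  return Log2LMUL + 1;
}

int main() {
  assert(twoTimesLog2LMUL(-1) == 0); // mf2 -> m1
  assert(twoTimesLog2LMUL(0) == 1);  // m1  -> m2
  assert(twoTimesLog2LMUL(2) == 3);  // m4  -> m8
  return 0;
}
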