[llvm] [RISCV][VLOPT] Add getOperandInfo for saturating signed multiply (PR #120351)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 30 05:58:50 PST 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/120351
From 92a73e85fb69ebba3b7ff538261f3adaf48fa7b0 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 19:33:23 -0800
Subject: [PATCH 1/3] [RISCV][VLOPT] Add getOperandInfo for saturating signed
multiply
These instructions are covered by the existing tests. We do not add them to
isSupportedInstr because they have a tied def, which means they never reach
that check in isCandidate.
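For context, the check referred to is the guard visible in the second patch
below; a simplified sketch (not verbatim):

    // RISCVVLOptimizer::isCandidate rejects any instruction with more than
    // one definition before isSupportedInstr is ever consulted, and the
    // saturating multiplies carry an extra def, so they trip this check.
    if (MI.getNumDefs() != 1)
      return false;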
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index e8719d02cfa0aa..9ee6ec2498d309 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -378,6 +378,11 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
case RISCV::VASUBU_VX:
case RISCV::VASUB_VV:
case RISCV::VASUB_VX:
+ // Vector Single-Width Fractional Multiply with Rounding and Saturation
+ // EEW=SEW. EMUL=LMUL. The instruction internally produces a 2*SEW-bit
+ // product but saturates the result to fit into SEW bits.
+ case RISCV::VSMUL_VV:
+ case RISCV::VSMUL_VX:
// Vector Single-Width Scaling Shift Instructions
// EEW=SEW. EMUL=LMUL.
case RISCV::VSSRL_VI:
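As a rough scalar model of the per-element semantics (a sketch of the spec's
clip(roundoff_signed(vs2[i]*vs1[i], SEW-1)) description, assuming SEW=8 and
vxrm=0/rnu to match the csrwi vxrm, 0 in the tests below; vsmul_e8 is a
hypothetical helper, not LLVM code):

    #include <cstdint>

    int8_t vsmul_e8(int8_t a, int8_t b) {
      int16_t Prod = int16_t(a) * int16_t(b);  // full 2*SEW-bit product
      int16_t Res = (Prod + (1 << 6)) >> 7;    // round-to-nearest-up, then
                                               // drop the SEW-1 fraction bits
      if (Res > INT8_MAX)                      // only (-128)*(-128) overflows;
        return INT8_MAX;                       // saturate (vxsat would be set)
      return int8_t(Res);
    }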
From fc5c72cefd4fef7b42290b2b54e592fb1cfa5be3 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Dec 2024 21:04:52 -0800
Subject: [PATCH 2/3] fixup! add vsmul to isSupported
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 5 ++-
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 45 ++++++++++++++++++++
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 9ee6ec2498d309..3692bbc9dfe07b 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -745,6 +745,9 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VMV_V_I:
case RISCV::VMV_V_X:
case RISCV::VMV_V_V:
+ // Vector Single-Width Fractional Multiply with Rounding and Saturation
+ case RISCV::VSMUL_VV:
+ case RISCV::VSMUL_VX:
// Vector Crypto
case RISCV::VWSLL_VI:
@@ -840,7 +843,7 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
return false;
- if (MI.getNumDefs() != 1)
+ if (MI.getNumExplicitDefs() != 1)
return false;
// If we're not using VLMAX, then we need to be careful whether we are using
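A note on the change above: MachineInstr::getNumDefs() counts implicit
definitions from the MCInstrDesc in addition to explicit ones, so a pseudo
with one explicit result but an extra implicit def (for the fixed-point
instructions, plausibly the vxsat status register) fails the stricter check.
Paraphrased from llvm/include/llvm/CodeGen/MachineInstr.h (a sketch, not a
verbatim copy):

    // Explicit def operands only.
    unsigned getNumExplicitDefs() const;

    // Explicit defs plus the implicit defs listed in the MCInstrDesc.
    unsigned getNumDefs() const {
      return getNumExplicitDefs() + MCID->implicit_defs().size();
    }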
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 55a50a15c788c2..bec2c3f4ca2a3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -2925,3 +2925,48 @@ define <vscale x 4 x i32> @vid.v(<vscale x 4 x i32> %c, iXLen %vl) {
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %c, iXLen %vl)
ret <vscale x 4 x i32> %2
}
+
+define <vscale x 4 x i32> @vsmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsmul_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsmul.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsmul_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT: vsmul.vv v8, v8, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v10
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vsmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vsmul_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: csrwi vxrm, 0
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vsmul.vx v10, v8, a0
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vsmul_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: csrwi vxrm, 0
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vsmul.vx v10, v8, a0
+; VLOPT-NEXT: vadd.vv v8, v10, v8
+; VLOPT-NEXT: ret
+ %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
+ %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+ ret <vscale x 4 x i32> %2
+}
+
From 34616c97b228072c696ee71a4db660a5af7e6e16 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 30 Dec 2024 05:58:16 -0800
Subject: [PATCH 3/3] Revert "fixup! add vsmul to isSupported"
This reverts commit fc5c72cefd4fef7b42290b2b54e592fb1cfa5be3.
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 5 +--
llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll | 45 --------------------
2 files changed, 1 insertion(+), 49 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 3692bbc9dfe07b..9ee6ec2498d309 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -745,9 +745,6 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VMV_V_I:
case RISCV::VMV_V_X:
case RISCV::VMV_V_V:
- // Vector Single-Width Fractional Multiply with Rounding and Saturation
- case RISCV::VSMUL_VV:
- case RISCV::VSMUL_VX:
// Vector Crypto
case RISCV::VWSLL_VI:
@@ -843,7 +840,7 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags))
return false;
- if (MI.getNumExplicitDefs() != 1)
+ if (MI.getNumDefs() != 1)
return false;
// If we're not using VLMAX, then we need to be careful whether we are using
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index bec2c3f4ca2a3d..55a50a15c788c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -2925,48 +2925,3 @@ define <vscale x 4 x i32> @vid.v(<vscale x 4 x i32> %c, iXLen %vl) {
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %c, iXLen %vl)
ret <vscale x 4 x i32> %2
}
-
-define <vscale x 4 x i32> @vsmul_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsmul_vv:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsmul.vv v8, v8, v10
-; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v8, v10
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsmul_vv:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
-; VLOPT-NEXT: vsmul.vv v8, v8, v10
-; VLOPT-NEXT: vadd.vv v8, v8, v10
-; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 0, iXLen -1)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
- ret <vscale x 4 x i32> %2
-}
-
-define <vscale x 4 x i32> @vsmul_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
-; NOVLOPT-LABEL: vsmul_vx:
-; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: csrwi vxrm, 0
-; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; NOVLOPT-NEXT: vsmul.vx v10, v8, a0
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v8
-; NOVLOPT-NEXT: ret
-;
-; VLOPT-LABEL: vsmul_vx:
-; VLOPT: # %bb.0:
-; VLOPT-NEXT: csrwi vxrm, 0
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; VLOPT-NEXT: vsmul.vx v10, v8, a0
-; VLOPT-NEXT: vadd.vv v8, v10, v8
-; VLOPT-NEXT: ret
- %1 = call <vscale x 4 x i32> @llvm.riscv.vsmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen 0, iXLen -1)
- %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
- ret <vscale x 4 x i32> %2
-}
-