[llvm] [DAG][RISCV] Use vp.<binop> when widening illegal types for binops which can trap (PR #105214)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 13:50:49 PDT 2024
https://github.com/preames updated https://github.com/llvm/llvm-project/pull/105214
>From 073c512cd4c52c866423de01ffed2088685f781c Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 20 Aug 2024 10:32:47 -0700
Subject: [PATCH 1/3] [DAG][RISCV] Use vp.<binop> when widening illegal types
This allows the use of a single wider operation with a restricted EVL
instead of having to split and cover via decreasing powers-of-two
sizes.
On RISCV, this avoids the need for a bunch of vslidedown and
vslideup instructions to extract subvectors, and VL toggles to
switch between the various widths.
Note there is a potential downside of using vp nodes; we lose any
generic DAG combines which might have applied to the split form.
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 2 +-
.../SelectionDAG/LegalizeVectorTypes.cpp | 18 ++++
llvm/lib/CodeGen/SelectionDAG/MatchContext.h | 16 +--
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
.../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 101 ++++--------------
5 files changed, 50 insertions(+), 91 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index b8f8818a749528..86ff2628975942 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1519,7 +1519,7 @@ std::optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode);
std::optional<unsigned> getBaseOpcodeForVP(unsigned Opcode, bool hasFPExcept);
/// Translate this non-VP Opcode to its corresponding VP Opcode.
-unsigned getVPForBaseOpcode(unsigned Opcode);
+std::optional<unsigned> getVPForBaseOpcode(unsigned Opcode);
//===--------------------------------------------------------------------===//
/// MemIndexedMode enum - This enum defines the load / store indexed
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8c0c2616250c60..39d9e550b9442c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4845,6 +4845,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
}
+ // Generate a vp.op if it is custom/legal for the target. This avoids need
+ // to split and tile the subvectors (below), because the inactive lanes can
+ // simple be disabled. To avoid possible recursion, only do this if the
+ // widened mask type is legal.
+ if (auto VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WidenVT)) {
+ if (EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WidenVT.getVectorElementCount());
+ TLI.isTypeLegal(WideMaskVT)) {
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
+ SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
+ N->getValueType(0).getVectorElementCount());
+ return DAG.getNode(*VPOpcode, dl, WidenVT, InOp1, InOp2, Mask, EVL, Flags);
+ }
+ }
+
// FIXME: Improve support for scalable vectors.
assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");
diff --git a/llvm/lib/CodeGen/SelectionDAG/MatchContext.h b/llvm/lib/CodeGen/SelectionDAG/MatchContext.h
index 8f03532af99e86..ff564c0be40c4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/MatchContext.h
+++ b/llvm/lib/CodeGen/SelectionDAG/MatchContext.h
@@ -110,7 +110,7 @@ class VPMatchContext {
// SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
// DAG.getNode(Opcode, DL, VT); }
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
return DAG.getNode(VPOpcode, DL, VT,
@@ -119,7 +119,7 @@ class VPMatchContext {
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp});
@@ -127,7 +127,7 @@ class VPMatchContext {
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2, SDValue N3) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
return DAG.getNode(VPOpcode, DL, VT,
@@ -136,7 +136,7 @@ class VPMatchContext {
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
SDNodeFlags Flags) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
@@ -145,7 +145,7 @@ class VPMatchContext {
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2, SDNodeFlags Flags) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
@@ -154,7 +154,7 @@ class VPMatchContext {
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
SDValue N2, SDValue N3, SDNodeFlags Flags) {
- unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ unsigned VPOpcode = *ISD::getVPForBaseOpcode(Opcode);
assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
return DAG.getNode(VPOpcode, DL, VT,
@@ -162,13 +162,13 @@ class VPMatchContext {
}
bool isOperationLegal(unsigned Op, EVT VT) const {
- unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+ unsigned VPOp = *ISD::getVPForBaseOpcode(Op);
return TLI.isOperationLegal(VPOp, VT);
}
bool isOperationLegalOrCustom(unsigned Op, EVT VT,
bool LegalOnly = false) const {
- unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+ unsigned VPOp = *ISD::getVPForBaseOpcode(Op);
return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 8d6f5334c9600b..18a3b7bce104a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -550,10 +550,10 @@ std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode,
return std::nullopt;
}
-unsigned ISD::getVPForBaseOpcode(unsigned Opcode) {
+std::optional<unsigned> ISD::getVPForBaseOpcode(unsigned Opcode) {
switch (Opcode) {
default:
- llvm_unreachable("can not translate this Opcode to VP.");
+ return std::nullopt;
#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break;
#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC:
#define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index ea2cdae903e5a0..f411ddf41d903f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -776,18 +776,9 @@ define void @sdiv_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v6i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 4
-; CHECK-NEXT: vslidedown.vi v11, v9, 4
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vdiv.vv v10, v11, v10
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vdiv.vv v8, v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vdiv.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
@@ -865,18 +856,9 @@ define void @srem_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v6i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 4
-; CHECK-NEXT: vslidedown.vi v11, v9, 4
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vrem.vv v10, v11, v10
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vrem.vv v8, v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vrem.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
@@ -954,18 +936,9 @@ define void @udiv_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v6i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 4
-; CHECK-NEXT: vslidedown.vi v11, v9, 4
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vdivu.vv v10, v11, v10
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vdivu.vv v8, v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vdivu.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
@@ -1043,18 +1016,9 @@ define void @urem_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v6i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a1)
-; CHECK-NEXT: vle16.v v9, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 4
-; CHECK-NEXT: vslidedown.vi v11, v9, 4
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vremu.vv v10, v11, v10
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vremu.vv v8, v9, v8
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 4
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vremu.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
@@ -1192,23 +1156,12 @@ define void @mulhu_v6i16(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v9, v9, 12
-; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 4
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vdivu.vv v9, v10, v9
-; CHECK-NEXT: lui a1, 45217
-; CHECK-NEXT: addi a1, a1, -1785
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vsext.vf2 v11, v10
-; CHECK-NEXT: vdivu.vv v8, v8, v11
+; CHECK-NEXT: lui a1, %hi(.LCPI67_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI67_0)
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vdivu.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
@@ -1353,25 +1306,13 @@ define void @mulhs_v6i16(ptr %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: li a1, -14
-; CHECK-NEXT: vmadd.vx v10, a1, v9
-; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vdiv.vv v9, v9, v10
-; CHECK-NEXT: lui a1, 1020016
-; CHECK-NEXT: addi a1, a1, 2041
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a1
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vsext.vf2 v11, v10
-; CHECK-NEXT: vdiv.vv v8, v8, v11
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: vmv.v.i v9, -7
+; CHECK-NEXT: vmerge.vim v9, v9, 7, v0
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vdiv.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
>From d61669643db15f91806a71fb56514124313bdf37 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 20 Aug 2024 12:05:42 -0700
Subject: [PATCH 2/3] Clang format
---
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 39d9e550b9442c..6c47f473760fa3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4857,9 +4857,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
- SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
- N->getValueType(0).getVectorElementCount());
- return DAG.getNode(*VPOpcode, dl, WidenVT, InOp1, InOp2, Mask, EVL, Flags);
+ SDValue EVL =
+ DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
+ N->getValueType(0).getVectorElementCount());
+ return DAG.getNode(*VPOpcode, dl, WidenVT, InOp1, InOp2, Mask, EVL,
+ Flags);
}
}
>From f74ae4975de81ca475be76a185006208abe8317f Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 20 Aug 2024 12:18:46 -0700
Subject: [PATCH 3/3] Typo fix
---
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6c47f473760fa3..aad0047b4839a8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4847,7 +4847,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
// Generate a vp.op if it is custom/legal for the target. This avoids need
// to split and tile the subvectors (below), because the inactive lanes can
- // simple be disabled. To avoid possible recursion, only do this if the
+ // simply be disabled. To avoid possible recursion, only do this if the
// widened mask type is legal.
if (auto VPOpcode = ISD::getVPForBaseOpcode(Opcode);
VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WidenVT)) {
More information about the llvm-commits
mailing list