[llvm] 9c66036 - [RISCV] Support vwsll in combineBinOp_VLToVWBinOp_VL (#87620)
Author: Luke Lau
Date: 2024-04-09T16:10:35+08:00
New Revision: 9c660362c4fb05c0198b9d3ed65b2344706129bd
URL: https://github.com/llvm/llvm-project/commit/9c660362c4fb05c0198b9d3ed65b2344706129bd
DIFF: https://github.com/llvm/llvm-project/commit/9c660362c4fb05c0198b9d3ed65b2344706129bd.diff
LOG: [RISCV] Support vwsll in combineBinOp_VLToVWBinOp_VL (#87620)
If the subtarget has +zvbb then we can attempt to fold shl and shl_vl
nodes into vwsll nodes.
There are a few test cases where we still don't pick up the vwsll:
- For fixed-vector vwsll.vi on RV32, see the FIXME for VMV_V_X_VL in
fillUpExtensionSupport about supporting implicit sign extension
- For scalable-vector vwsll.vi we need to support ISD::SPLAT_VECTOR; see
#87249
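For illustration, this is the kind of pattern the combine improves (a
minimal sketch mirroring the vwsll_vv_nxv2i64_nxv2i8_zext test updated
below; the function name is just for the example):

; With +zvbb, the outermost extend of each shl operand can now be folded
; into a widening shift, so this lowers to vzext.vf4 + vwsll.vv instead
; of vzext.vf8 + vsll.vv (see the vwsll-sdnode.ll diff below).
define <vscale x 2 x i64> @example(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b) {
  %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
  %y = zext <vscale x 2 x i8> %b to <vscale x 2 x i64>
  %z = shl <vscale x 2 x i64> %x, %y
  ret <vscale x 2 x i64> %z
}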
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c9727a3e5a8db3..80cc41b458ca81 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13543,6 +13543,7 @@ enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
+/// shl | shl_vl -> vwsll
/// fadd -> vfwadd | vfwadd_w
/// fsub -> vfwsub | vfwsub_w
/// fmul -> vfwmul
@@ -13712,6 +13713,9 @@ struct NodeExtensionHelper {
case ISD::MUL:
case RISCVISD::MUL_VL:
return RISCVISD::VWMULU_VL;
+ case ISD::SHL:
+ case RISCVISD::SHL_VL:
+ return RISCVISD::VWSLL_VL;
default:
llvm_unreachable("Unexpected opcode");
}
@@ -13853,7 +13857,8 @@ struct NodeExtensionHelper {
}
/// Check if \p Root supports any extension folding combines.
- static bool isSupportedRoot(const SDNode *Root) {
+ static bool isSupportedRoot(const SDNode *Root,
+ const RISCVSubtarget &Subtarget) {
switch (Root->getOpcode()) {
case ISD::ADD:
case ISD::SUB:
@@ -13879,6 +13884,11 @@ struct NodeExtensionHelper {
case RISCVISD::VFWADD_W_VL:
case RISCVISD::VFWSUB_W_VL:
return true;
+ case ISD::SHL:
+ return Root->getValueType(0).isScalableVector() &&
+ Subtarget.hasStdExtZvbb();
+ case RISCVISD::SHL_VL:
+ return Subtarget.hasStdExtZvbb();
default:
return false;
}
@@ -13887,8 +13897,9 @@ struct NodeExtensionHelper {
/// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- assert(isSupportedRoot(Root) && "Trying to build an helper with an "
- "unsupported root");
+ assert(isSupportedRoot(Root, Subtarget) &&
+ "Trying to build an helper with an "
+ "unsupported root");
assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
OrigOperand = Root->getOperand(OperandIdx);
@@ -13928,12 +13939,13 @@ struct NodeExtensionHelper {
static std::pair<SDValue, SDValue>
getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- assert(isSupportedRoot(Root) && "Unexpected root");
+ assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
switch (Root->getOpcode()) {
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
- case ISD::OR: {
+ case ISD::OR:
+ case ISD::SHL: {
SDLoc DL(Root);
MVT VT = Root->getSimpleValueType(0);
return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
@@ -13964,6 +13976,8 @@ struct NodeExtensionHelper {
case RISCVISD::VWSUBU_W_VL:
case RISCVISD::FSUB_VL:
case RISCVISD::VFWSUB_W_VL:
+ case ISD::SHL:
+ case RISCVISD::SHL_VL:
return false;
default:
llvm_unreachable("Unexpected opcode");
@@ -14017,6 +14031,7 @@ struct CombineResult {
case ISD::SUB:
case ISD::MUL:
case ISD::OR:
+ case ISD::SHL:
Merge = DAG.getUNDEF(Root->getValueType(0));
break;
}
@@ -14178,6 +14193,11 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
// mul -> vwmulsu
Strategies.push_back(canFoldToVW_SU);
break;
+ case ISD::SHL:
+ case RISCVISD::SHL_VL:
+ // shl -> vwsll
+ Strategies.push_back(canFoldToVWWithZEXT);
+ break;
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWSUB_W_VL:
// vwadd_w|vwsub_w -> vwadd|vwsub
@@ -14205,6 +14225,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
/// mul | mul_vl -> vwmul(u) | vwmul_su
+/// shl | shl_vl -> vwsll
/// fadd_vl -> vfwadd | vfwadd_w
/// fsub_vl -> vfwsub | vfwsub_w
/// fmul_vl -> vfwmul
@@ -14219,7 +14240,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
if (DCI.isBeforeLegalize())
return SDValue();
- if (!NodeExtensionHelper::isSupportedRoot(N))
+ if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
return SDValue();
SmallVector<SDNode *> Worklist;
@@ -14230,7 +14251,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
while (!Worklist.empty()) {
SDNode *Root = Worklist.pop_back_val();
- if (!NodeExtensionHelper::isSupportedRoot(Root))
+ if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
return SDValue();
NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
@@ -16325,9 +16346,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
VPSN->getMemOperand(), IndexType);
break;
}
+ case RISCVISD::SHL_VL:
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
+ [[fallthrough]];
case RISCVISD::SRA_VL:
- case RISCVISD::SRL_VL:
- case RISCVISD::SHL_VL: {
+ case RISCVISD::SRL_VL: {
SDValue ShAmt = N->getOperand(1);
if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
@@ -16347,6 +16371,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
[[fallthrough]];
case ISD::SRL:
case ISD::SHL: {
+ if (N->getOpcode() == ISD::SHL) {
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
+ }
SDValue ShAmt = N->getOperand(1);
if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
index 59b3d752cc20f0..af67b9920ed1ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll
@@ -111,8 +111,7 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i32> poison, i32 %b, i32 0
@@ -371,8 +370,7 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <8 x i16> poison, i16 %b, i32 0
@@ -642,8 +640,7 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9
+; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0
; CHECK-ZVBB-NEXT: vmv2r.v v8, v10
; CHECK-ZVBB-NEXT: ret
%head = insertelement <16 x i8> poison, i8 %b, i32 0
@@ -710,10 +707,10 @@ define <4 x i64> @vwsll_vv_v4i64_v4i8_zext(<4 x i8> %a, <4 x i8> %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_v4i8_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vzext.vf4 v11, v9
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v10, v11
; CHECK-ZVBB-NEXT: ret
%x = zext <4 x i8> %a to <4 x i64>
%y = zext <4 x i8> %b to <4 x i64>
@@ -784,11 +781,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_zext(<4 x i8> %a, i32 %b) {
; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i32> poison, i32 %b, i32 0
%splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
@@ -839,12 +833,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_zext(<4 x i8> %a, i16 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer
@@ -895,12 +886,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_zext(<4 x i8> %a, i8 %b) {
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -918,12 +906,19 @@ define <4 x i64> @vwsll_vi_v4i64_v4i8(<4 x i8> %a) {
; CHECK-NEXT: vsll.vi v8, v10, 2
; CHECK-NEXT: ret
;
-; CHECK-ZVBB-LABEL: vwsll_vi_v4i64_v4i8:
-; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2
-; CHECK-ZVBB-NEXT: ret
+; CHECK-ZVBB-RV32-LABEL: vwsll_vi_v4i64_v4i8:
+; CHECK-ZVBB-RV32: # %bb.0:
+; CHECK-ZVBB-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-ZVBB-RV32-NEXT: vzext.vf8 v10, v8
+; CHECK-ZVBB-RV32-NEXT: vsll.vi v8, v10, 2
+; CHECK-ZVBB-RV32-NEXT: ret
+;
+; CHECK-ZVBB-RV64-LABEL: vwsll_vi_v4i64_v4i8:
+; CHECK-ZVBB-RV64: # %bb.0:
+; CHECK-ZVBB-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-ZVBB-RV64-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-RV64-NEXT: vwsll.vi v8, v10, 2
+; CHECK-ZVBB-RV64-NEXT: ret
%x = zext <4 x i8> %a to <4 x i64>
%z = shl <4 x i64> %x, splat (i64 2)
ret <4 x i64> %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index 082de2e7bf77bf..72fc9c918f22c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -665,10 +665,10 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, <v
;
; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vzext.vf4 v11, v9
+; CHECK-ZVBB-NEXT: vwsll.vv v8, v10, v11
; CHECK-ZVBB-NEXT: ret
%x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
%y = zext <vscale x 2 x i8> %b to <vscale x 2 x i64>
@@ -739,11 +739,8 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
; CHECK-ZVBB: # %bb.0:
; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
%splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -794,12 +791,9 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
;
; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -850,12 +844,9 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a,
;
; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
; CHECK-ZVBB: # %bb.0:
-; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT: vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0
; CHECK-ZVBB-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer