[llvm] 7cb452b - [SelectionDAG][VP] Add widening support for VP_MERGE
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 25 03:09:55 PST 2022
Author: Fraser Cormack
Date: 2022-01-25T10:59:40Z
New Revision: 7cb452bfde1086f7bcddfd6de5594ebcb4c11bf5
URL: https://github.com/llvm/llvm-project/commit/7cb452bfde1086f7bcddfd6de5594ebcb4c11bf5
DIFF: https://github.com/llvm/llvm-project/commit/7cb452bfde1086f7bcddfd6de5594ebcb4c11bf5.diff
LOG: [SelectionDAG][VP] Add widening support for VP_MERGE
This patch adds widening support for ISD::VP_MERGE, which is widened in
the same way as VP_SELECT and similarly to other select-like nodes: the
mask and vector operands are widened, and the EVL operand is passed
through unchanged.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D118030
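For context, a minimal reduced example of the kind of input this now
handles (a hypothetical sketch; the function name is invented, and the
nxv3i8/v6i8 tests added below exercise the same path). Assuming an RVV
target such as llc -mtriple=riscv64 -mattr=+v, <vscale x 3 x i8> is not
a legal type, so the VP_MERGE result is widened to the containing legal
type (<vscale x 4 x i8>) while the EVL operand is carried over as-is:

declare <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1>, <vscale x 3 x i8>, <vscale x 3 x i8>, i32)

define <vscale x 3 x i8> @widen_vpmerge_example(<vscale x 3 x i8> %a, <vscale x 3 x i8> %b, <vscale x 3 x i1> %m, i32 zeroext %evl) {
  ; The mask and both vector operands are widened; the merge still only
  ; affects the first %evl lanes because the EVL operand is unchanged.
  %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %a, <vscale x 3 x i8> %b, i32 %evl)
  ret <vscale x 3 x i8> %v
}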
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5dd5db2ad51f..0bd44ce4c872 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3231,6 +3231,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VSELECT:
case ISD::SELECT:
case ISD::VP_SELECT:
+ case ISD::VP_MERGE:
Res = WidenVecRes_Select(N);
break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
@@ -4782,7 +4783,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return Opcode == ISD::VP_SELECT
+ return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
N->getOperand(3))
: DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index e904e8a4d495..8ac3184f02c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -78,6 +78,43 @@ define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
ret <4 x i8> %v
}
+declare <6 x i8> @llvm.vp.merge.v6i8(<6 x i1>, <6 x i8>, <6 x i8>, i32)
+
+define <6 x i8> @vpmerge_vv_v6i8(<6 x i8> %va, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v6i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+ ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vx_v6i8(i8 %a, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v6i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <6 x i8> poison, i8 %a, i32 0
+ %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+ %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+ ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vi_v6i8(<6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v6i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <6 x i8> poison, i8 2, i32 0
+ %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+ %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+ ret <6 x i8> %v
+}
+
declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
@@ -981,10 +1018,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT: li a1, 0
-; RV32-NEXT: bltu a2, a3, .LBB72_2
+; RV32-NEXT: bltu a2, a3, .LBB75_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a1, a3
-; RV32-NEXT: .LBB72_2:
+; RV32-NEXT: .LBB75_2:
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32-NEXT: vslidedown.vi v0, v1, 2
@@ -1001,10 +1038,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT: bltu a2, a0, .LBB72_4
+; RV32-NEXT: bltu a2, a0, .LBB75_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB72_4:
+; RV32-NEXT: .LBB75_4:
; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: addi a0, sp, 16
@@ -1037,10 +1074,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
; RV64-NEXT: addi a1, a1, 16
; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT: li a1, 0
-; RV64-NEXT: bltu a2, a3, .LBB72_2
+; RV64-NEXT: bltu a2, a3, .LBB75_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a1, a3
-; RV64-NEXT: .LBB72_2:
+; RV64-NEXT: .LBB75_2:
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64-NEXT: vslidedown.vi v0, v1, 2
@@ -1049,10 +1086,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
; RV64-NEXT: addi a1, sp, 16
; RV64-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT: vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT: bltu a2, a0, .LBB72_4
+; RV64-NEXT: bltu a2, a0, .LBB75_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: li a2, 16
-; RV64-NEXT: .LBB72_4:
+; RV64-NEXT: .LBB75_4:
; RV64-NEXT: vsetvli zero, a2, e64, m8, tu, mu
; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: csrr a0, vlenb
@@ -1077,19 +1114,19 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
; CHECK-NEXT: addi a2, a0, -16
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: li a1, 0
-; CHECK-NEXT: bltu a0, a2, .LBB73_2
+; CHECK-NEXT: bltu a0, a2, .LBB76_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a1, a2
-; CHECK-NEXT: .LBB73_2:
+; CHECK-NEXT: .LBB76_2:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; CHECK-NEXT: vslidedown.vi v0, v24, 2
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0
-; CHECK-NEXT: bltu a0, a1, .LBB73_4
+; CHECK-NEXT: bltu a0, a1, .LBB76_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB73_4:
+; CHECK-NEXT: .LBB76_4:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 653217a20c54..6a4ac666b110 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -78,6 +78,43 @@ define <vscale x 2 x i8> @vpmerge_vi_nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x
ret <vscale x 2 x i8> %v
}
+declare <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1>, <vscale x 3 x i8>, <vscale x 3 x i8>, i32)
+
+define <vscale x 3 x i8> @vpmerge_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vpmerge_vx_nxv3i8(i8 %a, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 %a, i32 0
+ %va = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vpmerge_vi_nxv3i8(<vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 3 x i8> poison, i8 2, i32 0
+ %va = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+ %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+ ret <vscale x 3 x i8> %v
+}
+
declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
define <vscale x 4 x i8> @vpmerge_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -295,10 +332,10 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
; RV32-NEXT: addi a2, sp, 16
; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV32-NEXT: li a2, 0
-; RV32-NEXT: bltu a3, a4, .LBB21_2
+; RV32-NEXT: bltu a3, a4, .LBB24_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: mv a2, a4
-; RV32-NEXT: .LBB21_2:
+; RV32-NEXT: .LBB24_2:
; RV32-NEXT: vl8r.v v8, (a0)
; RV32-NEXT: vsetvli zero, a2, e8, m8, tu, mu
; RV32-NEXT: vmv1r.v v0, v2
@@ -313,10 +350,10 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT: bltu a3, a1, .LBB21_4
+; RV32-NEXT: bltu a3, a1, .LBB24_4
; RV32-NEXT: # %bb.3:
; RV32-NEXT: mv a3, a1
-; RV32-NEXT: .LBB21_4:
+; RV32-NEXT: .LBB24_4:
; RV32-NEXT: vsetvli zero, a3, e8, m8, tu, mu
; RV32-NEXT: vmv1r.v v0, v1
; RV32-NEXT: addi a0, sp, 16
@@ -347,18 +384,18 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
; RV64-NEXT: addi a2, sp, 16
; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV64-NEXT: li a2, 0
-; RV64-NEXT: bltu a3, a4, .LBB21_2
+; RV64-NEXT: bltu a3, a4, .LBB24_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: mv a2, a4
-; RV64-NEXT: .LBB21_2:
+; RV64-NEXT: .LBB24_2:
; RV64-NEXT: vl8r.v v8, (a0)
; RV64-NEXT: vsetvli zero, a2, e8, m8, tu, mu
; RV64-NEXT: vmv1r.v v0, v2
; RV64-NEXT: vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT: bltu a3, a1, .LBB21_4
+; RV64-NEXT: bltu a3, a1, .LBB24_4
; RV64-NEXT: # %bb.3:
; RV64-NEXT: mv a3, a1
-; RV64-NEXT: .LBB21_4:
+; RV64-NEXT: .LBB24_4:
; RV64-NEXT: vsetvli zero, a3, e8, m8, tu, mu
; RV64-NEXT: vmv1r.v v0, v1
; RV64-NEXT: addi a0, sp, 16
@@ -380,20 +417,20 @@ define <vscale x 128 x i8> @vpmerge_vx_nxv128i8(i8 %a, <vscale x 128 x i8> %vb,
; CHECK-NEXT: csrr a3, vlenb
; CHECK-NEXT: slli a3, a3, 3
; CHECK-NEXT: mv a4, a2
-; CHECK-NEXT: bltu a2, a3, .LBB22_2
+; CHECK-NEXT: bltu a2, a3, .LBB25_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: .LBB25_2:
; CHECK-NEXT: li a5, 0
; CHECK-NEXT: vsetvli a6, zero, e8, m8, ta, mu
; CHECK-NEXT: vlm.v v24, (a1)
; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, mu
; CHECK-NEXT: sub a1, a2, a3
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT: bltu a2, a1, .LBB22_4
+; CHECK-NEXT: bltu a2, a1, .LBB25_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a5, a1
-; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: .LBB25_4:
; CHECK-NEXT: vsetvli zero, a5, e8, m8, tu, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmerge.vxm v16, v16, a0, v0
@@ -410,20 +447,20 @@ define <vscale x 128 x i8> @vpmerge_vi_nxv128i8(<vscale x 128 x i8> %vb, <vscale
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: mv a3, a1
-; CHECK-NEXT: bltu a1, a2, .LBB23_2
+; CHECK-NEXT: bltu a1, a2, .LBB26_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a3, a2
-; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: .LBB26_2:
; CHECK-NEXT: li a4, 0
; CHECK-NEXT: vsetvli a5, zero, e8, m8, ta, mu
; CHECK-NEXT: vlm.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, mu
; CHECK-NEXT: sub a0, a1, a2
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
-; CHECK-NEXT: bltu a1, a0, .LBB23_4
+; CHECK-NEXT: bltu a1, a0, .LBB26_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: mv a4, a0
-; CHECK-NEXT: .LBB23_4:
+; CHECK-NEXT: .LBB26_4:
; CHECK-NEXT: vsetvli zero, a4, e8, m8, tu, mu
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vmerge.vim v16, v16, 2, v0