[llvm] 5527139 - [RISCV][VP] Add RVV codegen for [nX]vXi1 vp.select
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 2 23:16:20 PST 2022
Author: Victor Perez
Date: 2022-01-02T23:12:32-08:00
New Revision: 5527139302d9b0416b9fa7f1b84760d6acacda12
URL: https://github.com/llvm/llvm-project/commit/5527139302d9b0416b9fa7f1b84760d6acacda12
DIFF: https://github.com/llvm/llvm-project/commit/5527139302d9b0416b9fa7f1b84760d6acacda12.diff
LOG: [RISCV][VP] Add RVV codegen for [nX]vXi1 vp.select
Expand [nX]vXi1 vp.select the same way as [nX]vXi1 vselect.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D115546
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 1493f36fcd3ef..96c5a79cf9958 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -133,6 +133,7 @@ class VectorLegalizer {
/// Implement vselect in terms of XOR, AND, OR when blend is not
/// supported by the target.
SDValue ExpandVSELECT(SDNode *Node);
+ SDValue ExpandVP_SELECT(SDNode *Node);
SDValue ExpandSELECT(SDNode *Node);
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
@@ -349,6 +350,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
+ case ISD::VP_SELECT:
case ISD::SELECT_CC:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
@@ -718,6 +720,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VSELECT:
Results.push_back(ExpandVSELECT(Node));
return;
+ case ISD::VP_SELECT:
+ Results.push_back(ExpandVP_SELECT(Node));
+ return;
case ISD::SELECT:
Results.push_back(ExpandSELECT(Node));
return;
@@ -1195,6 +1200,37 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
+/// Expand a VP_SELECT whose value operands are i1 vectors into predicated
+/// mask logic: (Op1 & Mask) | (Op2 & ~Mask).
+///
+/// \param Node the VP_SELECT node; operand 0 is the select condition,
+///        operands 1 and 2 are the true/false values and operand 3 is the
+///        explicit vector length (EVL).
+/// \returns the VP_OR node that computes the select, or the result of
+///          DAG.UnrollVectorOp(Node) when VP_AND/VP_XOR/VP_OR is marked
+///          Expand for the condition type or the values are not i1 vectors.
+SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
+  // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
+  // do not support it natively.
+  SDLoc DL(Node);
+
+  SDValue Mask = Node->getOperand(0);
+  SDValue Op1 = Node->getOperand(1);
+  SDValue Op2 = Node->getOperand(2);
+  SDValue EVL = Node->getOperand(3);
+
+  EVT VT = Mask.getValueType();
+
+  // If we can't even use the basic vector operations of
+  // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
+  if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Node);
+
+  // This operation also isn't safe when the operands aren't also booleans.
+  if (Op1.getValueType().getVectorElementType() != MVT::i1)
+    return DAG.UnrollVectorOp(Node);
+
+  // The helper VP nodes are predicated on an all-true mask rather than on the
+  // select condition. The result of a VP operation is poison in every lane
+  // where its mask is false, so predicating on the condition would turn all
+  // lanes that must yield Op2 into poison. Only EVL limits the active lanes.
+  SDValue Ones = DAG.getAllOnesConstant(DL, VT);
+  SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
+
+  Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
+  Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
+  return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
+}
+
void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// Attempt to expand using TargetLowering.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c782a6be4d64e..6154385d69010 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -561,6 +561,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::VP_SELECT, VT, Expand);
setOperationAction(ISD::VP_AND, VT, Custom);
setOperationAction(ISD::VP_OR, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index a6b5af9e625a9..38897b32b8e50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -4,6 +4,76 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
+declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
+
+; vp.select on <1 x i1>: %a is the first (condition) operand, %b and %c are the
+; two value operands and %evl is the vector-length operand. The expected code
+; is a vsetvli on a0 (e8, mf8) followed by vmandn.mm, vmand.mm and vmor.mm.
+define <1 x i1> @select_v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <1 x i1> @llvm.vp.select.v1i1(<1 x i1> %a, <1 x i1> %b, <1 x i1> %c, i32 %evl)
+ ret <1 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.select.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32)
+
+; vp.select on <2 x i1>: %a is the first (condition) operand, %b and %c are the
+; two value operands and %evl is the vector-length operand. The expected code
+; is a vsetvli on a0 (e8, mf8) followed by vmandn.mm, vmand.mm and vmor.mm.
+define <2 x i1> @select_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x i1> @llvm.vp.select.v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %c, i32 %evl)
+ ret <2 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.select.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+; vp.select on <4 x i1>: %a is the first (condition) operand, %b and %c are the
+; two value operands and %evl is the vector-length operand. The expected code
+; is a vsetvli on a0 (e8, mf4) followed by vmandn.mm, vmand.mm and vmor.mm.
+define <4 x i1> @select_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <4 x i1> @llvm.vp.select.v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c, i32 %evl)
+ ret <4 x i1> %v
+}
+
+declare <8 x i1> @llvm.vp.select.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32)
+
+; vp.select on <8 x i1>: %a is the first (condition) operand, %b and %c are the
+; two value operands and %evl is the vector-length operand. The expected code
+; is a vsetvli on a0 (e8, mf2) followed by vmandn.mm, vmand.mm and vmor.mm.
+define <8 x i1> @select_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <8 x i1> @llvm.vp.select.v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %c, i32 %evl)
+ ret <8 x i1> %v
+}
+
+declare <16 x i1> @llvm.vp.select.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32)
+
+; vp.select on <16 x i1>: %a is the first (condition) operand, %b and %c are
+; the two value operands and %evl is the vector-length operand. The expected
+; code is a vsetvli on a0 (e8, m1) followed by vmandn.mm, vmand.mm and vmor.mm.
+define <16 x i1> @select_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <16 x i1> @llvm.vp.select.v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %c, i32 %evl)
+ ret <16 x i1> %v
+}
+
declare <2 x i8> @llvm.vp.select.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
define <2 x i8> @select_v2i8(<2 x i1> %a, <2 x i8> %b, <2 x i8> %c, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 7d3ccc3912383..ca407f8436e3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -4,6 +4,104 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
+declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+; vp.select on <vscale x 1 x i1>: %a is the first (condition) operand, %b and
+; %c are the two value operands and %evl is the vector-length operand. The
+; expected code is a vsetvli on a0 (e8, mf8) followed by vmandn.mm, vmand.mm
+; and vmor.mm.
+define <vscale x 1 x i1> @select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %c, i32 %evl)
+ ret <vscale x 1 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i1>, i32)
+
+; vp.select on <vscale x 2 x i1>: %a is the first (condition) operand, %b and
+; %c are the two value operands and %evl is the vector-length operand. The
+; expected code is a vsetvli on a0 (e8, mf4) followed by vmandn.mm, vmand.mm
+; and vmor.mm.
+define <vscale x 2 x i1> @select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i1> @llvm.vp.select.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c, i32 %evl)
+ ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i1>, i32)
+
+; vp.select on <vscale x 4 x i1>: %a is the first (condition) operand, %b and
+; %c are the two value operands and %evl is the vector-length operand. The
+; expected code is a vsetvli on a0 (e8, mf2) followed by vmandn.mm, vmand.mm
+; and vmor.mm.
+define <vscale x 4 x i1> @select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i1> @llvm.vp.select.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, <vscale x 4 x i1> %c, i32 %evl)
+ ret <vscale x 4 x i1> %v
+}
+
+declare <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i1>, i32)
+
+; vp.select on <vscale x 8 x i1>: %a is the first (condition) operand, %b and
+; %c are the two value operands and %evl is the vector-length operand. The
+; expected code is a vsetvli on a0 (e8, m1) followed by vmandn.mm, vmand.mm
+; and vmor.mm.
+define <vscale x 8 x i1> @select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i1> @llvm.vp.select.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, <vscale x 8 x i1> %c, i32 %evl)
+ ret <vscale x 8 x i1> %v
+}
+
+declare <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32)
+
+; vp.select on <vscale x 16 x i1>: %a is the first (condition) operand, %b and
+; %c are the two value operands and %evl is the vector-length operand. The
+; expected code is a vsetvli on a0 (e8, m2) followed by vmandn.mm, vmand.mm
+; and vmor.mm.
+define <vscale x 16 x i1> @select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i1> @llvm.vp.select.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, <vscale x 16 x i1> %c, i32 %evl)
+ ret <vscale x 16 x i1> %v
+}
+
+declare <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>, <vscale x 32 x i1>, i32)
+
+; vp.select on <vscale x 32 x i1>: %a is the first (condition) operand, %b and
+; %c are the two value operands and %evl is the vector-length operand. The
+; expected code is a vsetvli on a0 (e8, m4) followed by vmandn.mm, vmand.mm
+; and vmor.mm.
+define <vscale x 32 x i1> @select_nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i1> @llvm.vp.select.nxv32i1(<vscale x 32 x i1> %a, <vscale x 32 x i1> %b, <vscale x 32 x i1> %c, i32 %evl)
+ ret <vscale x 32 x i1> %v
+}
+
+declare <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>, <vscale x 64 x i1>, i32)
+
+; vp.select on <vscale x 64 x i1>: %a is the first (condition) operand, %b and
+; %c are the two value operands and %evl is the vector-length operand. The
+; expected code is a vsetvli on a0 (e8, m8) followed by vmandn.mm, vmand.mm
+; and vmor.mm.
+define <vscale x 64 x i1> @select_nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT: vmandn.mm v9, v9, v0
+; CHECK-NEXT: vmand.mm v8, v8, v0
+; CHECK-NEXT: vmor.mm v0, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i1> @llvm.vp.select.nxv64i1(<vscale x 64 x i1> %a, <vscale x 64 x i1> %b, <vscale x 64 x i1> %c, i32 %evl)
+ ret <vscale x 64 x i1> %v
+}
+
declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
define <vscale x 1 x i8> @select_nxv1i8(<vscale x 1 x i1> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, i32 zeroext %evl) {
More information about the llvm-commits mailing list