[llvm] 38efa68 - [LegalizeTypes][VP] Add splitting support for vp.select
Victor Perez via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 7 00:46:12 PST 2022
Author: Victor Perez
Date: 2022-01-07T08:46:01Z
New Revision: 38efa68b083e419301d4df55f1cb48a90cf088b2
URL: https://github.com/llvm/llvm-project/commit/38efa68b083e419301d4df55f1cb48a90cf088b2
DIFF: https://github.com/llvm/llvm-project/commit/38efa68b083e419301d4df55f1cb48a90cf088b2.diff
LOG: [LegalizeTypes][VP] Add splitting support for vp.select
Split vp.select in the same way as vselect, also splitting the explicit
vector length (EVL) parameter.
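
For reference, the explicit vector length is divided between the two halves
as umin(%evl, %halfnumelts) for the low half and usubsat(%evl, %halfnumelts)
for the high half. The standalone C++ sketch below is illustrative only and
not part of the commit; splitEVL here is a hypothetical free function, while
the committed helper is SelectionDAG::SplitEVL, which builds the equivalent
ISD::UMIN/ISD::USUBSAT nodes on SDValues:

#include <algorithm>
#include <cstdio>
#include <utility>

// Split an explicit vector length between two halves of HalfNumElts
// elements each: the low half handles min(EVL, HalfNumElts) elements,
// the high half handles the saturating remainder (usubsat).
static std::pair<unsigned, unsigned> splitEVL(unsigned EVL,
                                              unsigned HalfNumElts) {
  unsigned Lo = std::min(EVL, HalfNumElts);
  unsigned Hi = EVL > HalfNumElts ? EVL - HalfNumElts : 0;
  return {Lo, Hi};
}

int main() {
  // Matches the new select_evl_v256i8 test: EVL = 129 over two
  // 128-element halves gives 128 elements to the low half and 1 to
  // the high half.
  auto [Lo, Hi] = splitEVL(129, 128);
  std::printf("low half: %u elements, high half: %u elements\n", Lo, Hi);
  return 0;
}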
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D116651
Added:
Modified:
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index c6f45ae25eed4..00f02e3f386a1 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -2001,6 +2001,9 @@ class SelectionDAG {
return SplitVector(N, DL, LoVT, HiVT);
}
+ /// Split the explicit vector length parameter of a VP operation.
+ std::pair<SDValue, SDValue> SplitEVL(SDValue N, EVT VecVT, const SDLoc &DL);
+
/// Split the node's operand with EXTRACT_SUBVECTOR and
/// return the low/high part.
std::pair<SDValue, SDValue> SplitVectorOperand(const SDNode *N, unsigned OpNo)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 27f9cede19220..6bf38d7296a8d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1193,7 +1193,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
llvm_unreachable("Do not know how to expand the result of this operator!");
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 17705b519a85c..8c7b90b6cd336 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2252,7 +2252,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
- case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index fe1f88c3d9700..f0f32118540eb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1022,7 +1022,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 3d3c9a2ad837c..c3a77879f225b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -506,9 +506,10 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
+ unsigned Opcode = N->getOpcode();
GetSplitOp(N->getOperand(1), LL, LH);
GetSplitOp(N->getOperand(2), RL, RH);
@@ -539,8 +540,18 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
- Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL);
- Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH);
+ if (Opcode != ISD::VP_SELECT) {
+ Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL);
+ Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH);
+ return;
+ }
+
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
+
+ Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL, EVLLo);
+ Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH, EVLHi);
}
void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index df936f99770d2..e33c313f40e36 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -914,7 +914,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::VSELECT:
- case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
+ case ISD::SELECT:
+ case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
@@ -1140,21 +1141,9 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) {
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
- // Split the vector length parameter.
- // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts).
- SDValue EVL = N->getOperand(3);
- EVT VecVT = N->getValueType(0);
- EVT EVLVT = EVL.getValueType();
- assert(VecVT.getVectorElementCount().isKnownEven() &&
- "Expecting the mask to be an evenly-sized vector");
- unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
- SDValue HalfNumElts =
- VecVT.isFixedLengthVector()
- ? DAG.getConstant(HalfMinNumElts, dl, EVLVT)
- : DAG.getVScale(dl, EVLVT,
- APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));
- SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);
- SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) =
+ DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
{LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index da3beaee81f16..2afef122ab5c4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -10648,6 +10648,23 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
return std::make_pair(Lo, Hi);
}
+std::pair<SDValue, SDValue> SelectionDAG::SplitEVL(SDValue N, EVT VecVT,
+ const SDLoc &DL) {
+ // Split the vector length parameter.
+ // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts).
+ EVT VT = N.getValueType();
+ assert(VecVT.getVectorElementCount().isKnownEven() &&
+ "Expecting the mask to be an evenly-sized vector");
+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
+ SDValue HalfNumElts =
+ VecVT.isFixedLengthVector()
+ ? getConstant(HalfMinNumElts, DL, VT)
+ : getVScale(DL, VT, APInt(VT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue Lo = getNode(ISD::UMIN, DL, VT, N, HalfNumElts);
+ SDValue Hi = getNode(ISD::USUBSAT, DL, VT, N, HalfNumElts);
+ return std::make_pair(Lo, Hi);
+}
+
/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
EVT VT = N.getValueType();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index b4e8417a6c025..78932c752496a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
@@ -146,6 +146,131 @@ define <16 x i8> @select_v16i8(<16 x i1> %a, <16 x i8> %b, <16 x i8> %c, i32 zer
ret <16 x i8> %v
}
+declare <256 x i8> @llvm.vp.select.v256i8(<256 x i1>, <256 x i8>, <256 x i8>, i32)
+
+define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v256i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 24
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a4, 128
+; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu
+; CHECK-NEXT: vle8.v v24, (a1)
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v1, v8
+; CHECK-NEXT: addi a1, a1, 128
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: bltu a3, a4, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: .LBB11_2:
+; CHECK-NEXT: li a4, 0
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vle8.v v24, (a1)
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: addi a0, a3, -128
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: bltu a3, a0, .LBB11_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a4, a0
+; CHECK-NEXT: .LBB11_4:
+; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 %evl)
+ ret <256 x i8> %v
+}
+
+define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c) {
+; CHECK-LABEL: select_evl_v256i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, a1, 128
+; CHECK-NEXT: vle8.v v24, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v9, v0
+; CHECK-NEXT: vle8.v v16, (a1)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v9
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: vmv8r.v v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 129)
+ ret <256 x i8> %v
+}
+
declare <2 x i16> @llvm.vp.select.v2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)
define <2 x i16> @select_v2i16(<2 x i1> %a, <2 x i16> %b, <2 x i16> %c, i32 zeroext %evl) {
@@ -290,6 +415,91 @@ define <16 x i64> @select_v16i64(<16 x i1> %a, <16 x i64> %b, <16 x i64> %c, i32
ret <16 x i64> %v
}
+declare <32 x i64> @llvm.vp.select.v32i64(<32 x i1>, <32 x i64>, <32 x i64>, i32)
+
+define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, a0, 128
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; CHECK-NEXT: vle64.v v24, (a1)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, a2, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v24, v0
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: bltu a2, a3, .LBB25_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a3
+; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT: vslidedown.vi v0, v24, 2
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: bltu a2, a0, .LBB25_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: .LBB25_4:
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 %evl)
+ ret <32 x i64> %v
+}
+
+define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) {
+; CHECK-LABEL: select_evl_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: vle64.v v24, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
+; CHECK-NEXT: ret
+ %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17)
+ ret <32 x i64> %v
+}
+
declare <2 x half> @llvm.vp.select.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32)
define <2 x half> @select_v2f16(<2 x i1> %a, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
@@ -386,6 +596,61 @@ define <16 x float> @select_v16f32(<16 x i1> %a, <16 x float> %b, <16 x float> %
ret <16 x float> %v
}
+declare <64 x float> @llvm.vp.select.v64f32(<64 x i1>, <64 x float>, <64 x float>, i32)
+
+define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_v64f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: li a3, 32
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT: vle32.v v24, (a0)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: bltu a2, a3, .LBB35_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: .LBB35_2:
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT: addi a0, a2, -32
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: bltu a2, a0, .LBB35_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a3, a0
+; CHECK-NEXT: .LBB35_4:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v0, v0, 4
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <64 x float> @llvm.vp.select.v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> %c, i32 %evl)
+ ret <64 x float> %v
+}
+
declare <2 x double> @llvm.vp.select.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)
define <2 x double> @select_v2f64(<2 x i1> %a, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index 1137350fa0906..43ea3cf78a9f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+experimental-zfh,+experimental-v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+experimental-zfh,+experimental-v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
@@ -342,6 +342,124 @@ define <vscale x 16 x i32> @select_nxv16i32(<vscale x 16 x i1> %a, <vscale x 16
ret <vscale x 16 x i32> %v
}
+declare <vscale x 32 x i32> @llvm.vp.select.nxv32i32(<vscale x 32 x i1>, <vscale x 32 x i32>, <vscale x 32 x i32>, i32)
+
+define <vscale x 32 x i32> @select_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a4, a1, 3
+; CHECK-NEXT: add a4, a0, a4
+; CHECK-NEXT: vl8re32.v v8, (a4)
+; CHECK-NEXT: srli a5, a1, 2
+; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, mu
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub a4, a2, a1
+; CHECK-NEXT: vslidedown.vx v0, v0, a5
+; CHECK-NEXT: bltu a2, a4, .LBB27_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: bltu a2, a1, .LBB27_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB27_4:
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i32> @llvm.vp.select.nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c, i32 %evl)
+ ret <vscale x 32 x i32> %v
+}
+
+declare i32 @llvm.vscale.i32()
+
+define <vscale x 32 x i32> @select_evl_nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c) {
+; CHECK-LABEL: select_evl_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a2, a0, a2
+; CHECK-NEXT: vl8re32.v v8, (a2)
+; CHECK-NEXT: srli a5, a1, 2
+; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: sub a4, a1, a2
+; CHECK-NEXT: vslidedown.vx v0, v0, a5
+; CHECK-NEXT: bltu a1, a4, .LBB28_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: .LBB28_2:
+; CHECK-NEXT: vl8re32.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: bltu a1, a2, .LBB28_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB28_4:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %evl = call i32 @llvm.vscale.i32()
+ %evl0 = mul i32 %evl, 8
+ %v = call <vscale x 32 x i32> @llvm.vp.select.nxv32i32(<vscale x 32 x i1> %a, <vscale x 32 x i32> %b, <vscale x 32 x i32> %c, i32 %evl0)
+ ret <vscale x 32 x i32> %v
+}
+
declare <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
define <vscale x 1 x i64> @select_nxv1i64(<vscale x 1 x i1> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c, i32 zeroext %evl) {
@@ -569,3 +687,60 @@ define <vscale x 8 x double> @select_nxv8f64(<vscale x 8 x i1> %a, <vscale x 8 x
%v = call <vscale x 8 x double> @llvm.vp.select.nxv8f64(<vscale x 8 x i1> %a, <vscale x 8 x double> %b, <vscale x 8 x double> %c, i32 %evl)
ret <vscale x 8 x double> %v
}
+
+declare <vscale x 16 x double> @llvm.vp.select.nxv16f64(<vscale x 16 x i1>, <vscale x 16 x double>, <vscale x 16 x double>, i32)
+
+define <vscale x 16 x double> @select_nxv16f64(<vscale x 16 x i1> %a, <vscale x 16 x double> %b, <vscale x 16 x double> %c, i32 zeroext %evl) {
+; CHECK-LABEL: select_nxv16f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: li a3, 0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a4, a1, 3
+; CHECK-NEXT: add a4, a0, a4
+; CHECK-NEXT: vl8re64.v v8, (a4)
+; CHECK-NEXT: srli a5, a1, 3
+; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, mu
+; CHECK-NEXT: sub a4, a2, a1
+; CHECK-NEXT: vslidedown.vx v0, v0, a5
+; CHECK-NEXT: bltu a2, a4, .LBB48_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a3, a4
+; CHECK-NEXT: .LBB48_2:
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, mu
+; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0
+; CHECK-NEXT: bltu a2, a1, .LBB48_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB48_4:
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu
+; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x double> @llvm.vp.select.nxv16f64(<vscale x 16 x i1> %a, <vscale x 16 x double> %b, <vscale x 16 x double> %c, i32 %evl)
+ ret <vscale x 16 x double> %v
+}