[llvm] [VP][RISCV] Introduce vp.splat and its RISC-V lowering. (PR #98731)
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 02:10:28 PDT 2024
https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/98731
From 61078222e63cbd1ce77ab3fc166c24bfc65ce4cd Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Thu, 11 Jul 2024 18:20:52 -0700
Subject: [PATCH 1/3] [VP][RISCV] Introduce vp.splat and its RISC-V lowering.
This patch introduces a vp intrinsic for splat. It's helpful for
IR-level passes to create a splat with a specific vector length.
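For illustration, a minimal IR-level use of the new intrinsic (a sketch; the
function name and values here are hypothetical):

define <vscale x 4 x i32> @make_splat(i32 %x, <vscale x 4 x i1> %m, i32 zeroext %evl) {
  ; Splat %x across the first %evl lanes; lanes that are masked off or past
  ; %evl are poison.
  %s = call <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 %x, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %s
}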
---
llvm/docs/LangRef.rst | 45 ++
llvm/include/llvm/IR/Intrinsics.td | 7 +
llvm/include/llvm/IR/VPIntrinsics.def | 7 +
.../SelectionDAG/LegalizeIntegerTypes.cpp | 17 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 26 +
llvm/lib/IR/IntrinsicInst.cpp | 3 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 31 +-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 +
.../RISCV/rvv/fixed-vectors-vp-splat.ll | 452 +++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vp-splat.ll | 464 ++++++++++++++++++
llvm/unittests/IR/VPIntrinsicTest.cpp | 2 +
12 files changed, 1052 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-splat.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index ae39217dc8ff8..37d32f3be7a0a 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -22841,6 +22841,51 @@ Examples:
llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, -2, 3, 2); ==> <B, C, poison, poison> trailing elements
+.. _int_experimental_vp_splat:
+
+
+'``llvm.experimental.vp.splat``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <2 x double> @llvm.experimental.vp.splat.v2f64(<2 x double> %vec, <2 x i1> %mask, i32 %evl)
+ declare <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vp.splat.*``' intrinsic is to create a prdicated splat
+with a specific effective vector length.
+
+Arguments:
+""""""""""
+
+The result is a vector and it is a splat of the second scalar operand. The
+second argument ``mask`` is a vector mask and has the same number of elements as
+the result. The third argument is the explicit vector length of the operation.
+
+Semantics:
+""""""""""
+
+This intrinsic splats a vector with ``evl`` elements of a scalar operand.
+The lanes in the result vector disabled by ``mask`` are ``poison``. The
+elements past ``evl`` are poison.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+ %r = call <4 x float> @llvm.experimental.vp.splat.v4f32(float %a, <4 x i1> %mask, i32 %evl)
+ ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+ %also.r = select <4 x i1> %mask, <4 x float> splat(float %a), <4 x float> poison
+
+
.. _int_experimental_vp_reverse:
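A worked instance of the semantics above (illustrative only, with %evl = 3):

  llvm.experimental.vp.splat.v4f32(float %a, <i1 1, i1 1, i1 0, i1 1>, i32 3)
  ==> <%a, %a, poison, poison>  ; lane 2 is masked off, lane 3 is past %evl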
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 65a9b68b5229d..0be7e963954ef 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2319,6 +2319,13 @@ def int_experimental_vp_reverse:
llvm_i32_ty],
[IntrNoMem]>;
+def int_experimental_vp_splat:
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMVectorElementType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
def int_vp_is_fpclass:
DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[ llvm_anyvector_ty,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 8eced073501e8..9d219e27b359e 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -777,6 +777,13 @@ END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
///// } Shuffles
+// llvm.vp.splat(ptr,val,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_splat, 1, 2)
+BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_SPLAT, -1, experimental_vp_splat, 1, 2)
+VP_PROPERTY_NO_FUNCTIONAL
+HELPER_MAP_VPID_TO_VPSD(experimental_vp_splat, EXPERIMENTAL_VP_SPLAT)
+END_REGISTER_VP(experimental_vp_splat, EXPERIMENTAL_VP_SPLAT)
+
#undef BEGIN_REGISTER_VP
#undef BEGIN_REGISTER_VP_INTRINSIC
#undef BEGIN_REGISTER_VP_SDNODE
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index fed5ebcc3c903..f21c2ba98e567 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -137,6 +137,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
+ case ISD::EXPERIMENTAL_VP_SPLAT:
Res = PromoteIntRes_ScalarOp(N);
break;
case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
@@ -1916,6 +1917,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
+ case ISD::EXPERIMENTAL_VP_SPLAT:
Res = PromoteIntOp_ScalarOp(N);
break;
case ISD::VSELECT:
@@ -2211,10 +2213,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
}
SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT)
+ return DAG.getNode(ISD::EXPERIMENTAL_VP_SPLAT, SDLoc(N), N->getValueType(0),
+ Op, N->getOperand(1), N->getOperand(2));
+
// Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
// so just promote the operand in place.
- return SDValue(DAG.UpdateNodeOperands(N,
- GetPromotedInteger(N->getOperand(0))), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
@@ -5231,6 +5237,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::EXPERIMENTAL_VP_SPLAT:
case ISD::SPLAT_VECTOR: Res = ExpandIntOp_SPLAT_VECTOR(N); break;
case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break;
@@ -5859,7 +5866,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
EVT NOutElemVT = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
-
+ if (N->isVPOpcode()) {
+ SDValue Mask = N->getOperand(1);
+ SDValue VL = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, Mask, VL);
+ }
return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
}
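The promotion hooks above handle element types that are not legal integers.
A sketch of the effect of PromoteIntRes_ScalarOp on such a type (this reading
follows the ANY_EXTEND above; the i31 case matches the tests added below):

  %s = call <16 x i31> @llvm.experimental.vp.splat.v16i31(i31 %x, <16 x i1> %m, i32 %evl)
  ; legalizes roughly as: any_extend %x to i32, then a <16 x i32>
  ; EXPERIMENTAL_VP_SPLAT with the same mask and EVL. The extended top bit is
  ; never observed, since only 31 bits of each element are meaningful.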
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 85f947efe2c75..f20cfe6de60cc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -915,6 +915,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
bool SplitSETCC = false);
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -1052,6 +1053,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_VP_SCATTER(SDNode* N, unsigned OpNo);
+ SDValue WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_STRICT_FSETCC(SDNode* N);
SDValue WidenVecOp_VSELECT(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bbf08e862da12..5015a665b1eb6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1076,6 +1076,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+ case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
SplitVecRes_ScalarOp(N, Lo, Hi);
@@ -1992,6 +1993,16 @@ void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
}
}
+void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
+ auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
+ auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
+}
+
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -4284,6 +4295,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
case ISD::SCALAR_TO_VECTOR:
+ case ISD::EXPERIMENTAL_VP_SPLAT:
Res = WidenVecRes_ScalarOp(N);
break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
@@ -5814,6 +5826,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ if (N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
}
@@ -6353,6 +6368,10 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
Res = WidenVecOp_FP_TO_XINT_SAT(N);
break;
+ case ISD::EXPERIMENTAL_VP_SPLAT:
+ Res = WidenVecOp_VP_SPLAT(N, OpNo);
+ break;
+
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -6813,6 +6832,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
report_fatal_error("Unable to widen vector store");
}
+SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 1 && "Can widen only mask operand of vp_splat");
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
+ N->getOperand(0), GetWidenedVector(N->getOperand(1)),
+ N->getOperand(2));
+}
+
SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
assert((OpNo == 1 || OpNo == 3) &&
"Can widen only data or mask operand of vp_store");
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index e17755c8ad57b..64a14da55b15e 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -699,6 +699,9 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
VPFunc = Intrinsic::getDeclaration(
M, VPID, {Params[0]->getType(), Params[1]->getType()});
break;
+ case Intrinsic::experimental_vp_splat:
+ VPFunc = Intrinsic::getDeclaration(M, VPID, ReturnType);
+ break;
}
assert(VPFunc && "Could not declare VP intrinsic");
return VPFunc;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 19f958ccfd2e1..b51b821450e63 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -705,7 +705,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
- ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
+ ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
+ ISD::EXPERIMENTAL_VP_SPLAT};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
@@ -721,7 +722,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
- ISD::VP_REDUCE_FMAXIMUM};
+ ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -7268,6 +7269,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPSpliceExperimental(Op, DAG);
case ISD::EXPERIMENTAL_VP_REVERSE:
return lowerVPReverseExperimental(Op, DAG);
+ case ISD::EXPERIMENTAL_VP_SPLAT:
+ return lowerVPSplatExperimental(Op, DAG);
case ISD::CLEAR_CACHE: {
assert(getTargetMachine().getTargetTriple().isOSLinux() &&
"llvm.clear_cache only needs custom lower on Linux targets");
@@ -11630,6 +11633,30 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
+SDValue
+RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Val = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue VL = Op.getOperand(2);
+ MVT VT = Op.getSimpleValueType();
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ SDValue Result = lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL,
+ DAG, Subtarget);
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
SDValue
RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 7d8bceb5cb341..449ff24492c69 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -972,6 +972,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll
new file mode 100644
index 0000000000000..2913cbdf0fffd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splat.ll
@@ -0,0 +1,452 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <1 x i8> @vp_splat_v1i8(i8 %val, <1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <1 x i8> @llvm.experimental.vp.splat.v1i8(i8 %val, <1 x i1> %m, i32 %evl)
+ ret <1 x i8> %splat
+}
+
+define <2 x i8> @vp_splat_v2i8(i8 %val, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <2 x i8> @llvm.experimental.vp.splat.v2i8(i8 %val, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %splat
+}
+
+define <4 x i8> @vp_splat_v4i8(i8 %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <4 x i8> @llvm.experimental.vp.splat.v4i8(i8 %val, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %splat
+}
+
+define <8 x i8> @vp_splat_v8i8(i8 %val, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <8 x i8> @llvm.experimental.vp.splat.v8i8(i8 %val, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %splat
+}
+
+define <16 x i8> @vp_splat_v16i8(i8 %val, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <16 x i8> @llvm.experimental.vp.splat.v16i8(i8 %val, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %splat
+}
+
+define <32 x i8> @vp_splat_v32i8(i8 %val, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <32 x i8> @llvm.experimental.vp.splat.v32i8(i8 %val, <32 x i1> %m, i32 %evl)
+ ret <32 x i8> %splat
+}
+
+define <64 x i8> @vp_splat_v64i8(i8 %val, <64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <64 x i8> @llvm.experimental.vp.splat.v64i8(i8 %val, <64 x i1> %m, i32 %evl)
+ ret <64 x i8> %splat
+}
+
+define <1 x i16> @vp_splat_v1i16(i16 %val, <1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <1 x i16> @llvm.experimental.vp.splat.v1i16(i16 %val, <1 x i1> %m, i32 %evl)
+ ret <1 x i16> %splat
+}
+
+define <2 x i16> @vp_splat_v2i16(i16 %val, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <2 x i16> @llvm.experimental.vp.splat.v2i16(i16 %val, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %splat
+}
+
+define <4 x i16> @vp_splat_v4i16(i16 %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <4 x i16> @llvm.experimental.vp.splat.v4i16(i16 %val, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %splat
+}
+
+define <8 x i16> @vp_splat_v8i16(i16 %val, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <8 x i16> @llvm.experimental.vp.splat.v8i16(i16 %val, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %splat
+}
+
+define <16 x i16> @vp_splat_v16i16(i16 %val, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <16 x i16> @llvm.experimental.vp.splat.v16i16(i16 %val, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %splat
+}
+
+define <32 x i16> @vp_splat_v32i16(i16 %val, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <32 x i16> @llvm.experimental.vp.splat.v32i16(i16 %val, <32 x i1> %m, i32 %evl)
+ ret <32 x i16> %splat
+}
+
+define <1 x i32> @vp_splat_v1i32(i32 %val, <1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <1 x i32> @llvm.experimental.vp.splat.v1i32(i32 %val, <1 x i1> %m, i32 %evl)
+ ret <1 x i32> %splat
+}
+
+define <2 x i32> @vp_splat_v2i32(i32 %val, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <2 x i32> @llvm.experimental.vp.splat.v2i32(i32 %val, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %splat
+}
+
+define <4 x i32> @vp_splat_v4i32(i32 %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <4 x i32> @llvm.experimental.vp.splat.v4i32(i32 %val, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %splat
+}
+
+define <8 x i32> @vp_splat_v8i32(i32 %val, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <8 x i32> @llvm.experimental.vp.splat.v8i32(i32 %val, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %splat
+}
+
+define <16 x i32> @vp_splat_v16i32(i32 %val, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <16 x i32> @llvm.experimental.vp.splat.v16i32(i32 %val, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %splat
+}
+
+define <1 x i64> @vp_splat_v1i64(i64 %val, <1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_v1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_v1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <1 x i64> @llvm.experimental.vp.splat.v1i64(i64 %val, <1 x i1> %m, i32 %evl)
+ ret <1 x i64> %splat
+}
+
+define <2 x i64> @vp_splat_v2i64(i64 %val, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <2 x i64> @llvm.experimental.vp.splat.v2i64(i64 %val, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %splat
+}
+
+define <4 x i64> @vp_splat_v4i64(i64 %val, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <4 x i64> @llvm.experimental.vp.splat.v4i64(i64 %val, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %splat
+}
+
+define <8 x i64> @vp_splat_v8i64(i64 %val, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <8 x i64> @llvm.experimental.vp.splat.v8i64(i64 %val, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %splat
+}
+
+define <1 x half> @vp_splat_v1f16(half %val, <1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <1 x half> @llvm.experimental.vp.splat.v1f16(half %val, <1 x i1> %m, i32 %evl)
+ ret <1 x half> %splat
+}
+
+define <2 x half> @vp_splat_v2f16(half %val, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <2 x half> @llvm.experimental.vp.splat.v2f16(half %val, <2 x i1> %m, i32 %evl)
+ ret <2 x half> %splat
+}
+
+define <4 x half> @vp_splat_v4f16(half %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <4 x half> @llvm.experimental.vp.splat.v4f16(half %val, <4 x i1> %m, i32 %evl)
+ ret <4 x half> %splat
+}
+
+define <8 x half> @vp_splat_v8f16(half %val, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <8 x half> @llvm.experimental.vp.splat.v8f16(half %val, <8 x i1> %m, i32 %evl)
+ ret <8 x half> %splat
+}
+
+define <16 x half> @vp_splat_v16f16(half %val, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <16 x half> @llvm.experimental.vp.splat.v16f16(half %val, <16 x i1> %m, i32 %evl)
+ ret <16 x half> %splat
+}
+
+define <32 x half> @vp_splat_v32f16(half %val, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <32 x half> @llvm.experimental.vp.splat.v32f16(half %val, <32 x i1> %m, i32 %evl)
+ ret <32 x half> %splat
+}
+
+define <1 x float> @vp_splat_v1f32(float %val, <1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <1 x float> @llvm.experimental.vp.splat.v1f32(float %val, <1 x i1> %m, i32 %evl)
+ ret <1 x float> %splat
+}
+
+define <2 x float> @vp_splat_v2f32(float %val, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <2 x float> @llvm.experimental.vp.splat.v2f32(float %val, <2 x i1> %m, i32 %evl)
+ ret <2 x float> %splat
+}
+
+define <4 x float> @vp_splat_v4f32(float %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <4 x float> @llvm.experimental.vp.splat.v4f32(float %val, <4 x i1> %m, i32 %evl)
+ ret <4 x float> %splat
+}
+
+define <8 x float> @vp_splat_v8f32(float %val, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <8 x float> @llvm.experimental.vp.splat.v8f32(float %val, <8 x i1> %m, i32 %evl)
+ ret <8 x float> %splat
+}
+
+define <16 x float> @vp_splat_v16f32(float %val, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <16 x float> @llvm.experimental.vp.splat.v16f32(float %val, <16 x i1> %m, i32 %evl)
+ ret <16 x float> %splat
+}
+
+define <1 x double> @vp_splat_v1f64(double %val, <1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <1 x double> @llvm.experimental.vp.splat.v1f64(double %val, <1 x i1> %m, i32 %evl)
+ ret <1 x double> %splat
+}
+
+define <2 x double> @vp_splat_v2f64(double %val, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <2 x double> @llvm.experimental.vp.splat.v2f64(double %val, <2 x i1> %m, i32 %evl)
+ ret <2 x double> %splat
+}
+
+define <4 x double> @vp_splat_v4f64(double %val, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <4 x double> @llvm.experimental.vp.splat.v4f64(double %val, <4 x i1> %m, i32 %evl)
+ ret <4 x double> %splat
+}
+
+define <8 x double> @vp_splat_v8f64(double %val, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <8 x double> @llvm.experimental.vp.splat.v8f64(double %val, <8 x i1> %m, i32 %evl)
+ ret <8 x double> %splat
+}
+
+define <16 x i31> @vp_splat_v16i31(i31 %val, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v16i31:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <16 x i31> @llvm.experimental.vp.splat.v16i31(i31 %val, <16 x i1> %m, i32 %evl)
+ ret <16 x i31> %splat
+}
+
+define <15 x i32> @vp_splat_v15i32(i32 %val, <15 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v15i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <15 x i32> @llvm.experimental.vp.splat.v15i32(i32 %val, <15 x i1> %m, i32 %evl)
+ ret <15 x i32> %splat
+}
+
+; Split case.
+define <32 x i32> @vp_splat_v32i32(i32 %val, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <32 x i32> @llvm.experimental.vp.splat.v32i32(i32 %val, <32 x i1> %m, i32 %evl)
+ ret <32 x i32> %splat
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll
new file mode 100644
index 0000000000000..5fbdefda9f402
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat.ll
@@ -0,0 +1,464 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <vscale x 1 x i8> @vp_splat_nxv1i8(i8 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x i8> @llvm.experimental.vp.splat.nxv1i8(i8 %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %splat
+}
+
+define <vscale x 2 x i8> @vp_splat_nxv2i8(i8 %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 2 x i8> @llvm.experimental.vp.splat.nxv2i8(i8 %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %splat
+}
+
+define <vscale x 4 x i8> @vp_splat_nxv4i8(i8 %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 4 x i8> @llvm.experimental.vp.splat.nxv4i8(i8 %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %splat
+}
+
+define <vscale x 8 x i8> @vp_splat_nxv8i8(i8 %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 8 x i8> @llvm.experimental.vp.splat.nxv8i8(i8 %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %splat
+}
+
+define <vscale x 16 x i8> @vp_splat_nxv16i8(i8 %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 16 x i8> @llvm.experimental.vp.splat.nxv16i8(i8 %val, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %splat
+}
+
+define <vscale x 32 x i8> @vp_splat_nxv32i8(i8 %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 32 x i8> @llvm.experimental.vp.splat.nxv32i8(i8 %val, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %splat
+}
+
+define <vscale x 64 x i8> @vp_splat_nxv64i8(i8 %val, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 64 x i8> @llvm.experimental.vp.splat.nxv64i8(i8 %val, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %splat
+}
+
+define <vscale x 1 x i16> @vp_splat_nxv1i16(i16 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x i16> @llvm.experimental.vp.splat.nxv1i16(i16 %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %splat
+}
+
+define <vscale x 2 x i16> @vp_splat_nxv2i16(i16 %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 2 x i16> @llvm.experimental.vp.splat.nxv2i16(i16 %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %splat
+}
+
+define <vscale x 4 x i16> @vp_splat_nxv4i16(i16 %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 4 x i16> @llvm.experimental.vp.splat.nxv4i16(i16 %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %splat
+}
+
+define <vscale x 8 x i16> @vp_splat_nxv8i16(i16 %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 8 x i16> @llvm.experimental.vp.splat.nxv8i16(i16 %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %splat
+}
+
+define <vscale x 16 x i16> @vp_splat_nxv16i16(i16 %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 16 x i16> @llvm.experimental.vp.splat.nxv16i16(i16 %val, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %splat
+}
+
+define <vscale x 32 x i16> @vp_splat_nxv32i16(i16 %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 32 x i16> @llvm.experimental.vp.splat.nxv32i16(i16 %val, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %splat
+}
+
+define <vscale x 1 x i32> @vp_splat_nxv1i32(i32 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x i32> @llvm.experimental.vp.splat.nxv1i32(i32 %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %splat
+}
+
+define <vscale x 2 x i32> @vp_splat_nxv2i32(i32 %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 2 x i32> @llvm.experimental.vp.splat.nxv2i32(i32 %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %splat
+}
+
+define <vscale x 4 x i32> @vp_splat_nxv4i32(i32 %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %splat
+}
+
+define <vscale x 8 x i32> @vp_splat_nxv8i32(i32 %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 8 x i32> @llvm.experimental.vp.splat.nxv8i32(i32 %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %splat
+}
+
+define <vscale x 16 x i32> @vp_splat_nxv16i32(i32 %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 16 x i32> @llvm.experimental.vp.splat.nxv16i32(i32 %val, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %splat
+}
+
+define <vscale x 1 x i64> @vp_splat_nxv1i64(i64 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <vscale x 1 x i64> @llvm.experimental.vp.splat.nxv1i64(i64 %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %splat
+}
+
+define <vscale x 2 x i64> @vp_splat_nxv2i64(i64 %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <vscale x 2 x i64> @llvm.experimental.vp.splat.nxv2i64(i64 %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %splat
+}
+
+define <vscale x 4 x i64> @vp_splat_nxv4i64(i64 %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <vscale x 4 x i64> @llvm.experimental.vp.splat.nxv4i64(i64 %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %splat
+}
+
+define <vscale x 8 x i64> @vp_splat_nxv8i64(i64 %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_splat_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: vlse64.v v8, (a0), zero
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_splat_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: ret
+ %splat = call <vscale x 8 x i64> @llvm.experimental.vp.splat.nxv8i64(i64 %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %splat
+}
+
+define <vscale x 1 x half> @vp_splat_nxv1f16(half %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x half> @llvm.experimental.vp.splat.nxv1f16(half %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x half> %splat
+}
+
+define <vscale x 2 x half> @vp_splat_nxv2f16(half %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 2 x half> @llvm.experimental.vp.splat.nxv2f16(half %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x half> %splat
+}
+
+define <vscale x 4 x half> @vp_splat_nxv4f16(half %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 4 x half> @llvm.experimental.vp.splat.nxv4f16(half %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x half> %splat
+}
+
+define <vscale x 8 x half> @vp_splat_nxv8f16(half %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 8 x half> @llvm.experimental.vp.splat.nxv8f16(half %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x half> %splat
+}
+
+define <vscale x 16 x half> @vp_splat_nxv16f16(half %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 16 x half> @llvm.experimental.vp.splat.nxv16f16(half %val, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x half> %splat
+}
+
+define <vscale x 32 x half> @vp_splat_nxv32f16(half %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 32 x half> @llvm.experimental.vp.splat.nxv32f16(half %val, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x half> %splat
+}
+
+define <vscale x 1 x float> @vp_splat_nxv1f32(float %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x float> @llvm.experimental.vp.splat.nxv1f32(float %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x float> %splat
+}
+
+define <vscale x 2 x float> @vp_splat_nxv2f32(float %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 2 x float> @llvm.experimental.vp.splat.nxv2f32(float %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x float> %splat
+}
+
+define <vscale x 4 x float> @vp_splat_nxv4f32(float %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 4 x float> @llvm.experimental.vp.splat.nxv4f32(float %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x float> %splat
+}
+
+define <vscale x 8 x float> @vp_splat_nxv8f32(float %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 8 x float> @llvm.experimental.vp.splat.nxv8f32(float %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x float> %splat
+}
+
+define <vscale x 16 x float> @vp_splat_nxv16f32(float %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 16 x float> @llvm.experimental.vp.splat.nxv16f32(float %val, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x float> %splat
+}
+
+define <vscale x 1 x double> @vp_splat_nxv1f64(double %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x double> @llvm.experimental.vp.splat.nxv1f64(double %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x double> %splat
+}
+
+define <vscale x 2 x double> @vp_splat_nxv2f64(double %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 2 x double> @llvm.experimental.vp.splat.nxv2f64(double %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x double> %splat
+}
+
+define <vscale x 4 x double> @vp_splat_nxv4f64(double %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 4 x double> @llvm.experimental.vp.splat.nxv4f64(double %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x double> %splat
+}
+
+define <vscale x 8 x double> @vp_splat_nxv8f64(double %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 8 x double> @llvm.experimental.vp.splat.nxv8f64(double %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x double> %splat
+}
+
+define <vscale x 16 x i31> @vp_splat_nxv16i31(i31 %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv16i31:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 16 x i31> @llvm.experimental.vp.splat.nxv16i31(i31 %val, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i31> %splat
+}
+
+define <vscale x 15 x i32> @vp_splat_nxv15i32(i32 %val, <vscale x 15 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv15i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 15 x i32> @llvm.experimental.vp.splat.nxv15i32(i32 %val, <vscale x 15 x i1> %m, i32 %evl)
+ ret <vscale x 15 x i32> %splat
+}
+
+; Split case.
+define <vscale x 32 x i32> @vp_splat_nxv32i32(i32 %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub a3, a1, a2
+; CHECK-NEXT: sltu a4, a1, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: and a3, a4, a3
+; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v16, a0
+; CHECK-NEXT: bltu a1, a2, .LBB39_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a1, a2
+; CHECK-NEXT: .LBB39_2:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 32 x i32> @llvm.experimental.vp.splat.nxv32i32(i32 %val, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i32> %splat
+}
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index d6508abd5197e..eab2850ca4e1e 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -108,6 +108,8 @@ class VPIntrinsicTest : public testing::Test {
"addrspace(1)*, i32, <8 x i1>, i32) ";
Str << " declare <8 x i32> @llvm.vp.gather.v8i32.v8p0i32(<8 x i32*>, <8 x "
"i1>, i32) ";
+ Str << " declare <8 x i32> @llvm.experimental.vp.splat.v8i32(i32, <8 x "
+ "i1>, i32) ";
for (const char *ReductionOpcode : ReductionIntOpcodes)
Str << " declare i32 @llvm.vp.reduce." << ReductionOpcode
From ddf92da37981463bd738be4d0d6d0dfe74aa0915 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Sun, 14 Jul 2024 19:32:01 -0700
Subject: [PATCH 2/3] fixup! code format.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b51b821450e63..bedd60001a44f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11633,9 +11633,8 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
-SDValue
-RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
SDValue Mask = Op.getOperand(1);
@@ -11649,8 +11648,8 @@ RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
- SDValue Result = lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL,
- DAG, Subtarget);
+ SDValue Result =
+ lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
if (!VT.isFixedLengthVector())
return Result;
From 83e97d341043e86aa3c495207d80641d49d04d98 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 16 Jul 2024 02:06:26 -0700
Subject: [PATCH 3/3] Address comment.
---
llvm/docs/LangRef.rst | 14 ++++++++------
llvm/include/llvm/IR/VPIntrinsics.def | 2 +-
.../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 13 ++++++-------
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
4 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 37d32f3be7a0a..828f5c5dee23f 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -22853,26 +22853,26 @@ This is an overloaded intrinsic.
::
- declare <2 x double> @llvm.experimental.vp.splat.v2f64(<2 x double> %vec, <2 x i1> %mask, i32 %evl)
- declare <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %mask, i32 %evl)
+ declare <2 x double> @llvm.experimental.vp.splat.v2f64(double %scalar, <2 x i1> %mask, i32 %evl)
+ declare <vscale x 4 x i32> @llvm.experimental.vp.splat.nxv4i32(i32 %scalar, <vscale x 4 x i1> %mask, i32 %evl)
Overview:
"""""""""
-The '``llvm.experimental.vp.splat.*``' intrinsic is to create a prdicated splat
+The '``llvm.experimental.vp.splat.*``' intrinsic creates a predicated splat
with a specific effective vector length.
Arguments:
""""""""""
-The result is a vector and it is a splat of the second scalar operand. The
+The result is a vector and it is a splat of the first scalar argument. The
second argument ``mask`` is a vector mask and has the same number of elements as
the result. The third argument is the explicit vector length of the operation.
Semantics:
""""""""""
-This intrinsic splats a vector with ``evl`` elements of a scalar operand.
+This intrinsic splats a vector with ``evl`` elements of a scalar argument.
The lanes in the result vector disabled by ``mask`` are ``poison``. The
elements past ``evl`` are poison.
@@ -22883,7 +22883,9 @@ Examples:
 %r = call <4 x float> @llvm.experimental.vp.splat.v4f32(float %a, <4 x i1> %mask, i32 %evl)
;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
- %also.r = select <4 x i1> %mask, <4 x float> splat(float %a), <4 x float> poison
+ %e = insertelement <4 x float> poison, float %a, i32 0
+ %s = shufflevector <4 x float> %e, <4 x float> poison, <4 x i32> zeroinitializer
+ %also.r = select <4 x i1> %mask, <4 x float> %s, <4 x float> poison
.. _int_experimental_vp_reverse:
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 9d219e27b359e..a4a1000d37259 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -777,7 +777,7 @@ END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
///// } Shuffles
-// llvm.vp.splat(ptr,val,mask,vlen)
+// llvm.vp.splat(val,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_splat, 1, 2)
BEGIN_REGISTER_VP_SDNODE(EXPERIMENTAL_VP_SPLAT, -1, experimental_vp_splat, 1, 2)
VP_PROPERTY_NO_FUNCTIONAL
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f21c2ba98e567..74a143f63dbbe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2215,8 +2215,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
SDValue DAGTypeLegalizer::PromoteIntOp_ScalarOp(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
if (N->getOpcode() == ISD::EXPERIMENTAL_VP_SPLAT)
- return DAG.getNode(ISD::EXPERIMENTAL_VP_SPLAT, SDLoc(N), N->getValueType(0),
- Op, N->getOperand(1), N->getOperand(2));
+ return SDValue(
+ DAG.UpdateNodeOperands(N, Op, N->getOperand(1), N->getOperand(2)), 0);
// Integer SPLAT_VECTOR/SCALAR_TO_VECTOR operands are implicitly truncated,
// so just promote the operand in place.
@@ -5866,11 +5866,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ScalarOp(SDNode *N) {
EVT NOutElemVT = NOutVT.getVectorElementType();
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, N->getOperand(0));
- if (N->isVPOpcode()) {
- SDValue Mask = N->getOperand(1);
- SDValue VL = N->getOperand(2);
- return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, Mask, VL);
- }
+ if (N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), dl, NOutVT, Op, N->getOperand(1),
+ N->getOperand(2));
+
return DAG.getNode(N->getOpcode(), dl, NOutVT, Op);
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bedd60001a44f..98d9b4286b708 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11649,7 +11649,7 @@ SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
}
SDValue Result =
- lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
+ lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
if (!VT.isFixedLengthVector())
return Result;