[llvm] [VP][RISCV][WIP] Add a vp.load.ff intrinsic for fault only first load. (PR #128593)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 22:17:20 PST 2025
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/128593
>From b4e4666fe2a91fdae98cee86a550ef6818c10b4e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 24 Feb 2025 13:05:35 -0800
Subject: [PATCH 1/7] [VP][RISCV][WIP] Add a vp.load.ff intrinsic for fault
only first load.
Seems there's been some interest in supporting early-exit loops
recently. https://discourse.llvm.org/t/rfc-supporting-more-early-exit-loops/84690
This patch was extracted from our downstream where we've been using
it in our vectorizer.
Still need to write up LangRef. Type legalization is also missing.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 2 +
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 17 +
llvm/include/llvm/IR/Intrinsics.td | 6 +
llvm/include/llvm/IR/VPIntrinsics.def | 6 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 28 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 32 +
.../SelectionDAG/SelectionDAGBuilder.h | 2 +
llvm/lib/IR/IntrinsicInst.cpp | 5 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 58 +
llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 +
.../RISCV/rvv/fixed-vectors-vploadff.ll | 633 +++++++++
llvm/test/CodeGen/RISCV/rvv/vploadff.ll | 1127 +++++++++++++++++
llvm/unittests/IR/VPIntrinsicTest.cpp | 2 +
13 files changed, 1919 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vploadff.ll
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index aa0dfbe666cde..b00b939ab2afc 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1572,6 +1572,8 @@ class SelectionDAG {
SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
ISD::MemIndexType IndexType);
+ SDValue getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachineMemOperand *MMO);
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT,
MachineMemOperand *MMO);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 20283ad8f2689..007055d88424b 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -3057,6 +3057,23 @@ class MaskedHistogramSDNode : public MaskedGatherScatterSDNode {
}
};
+class VPLoadFFSDNode : public MemSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPLoadFFSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MemSDNode(ISD::VP_LOAD_FF, Order, dl, VTs, MemVT, MMO) {}
+
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getVectorLength() const { return getOperand(3); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_LOAD_FF;
+ }
+};
+
class FPStateAccessSDNode : public MemSDNode {
public:
friend class SelectionDAG;
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 14ecae41ff08f..0c26c7bcfbad8 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1911,6 +1911,12 @@ def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
llvm_i32_ty],
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+def int_vp_load_ff : DefaultAttrsIntrinsic<[ llvm_anyvector_ty, llvm_i32_ty ],
+ [ llvm_anyptr_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+
def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
[ LLVMVectorOfAnyPointersToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 55f4719da7c8b..4a71097226f18 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -587,6 +587,12 @@ VP_PROPERTY_FUNCTIONAL_OPC(Load)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
END_REGISTER_VP(vp_load, VP_LOAD)
+BEGIN_REGISTER_VP_INTRINSIC(vp_load_ff, 1, 2)
+// val,evl,chain = VP_LOAD_FF chain,base,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_LOAD_FF, -1, vp_load_ff, 2, 3)
+HELPER_MAP_VPID_TO_VPSD(vp_load_ff, VP_LOAD_FF)
+VP_PROPERTY_NO_FUNCTIONAL
+END_REGISTER_VP(vp_load_ff, VP_LOAD_FF)
// llvm.experimental.vp.strided.load(ptr,stride,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3)
// chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9e61df7047d4a..ff6ef4d02c520 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -10139,6 +10139,34 @@ SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
return V;
}
+SDValue SelectionDAG::getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO) {
+ SDVTList VTs = getVTList(VT, EVL.getValueType(), MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD_FF, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadFFSDNode>(dl.getIROrder(),
+ VTs, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPLoadFFSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadFFSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
EVT MemVT, MachineMemOperand *MMO) {
assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1c58a7f05446c..a287bdeb1eb90 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8462,6 +8462,35 @@ void SelectionDAGBuilder::visitVPLoad(
setValue(&VPIntrin, LD);
}
+void SelectionDAGBuilder::visitVPLoadFF(
+ const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+ const SmallVectorImpl<SDValue> &OpValues) {
+ assert(OpValues.size() == 3);
+ SDLoc DL = getCurSDLoc();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ bool AddToChain = true;
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ if (!Alignment)
+ Alignment = DAG.getEVTAlign(VT);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+ AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1));
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(2));
+ setValue(&VPIntrin, DAG.getMergeValues({LD.getValue(0), Trunc}, DL));
+}
+
void SelectionDAGBuilder::visitVPGather(
const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues) {
@@ -8695,6 +8724,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
case ISD::VP_LOAD:
visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
break;
+ case ISD::VP_LOAD_FF:
+ visitVPLoadFF(VPIntrin, ValueVTs[0], ValueVTs[1], OpValues);
+ break;
case ISD::VP_GATHER:
visitVPGather(VPIntrin, ValueVTs[0], OpValues);
break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8496f8ae78ce6..b30695876828d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -632,6 +632,8 @@ class SelectionDAGBuilder {
void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues);
+ void visitVPLoadFF(const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
const SmallVectorImpl<SDValue> &OpValues);
void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 256bce1abe71f..7ddea32f57f02 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -448,6 +448,7 @@ VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
case Intrinsic::experimental_vp_strided_store:
return 1;
case Intrinsic::vp_load:
+ case Intrinsic::vp_load_ff:
case Intrinsic::vp_gather:
case Intrinsic::experimental_vp_strided_load:
return 0;
@@ -671,6 +672,10 @@ Function *VPIntrinsic::getOrInsertDeclarationForParams(
VPFunc = Intrinsic::getOrInsertDeclaration(
M, VPID, {ReturnType, Params[0]->getType()});
break;
+ case Intrinsic::vp_load_ff:
+ VPFunc = Intrinsic::getOrInsertDeclaration(
+ M, VPID, {ReturnType->getStructElementType(0), Params[0]->getType()});
+ break;
case Intrinsic::experimental_vp_strided_load:
VPFunc = Intrinsic::getOrInsertDeclaration(
M, VPID, {ReturnType, Params[0]->getType(), Params[1]->getType()});
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6076fe56416ad..3da92c0f43590 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -880,6 +880,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
@@ -1031,6 +1032,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1101,6 +1103,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FABS, VT, Expand);
@@ -1269,6 +1272,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
ISD::VP_SCATTER},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
@@ -1357,6 +1361,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
ISD::EXPERIMENTAL_VP_STRIDED_STORE},
VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
@@ -7616,6 +7621,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::MLOAD:
case ISD::VP_LOAD:
return lowerMaskedLoad(Op, DAG);
+ case ISD::VP_LOAD_FF:
+ return lowerLoadFF(Op, DAG);
case ISD::MSTORE:
case ISD::VP_STORE:
return lowerMaskedStore(Op, DAG);
@@ -11965,6 +11972,57 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
return DAG.getMergeValues({Result, Chain}, DL);
}
+SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getResNo() == 0);
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
+ EVT MemVT = VPLoadFF->getMemoryVT();
+ MachineMemOperand *MMO = VPLoadFF->getMemOperand();
+ SDValue Chain = VPLoadFF->getChain();
+ SDValue BasePtr = VPLoadFF->getBasePtr();
+
+ SDValue Mask = VPLoadFF->getMask();
+ SDValue VL = VPLoadFF->getVectorLength();
+
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ if (!IsUnmasked) {
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ unsigned IntID =
+ IsUnmasked ? Intrinsic::riscv_vleff : Intrinsic::riscv_vleff_mask;
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ Ops.push_back(BasePtr);
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+ if (!IsUnmasked)
+ Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
+
+ SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
+
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+ SDValue OutVL = Result.getValue(1);
+ Chain = Result.getValue(2);
+
+ if (VT.isFixedLengthVector())
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+ return DAG.getMergeValues({Result, OutVL, Chain}, DL);
+}
+
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 26b888653c81d..8bba8c50ba862 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -991,6 +991,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerLoadFF(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
new file mode 100644
index 0000000000000..9f982293256ac
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
@@ -0,0 +1,633 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+declare { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i8>, i32 } @vploadff_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x i8>, i32 } %load
+}
+
+define { <2 x i8>, i32 } @vploadff_v2i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x i8>, i32 } %load
+}
+
+declare { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i8>, i32 } @vploadff_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x i8>, i32 } %load
+}
+
+define { <4 x i8>, i32 } @vploadff_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x i8>, i32 } %load
+}
+
+declare { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i8>, i32 } @vploadff_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x i8>, i32 } %load
+}
+
+define { <8 x i8>, i32 } @vploadff_v8i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x i8>, i32 } %load
+}
+
+declare { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i16>, i32 } @vploadff_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x i16>, i32 } %load
+}
+
+define { <2 x i16>, i32 } @vploadff_v2i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x i16>, i32 } %load
+}
+
+declare { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i16>, i32 } @vploadff_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x i16>, i32 } %load
+}
+
+define { <4 x i16>, i32 } @vploadff_v4i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x i16>, i32 } %load
+}
+
+declare { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i16>, i32 } @vploadff_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x i16>, i32 } %load
+}
+
+define { <8 x i16>, i32 } @vploadff_v8i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x i16>, i32 } %load
+}
+
+declare { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i32>, i32 } @vploadff_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x i32>, i32 } %load
+}
+
+define { <2 x i32>, i32 } @vploadff_v2i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x i32>, i32 } %load
+}
+
+declare { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i32>, i32 } @vploadff_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x i32>, i32 } %load
+}
+
+define { <4 x i32>, i32 } @vploadff_v4i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x i32>, i32 } %load
+}
+
+declare { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i32>, i32 } @vploadff_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x i32>, i32 } %load
+}
+
+define { <8 x i32>, i32 } @vploadff_v8i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x i32>, i32 } %load
+}
+
+declare { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i64>, i32 } @vploadff_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x i64>, i32 } %load
+}
+
+define { <2 x i64>, i32 } @vploadff_v2i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x i64>, i32 } %load
+}
+
+declare { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i64>, i32 } @vploadff_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x i64>, i32 } %load
+}
+
+define { <4 x i64>, i32 } @vploadff_v4i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x i64>, i32 } %load
+}
+
+declare { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i64>, i32 } @vploadff_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x i64>, i32 } %load
+}
+
+define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x i64>, i32 } %load
+}
+
+declare { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr, <2 x i1>, i32)
+
+define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x half>, i32 } %load
+}
+
+define { <2 x half>, i32 } @vploadff_v2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x half>, i32 } %load
+}
+
+declare { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr, <4 x i1>, i32)
+
+define { <4 x half>, i32 } @vploadff_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x half>, i32 } %load
+}
+
+define { <4 x half>, i32 } @vploadff_v4f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x half>, i32 } %load
+}
+
+declare { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr, <8 x i1>, i32)
+
+define { <8 x half>, i32 } @vploadff_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x half>, i32 } %load
+}
+
+define { <8 x half>, i32 } @vploadff_v8f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x half>, i32 } %load
+}
+
+declare { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr, <2 x i1>, i32)
+
+define { <2 x float>, i32 } @vploadff_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x float>, i32 } %load
+}
+
+define { <2 x float>, i32 } @vploadff_v2f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x float>, i32 } %load
+}
+
+declare { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr, <4 x i1>, i32)
+
+define { <4 x float>, i32 } @vploadff_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x float>, i32 } %load
+}
+
+define { <4 x float>, i32 } @vploadff_v4f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x float>, i32 } %load
+}
+
+declare { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr, <8 x i1>, i32)
+
+define { <8 x float>, i32 } @vploadff_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x float>, i32 } %load
+}
+
+define { <8 x float>, i32 } @vploadff_v8f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x float>, i32 } %load
+}
+
+declare { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr, <2 x i1>, i32)
+
+define { <2 x double>, i32 } @vploadff_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x double>, i32 } %load
+}
+
+define { <2 x double>, i32 } @vploadff_v2f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x double>, i32 } %load
+}
+
+declare { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr, <4 x i1>, i32)
+
+define { <4 x double>, i32 } @vploadff_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x double>, i32 } %load
+}
+
+define { <4 x double>, i32 } @vploadff_v4f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x double>, i32 } %load
+}
+
+declare { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr, <8 x i1>, i32)
+
+define { <8 x double>, i32 } @vploadff_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x double>, i32 } %load
+}
+
+define { <8 x double>, i32 } @vploadff_v8f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x double>, i32 } %load
+}
+
+declare { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr, <2 x i1>, i32)
+
+define { <2 x bfloat>, i32 } @vploadff_v2bf16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+ ret { <2 x bfloat>, i32 } %load
+}
+
+define { <2 x bfloat>, i32 } @vploadff_v2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+ %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ ret { <2 x bfloat>, i32 } %load
+}
+
+declare { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr, <4 x i1>, i32)
+
+define { <4 x bfloat>, i32 } @vploadff_v4bf16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+ ret { <4 x bfloat>, i32 } %load
+}
+
+define { <4 x bfloat>, i32 } @vploadff_v4bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+ %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ ret { <4 x bfloat>, i32 } %load
+}
+
+declare { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr, <8 x i1>, i32)
+
+define { <8 x bfloat>, i32 } @vploadff_v8bf16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+ ret { <8 x bfloat>, i32 } %load
+}
+
+define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+ %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ ret { <8 x bfloat>, i32 } %load
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
new file mode 100644
index 0000000000000..11812eec6ac46
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
@@ -0,0 +1,1127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN: -verify-machineinstrs < %s | FileCheck %s
+
+declare { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x i8>, i32 } %load
+}
+
+define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x i8>, i32 } %load
+}
+
+declare { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x i8>, i32 } %load
+}
+
+define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x i8>, i32 } %load
+}
+
+declare { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x i8>, i32 } %load
+}
+
+define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x i8>, i32 } %load
+}
+
+declare { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x i8>, i32 } %load
+}
+
+define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x i8>, i32 } %load
+}
+
+declare { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+ ret { <vscale x 16 x i8>, i32 } %load
+}
+
+define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ ret { <vscale x 16 x i8>, i32 } %load
+}
+
+declare { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr, <vscale x 32 x i1>, i32)
+
+define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl)
+ ret { <vscale x 32 x i8>, i32 } %load
+}
+
+define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ ret { <vscale x 32 x i8>, i32 } %load
+}
+
+declare { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr, <vscale x 64 x i1>, i32)
+
+define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8(ptr %ptr, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> %m, i32 %evl)
+ ret { <vscale x 64 x i8>, i32 } %load
+}
+
+define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv64i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 64 x i1> %a, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+ %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> %b, i32 %evl)
+ ret { <vscale x 64 x i8>, i32 } %load
+}
+
+declare { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x i16>, i32 } %load
+}
+
+define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x i16>, i32 } %load
+}
+
+declare { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x i16>, i32 } %load
+}
+
+define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x i16>, i32 } %load
+}
+
+declare { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x i16>, i32 } %load
+}
+
+define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x i16>, i32 } %load
+}
+
+declare { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x i16>, i32 } %load
+}
+
+define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x i16>, i32 } %load
+}
+
+declare { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+ ret { <vscale x 16 x i16>, i32 } %load
+}
+
+define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ ret { <vscale x 16 x i16>, i32 } %load
+}
+
+declare { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr, <vscale x 32 x i1>, i32)
+
+define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl)
+ ret { <vscale x 32 x i16>, i32 } %load
+}
+
+define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ ret { <vscale x 32 x i16>, i32 } %load
+}
+
+declare { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x i32>, i32 } %load
+}
+
+define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x i32>, i32 } %load
+}
+
+declare { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x i32>, i32 } %load
+}
+
+define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x i32>, i32 } %load
+}
+
+declare { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x i32>, i32 } %load
+}
+
+define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x i32>, i32 } %load
+}
+
+declare { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x i32>, i32 } %load
+}
+
+define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x i32>, i32 } %load
+}
+
+declare { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+ ret { <vscale x 16 x i32>, i32 } %load
+}
+
+define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ ret { <vscale x 16 x i32>, i32 } %load
+}
+
+declare { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x i64>, i32 } %load
+}
+
+define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x i64>, i32 } %load
+}
+
+declare { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x i64>, i32 } %load
+}
+
+define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x i64>, i32 } %load
+}
+
+declare { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x i64>, i32 } %load
+}
+
+define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x i64>, i32 } %load
+}
+
+declare { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x i64>, i32 } %load
+}
+
+define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x i64>, i32 } %load
+}
+
+declare { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x half>, i32 } %load
+}
+
+define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x half>, i32 } %load
+}
+
+declare { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x half>, i32 } %load
+}
+
+define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x half>, i32 } %load
+}
+
+declare { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x half>, i32 } %load
+}
+
+define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x half>, i32 } %load
+}
+
+declare { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x half>, i32 } %load
+}
+
+define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x half>, i32 } %load
+}
+
+declare { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+ ret { <vscale x 16 x half>, i32 } %load
+}
+
+define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ ret { <vscale x 16 x half>, i32 } %load
+}
+
+declare { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr, <vscale x 32 x i1>, i32)
+
+define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl)
+ ret { <vscale x 32 x half>, i32 } %load
+}
+
+define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32f16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ ret { <vscale x 32 x half>, i32 } %load
+}
+
+declare { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x float>, i32 } %load
+}
+
+define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x float>, i32 } %load
+}
+
+declare { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x float>, i32 } %load
+}
+
+define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x float>, i32 } %load
+}
+
+declare { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x float>, i32 } %load
+}
+
+define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x float>, i32 } %load
+}
+
+declare { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x float>, i32 } %load
+}
+
+define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x float>, i32 } %load
+}
+
+declare { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x float>, i32 } @vploadff_nxv16f32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+ ret { <vscale x 16 x float>, i32 } %load
+}
+
+define { <vscale x 16 x float>, i32 } @vploadff_nxv16f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16f32_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %load = call { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ ret { <vscale x 16 x float>, i32 } %load
+}
+
+declare { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x double>, i32 } @vploadff_nxv1f64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x double>, i32 } %load
+}
+
+define { <vscale x 1 x double>, i32 } @vploadff_nxv1f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x double>, i32 } %load
+}
+
+declare { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x double>, i32 } @vploadff_nxv2f64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x double>, i32 } %load
+}
+
+define { <vscale x 2 x double>, i32 } @vploadff_nxv2f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x double>, i32 } %load
+}
+
+declare { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x double>, i32 } @vploadff_nxv4f64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x double>, i32 } %load
+}
+
+define { <vscale x 4 x double>, i32 } @vploadff_nxv4f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x double>, i32 } %load
+}
+
+declare { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x double>, i32 } @vploadff_nxv8f64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x double>, i32 } %load
+}
+
+define { <vscale x 8 x double>, i32 } @vploadff_nxv8f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x double>, i32 } %load
+}
+
+declare { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x bfloat>, i32 } @vploadff_nxv1bf16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+ ret { <vscale x 1 x bfloat>, i32 } %load
+}
+
+define { <vscale x 1 x bfloat>, i32 } @vploadff_nxv1bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %load = call { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ ret { <vscale x 1 x bfloat>, i32 } %load
+}
+
+declare { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x bfloat>, i32 } @vploadff_nxv2bf16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+ ret { <vscale x 2 x bfloat>, i32 } %load
+}
+
+define { <vscale x 2 x bfloat>, i32 } @vploadff_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+ %load = call { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ ret { <vscale x 2 x bfloat>, i32 } %load
+}
+
+declare { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x bfloat>, i32 } @vploadff_nxv4bf16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+ ret { <vscale x 4 x bfloat>, i32 } %load
+}
+
+define { <vscale x 4 x bfloat>, i32 } @vploadff_nxv4bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+ %load = call { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ ret { <vscale x 4 x bfloat>, i32 } %load
+}
+
+declare { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x bfloat>, i32 } @vploadff_nxv8bf16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+ ret { <vscale x 8 x bfloat>, i32 } %load
+}
+
+define { <vscale x 8 x bfloat>, i32 } @vploadff_nxv8bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %load = call { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ ret { <vscale x 8 x bfloat>, i32 } %load
+}
+
+declare { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x bfloat>, i32 } @vploadff_nxv16bf16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+ ret { <vscale x 16 x bfloat>, i32 } %load
+}
+
+define { <vscale x 16 x bfloat>, i32 } @vploadff_nxv16bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+ %load = call { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ ret { <vscale x 16 x bfloat>, i32 } %load
+}
+
+declare { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr, <vscale x 32 x i1>, i32)
+
+define { <vscale x 32 x bfloat>, i32 } @vploadff_nxv32bf16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl)
+ ret { <vscale x 32 x bfloat>, i32 } %load
+}
+
+define { <vscale x 32 x bfloat>, i32 } @vploadff_nxv32bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32bf16_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT: vle16ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %load = call { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ ret { <vscale x 32 x bfloat>, i32 } %load
+}
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index d6ad7599ce461..a101979ee6a4a 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -100,6 +100,8 @@ class VPIntrinsicTest : public testing::Test {
"i32*>, <8 x i1>, i32) ";
Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x "
"i1>, i32) ";
+ Str << " declare {<8 x i32>, i32} @llvm.vp.load.ff.v8i32.p0v8i32(<8 x "
+ "i32>*, <8 x i1>, i32) ";
Str << "declare <8 x i32> "
"@llvm.experimental.vp.strided.load.v8i32.i32(i32*, i32, <8 "
"x i1>, i32) ";
>From 2fb476edf27c38f52bce772f2ce52e7891c5b71d Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 3 Mar 2025 16:13:07 -0800
Subject: [PATCH 2/7] fixup! Add documentation.
---
llvm/docs/LangRef.rst | 57 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 33c85c7ba9d29..9acda87804ec7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -23943,6 +23943,63 @@ Examples:
%also.r = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %ptr, i32 2, <8 x i1> %mask, <8 x i8> poison)
+.. _int_vp_ff_load:
+
+'``llvm.vp.load.ff``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare {<4 x float>, i32} @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %mask, i32 %evl)
+ declare {<vscale x 2 x i16>, i32} @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %mask, i32 %evl)
+ declare {<8 x float>, i32} @llvm.vp.load.ff.v8f32.p1(ptr addrspace(1) %ptr, <8 x i1> %mask, i32 %evl)
+ declare {<vscale x 1 x i64>, i32} @llvm.vp.load.ff.nxv1i64.p6(ptr addrspace(6) %ptr, <vscale x 1 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.load.ff.*``' intrinsic is similar to '``llvm.vp.load.*``', but
+will not trap if fewer than ``evl`` elements are readable at the pointer.
+
+Arguments:
+""""""""""
+
+The first argument is the base pointer for the load. The second argument is a
+vector of boolean values with the same number of elements as the first return
+type. The third is the explicit vector length of the operation. The first
+return type and underlying type of the base pointer are the same vector types.
+
+The :ref:`align <attr_align>` parameter attribute can be provided for the first
+argument.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.load.ff``' intrinsic reads a vector from memory similar to
+'``llvm.vp.load``', but will only trap if the first lane is unreadable. If
+any other lane is unreadable, the number of successfully read lanes will
+be returned in the second return value. The result in the first return value
+for the lanes that were not successfully read is a
+:ref:`poison value <poisonvalues>`. If ``evl`` is 0, no read occurs and thus no
+trap can occur for the first lane. If ``mask`` is 0 for the first lane, no
+trap occurs. This intrinsic is allowed to read fewer than ``evl`` lanes even
+if no trap would occur. If ``evl`` is non-zero, the second return value
+must be at least 1, even if the first lane is disabled by ``mask``.
+
+The default alignment is taken as the ABI alignment of the first return
+type as specified by the :ref:`datalayout string<langref_datalayout>`.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+ %r = call {<8 x i8>, i32} @llvm.vp.load.ff.v8i8.p0(ptr align 2 %ptr, <8 x i1> %mask, i32 %evl)
+
.. _int_vp_store:
'``llvm.vp.store``' Intrinsic
>From 4f2fbff97a72284d3d46184fa5b0af64096d85c7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 3 Mar 2025 17:57:32 -0800
Subject: [PATCH 3/7] fixup! remove intrinsic declare and use splat
---
.../RISCV/rvv/fixed-vectors-vploadff.ll | 144 ++--------
llvm/test/CodeGen/RISCV/rvv/vploadff.ll | 258 +++---------------
2 files changed, 67 insertions(+), 335 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
index 9f982293256ac..474f859e9789d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
@@ -8,8 +8,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-declare { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr, <2 x i1>, i32)
-
define { <2 x i8>, i32 } @vploadff_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2i8:
; CHECK: # %bb.0:
@@ -28,14 +26,10 @@ define { <2 x i8>, i32 } @vploadff_v2i8_allones_mask(ptr %ptr, i32 zeroext %evl)
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x i8>, i32 } %load
}
-declare { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr, <4 x i1>, i32)
-
define { <4 x i8>, i32 } @vploadff_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4i8:
; CHECK: # %bb.0:
@@ -54,14 +48,10 @@ define { <4 x i8>, i32 } @vploadff_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl)
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x i8>, i32 } %load
}
-declare { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr, <8 x i1>, i32)
-
define { <8 x i8>, i32 } @vploadff_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8i8:
; CHECK: # %bb.0:
@@ -80,14 +70,10 @@ define { <8 x i8>, i32 } @vploadff_v8i8_allones_mask(ptr %ptr, i32 zeroext %evl)
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x i8>, i32 } %load
}
-declare { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr, <2 x i1>, i32)
-
define { <2 x i16>, i32 } @vploadff_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2i16:
; CHECK: # %bb.0:
@@ -106,14 +92,10 @@ define { <2 x i16>, i32 } @vploadff_v2i16_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x i16>, i32 } %load
}
-declare { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr, <4 x i1>, i32)
-
define { <4 x i16>, i32 } @vploadff_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4i16:
; CHECK: # %bb.0:
@@ -132,14 +114,10 @@ define { <4 x i16>, i32 } @vploadff_v4i16_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x i16>, i32 } %load
}
-declare { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr, <8 x i1>, i32)
-
define { <8 x i16>, i32 } @vploadff_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8i16:
; CHECK: # %bb.0:
@@ -158,14 +136,10 @@ define { <8 x i16>, i32 } @vploadff_v8i16_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x i16>, i32 } %load
}
-declare { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr, <2 x i1>, i32)
-
define { <2 x i32>, i32 } @vploadff_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2i32:
; CHECK: # %bb.0:
@@ -184,14 +158,10 @@ define { <2 x i32>, i32 } @vploadff_v2i32_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x i32>, i32 } %load
}
-declare { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr, <4 x i1>, i32)
-
define { <4 x i32>, i32 } @vploadff_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4i32:
; CHECK: # %bb.0:
@@ -210,14 +180,10 @@ define { <4 x i32>, i32 } @vploadff_v4i32_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x i32>, i32 } %load
}
-declare { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr, <8 x i1>, i32)
-
define { <8 x i32>, i32 } @vploadff_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8i32:
; CHECK: # %bb.0:
@@ -236,14 +202,10 @@ define { <8 x i32>, i32 } @vploadff_v8i32_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x i32>, i32 } %load
}
-declare { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr, <2 x i1>, i32)
-
define { <2 x i64>, i32 } @vploadff_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2i64:
; CHECK: # %bb.0:
@@ -262,14 +224,10 @@ define { <2 x i64>, i32 } @vploadff_v2i64_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x i64>, i32 } %load
}
-declare { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr, <4 x i1>, i32)
-
define { <4 x i64>, i32 } @vploadff_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4i64:
; CHECK: # %bb.0:
@@ -288,14 +246,10 @@ define { <4 x i64>, i32 } @vploadff_v4i64_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x i64>, i32 } %load
}
-declare { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr, <8 x i1>, i32)
-
define { <8 x i64>, i32 } @vploadff_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8i64:
; CHECK: # %bb.0:
@@ -314,14 +268,10 @@ define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %ev
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x i64>, i32 } %load
}
-declare { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr, <2 x i1>, i32)
-
define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2f16:
; CHECK: # %bb.0:
@@ -340,14 +290,10 @@ define { <2 x half>, i32 } @vploadff_v2f16_allones_mask(ptr %ptr, i32 zeroext %e
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x half>, i32 } %load
}
-declare { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr, <4 x i1>, i32)
-
define { <4 x half>, i32 } @vploadff_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4f16:
; CHECK: # %bb.0:
@@ -366,14 +312,10 @@ define { <4 x half>, i32 } @vploadff_v4f16_allones_mask(ptr %ptr, i32 zeroext %e
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x half>, i32 } %load
}
-declare { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr, <8 x i1>, i32)
-
define { <8 x half>, i32 } @vploadff_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8f16:
; CHECK: # %bb.0:
@@ -392,14 +334,10 @@ define { <8 x half>, i32 } @vploadff_v8f16_allones_mask(ptr %ptr, i32 zeroext %e
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x half>, i32 } %load
}
-declare { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr, <2 x i1>, i32)
-
define { <2 x float>, i32 } @vploadff_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2f32:
; CHECK: # %bb.0:
@@ -418,14 +356,10 @@ define { <2 x float>, i32 } @vploadff_v2f32_allones_mask(ptr %ptr, i32 zeroext %
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x float>, i32 } %load
}
-declare { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr, <4 x i1>, i32)
-
define { <4 x float>, i32 } @vploadff_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4f32:
; CHECK: # %bb.0:
@@ -444,14 +378,10 @@ define { <4 x float>, i32 } @vploadff_v4f32_allones_mask(ptr %ptr, i32 zeroext %
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x float>, i32 } %load
}
-declare { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr, <8 x i1>, i32)
-
define { <8 x float>, i32 } @vploadff_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8f32:
; CHECK: # %bb.0:
@@ -470,14 +400,10 @@ define { <8 x float>, i32 } @vploadff_v8f32_allones_mask(ptr %ptr, i32 zeroext %
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x float>, i32 } %load
}
-declare { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr, <2 x i1>, i32)
-
define { <2 x double>, i32 } @vploadff_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2f64:
; CHECK: # %bb.0:
@@ -496,14 +422,10 @@ define { <2 x double>, i32 } @vploadff_v2f64_allones_mask(ptr %ptr, i32 zeroext
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x double>, i32 } %load
}
-declare { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr, <4 x i1>, i32)
-
define { <4 x double>, i32 } @vploadff_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4f64:
; CHECK: # %bb.0:
@@ -522,14 +444,10 @@ define { <4 x double>, i32 } @vploadff_v4f64_allones_mask(ptr %ptr, i32 zeroext
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x double>, i32 } %load
}
-declare { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr, <8 x i1>, i32)
-
define { <8 x double>, i32 } @vploadff_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8f64:
; CHECK: # %bb.0:
@@ -548,14 +466,10 @@ define { <8 x double>, i32 } @vploadff_v8f64_allones_mask(ptr %ptr, i32 zeroext
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x double>, i32 } %load
}
-declare { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr, <2 x i1>, i32)
-
define { <2 x bfloat>, i32 } @vploadff_v2bf16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2bf16:
; CHECK: # %bb.0:
@@ -574,14 +488,10 @@ define { <2 x bfloat>, i32 } @vploadff_v2bf16_allones_mask(ptr %ptr, i32 zeroext
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
ret { <2 x bfloat>, i32 } %load
}
-declare { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr, <4 x i1>, i32)
-
define { <4 x bfloat>, i32 } @vploadff_v4bf16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v4bf16:
; CHECK: # %bb.0:
@@ -600,14 +510,10 @@ define { <4 x bfloat>, i32 } @vploadff_v4bf16_allones_mask(ptr %ptr, i32 zeroext
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
ret { <4 x bfloat>, i32 } %load
}
-declare { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr, <8 x i1>, i32)
-
define { <8 x bfloat>, i32 } @vploadff_v8bf16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8bf16:
; CHECK: # %bb.0:
@@ -626,8 +532,6 @@ define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x bfloat>, i32 } %load
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
index 11812eec6ac46..ae439fd0ce3eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
@@ -8,8 +8,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-declare { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1i8:
; CHECK: # %bb.0:
@@ -28,14 +26,10 @@ define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8_allones_mask(ptr %ptr, i32 ze
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x i8>, i32 } %load
}
-declare { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2i8:
; CHECK: # %bb.0:
@@ -54,14 +48,10 @@ define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8_allones_mask(ptr %ptr, i32 ze
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x i8>, i32 } %load
}
-declare { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4i8:
; CHECK: # %bb.0:
@@ -80,14 +70,10 @@ define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8_allones_mask(ptr %ptr, i32 ze
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x i8>, i32 } %load
}
-declare { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8i8:
; CHECK: # %bb.0:
@@ -106,14 +92,10 @@ define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8_allones_mask(ptr %ptr, i32 ze
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x i8>, i32 } %load
}
-declare { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr, <vscale x 16 x i1>, i32)
-
define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv16i8:
; CHECK: # %bb.0:
@@ -132,14 +114,10 @@ define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 16 x i8>, i32 } %load
}
-declare { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr, <vscale x 32 x i1>, i32)
-
define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv32i8:
; CHECK: # %bb.0:
@@ -158,14 +136,10 @@ define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
- %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 32 x i8>, i32 } %load
}
-declare { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr, <vscale x 64 x i1>, i32)
-
define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8(ptr %ptr, <vscale x 64 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv64i8:
; CHECK: # %bb.0:
@@ -184,14 +158,10 @@ define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle8ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 64 x i1> %a, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
- %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> %b, i32 %evl)
+ %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 64 x i8>, i32 } %load
}
-declare { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1i16:
; CHECK: # %bb.0:
@@ -210,14 +180,10 @@ define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x i16>, i32 } %load
}
-declare { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2i16:
; CHECK: # %bb.0:
@@ -236,14 +202,10 @@ define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x i16>, i32 } %load
}
-declare { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4i16:
; CHECK: # %bb.0:
@@ -262,14 +224,10 @@ define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x i16>, i32 } %load
}
-declare { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8i16:
; CHECK: # %bb.0:
@@ -288,14 +246,10 @@ define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x i16>, i32 } %load
}
-declare { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr, <vscale x 16 x i1>, i32)
-
define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv16i16:
; CHECK: # %bb.0:
@@ -314,14 +268,10 @@ define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16_allones_mask(ptr %ptr, i3
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 16 x i16>, i32 } %load
}
-declare { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr, <vscale x 32 x i1>, i32)
-
define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv32i16:
; CHECK: # %bb.0:
@@ -340,14 +290,10 @@ define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16_allones_mask(ptr %ptr, i3
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
- %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 32 x i16>, i32 } %load
}
-declare { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1i32:
; CHECK: # %bb.0:
@@ -366,14 +312,10 @@ define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x i32>, i32 } %load
}
-declare { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2i32:
; CHECK: # %bb.0:
@@ -392,14 +334,10 @@ define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x i32>, i32 } %load
}
-declare { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4i32:
; CHECK: # %bb.0:
@@ -418,14 +356,10 @@ define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x i32>, i32 } %load
}
-declare { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8i32:
; CHECK: # %bb.0:
@@ -444,14 +378,10 @@ define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x i32>, i32 } %load
}
-declare { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr, <vscale x 16 x i1>, i32)
-
define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv16i32:
; CHECK: # %bb.0:
@@ -470,14 +400,10 @@ define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32_allones_mask(ptr %ptr, i3
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 16 x i32>, i32 } %load
}
-declare { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1i64:
; CHECK: # %bb.0:
@@ -496,14 +422,10 @@ define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x i64>, i32 } %load
}
-declare { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2i64:
; CHECK: # %bb.0:
@@ -522,14 +444,10 @@ define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x i64>, i32 } %load
}
-declare { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4i64:
; CHECK: # %bb.0:
@@ -548,14 +466,10 @@ define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x i64>, i32 } %load
}
-declare { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8i64:
; CHECK: # %bb.0:
@@ -574,14 +488,10 @@ define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x i64>, i32 } %load
}
-declare { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1f16:
; CHECK: # %bb.0:
@@ -600,14 +510,10 @@ define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x half>, i32 } %load
}
-declare { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2f16:
; CHECK: # %bb.0:
@@ -626,14 +532,10 @@ define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x half>, i32 } %load
}
-declare { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4f16:
; CHECK: # %bb.0:
@@ -652,14 +554,10 @@ define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x half>, i32 } %load
}
-declare { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8f16:
; CHECK: # %bb.0:
@@ -678,14 +576,10 @@ define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16_allones_mask(ptr %ptr, i32
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x half>, i32 } %load
}
-declare { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr, <vscale x 16 x i1>, i32)
-
define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv16f16:
; CHECK: # %bb.0:
@@ -704,14 +598,10 @@ define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16_allones_mask(ptr %ptr, i
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 16 x half>, i32 } %load
}
-declare { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr, <vscale x 32 x i1>, i32)
-
define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv32f16:
; CHECK: # %bb.0:
@@ -730,14 +620,10 @@ define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16_allones_mask(ptr %ptr, i
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
- %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 32 x half>, i32 } %load
}
-declare { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1f32:
; CHECK: # %bb.0:
@@ -756,14 +642,10 @@ define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32_allones_mask(ptr %ptr, i3
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x float>, i32 } %load
}
-declare { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2f32:
; CHECK: # %bb.0:
@@ -782,14 +664,10 @@ define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32_allones_mask(ptr %ptr, i3
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x float>, i32 } %load
}
-declare { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4f32:
; CHECK: # %bb.0:
@@ -808,14 +686,10 @@ define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32_allones_mask(ptr %ptr, i3
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x float>, i32 } %load
}
-declare { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8f32:
; CHECK: # %bb.0:
@@ -834,14 +708,10 @@ define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32_allones_mask(ptr %ptr, i3
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x float>, i32 } %load
}
-declare { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr, <vscale x 16 x i1>, i32)
-
define { <vscale x 16 x float>, i32 } @vploadff_nxv16f32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv16f32:
; CHECK: # %bb.0:
@@ -860,14 +730,10 @@ define { <vscale x 16 x float>, i32 } @vploadff_nxv16f32_allones_mask(ptr %ptr,
; CHECK-NEXT: vle32ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %load = call { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ %load = call { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 16 x float>, i32 } %load
}
-declare { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x double>, i32 } @vploadff_nxv1f64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1f64:
; CHECK: # %bb.0:
@@ -886,14 +752,10 @@ define { <vscale x 1 x double>, i32 } @vploadff_nxv1f64_allones_mask(ptr %ptr, i
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x double>, i32 } %load
}
-declare { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x double>, i32 } @vploadff_nxv2f64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2f64:
; CHECK: # %bb.0:
@@ -912,14 +774,10 @@ define { <vscale x 2 x double>, i32 } @vploadff_nxv2f64_allones_mask(ptr %ptr, i
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x double>, i32 } %load
}
-declare { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x double>, i32 } @vploadff_nxv4f64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4f64:
; CHECK: # %bb.0:
@@ -938,14 +796,10 @@ define { <vscale x 4 x double>, i32 } @vploadff_nxv4f64_allones_mask(ptr %ptr, i
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x double>, i32 } %load
}
-declare { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x double>, i32 } @vploadff_nxv8f64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8f64:
; CHECK: # %bb.0:
@@ -964,14 +818,10 @@ define { <vscale x 8 x double>, i32 } @vploadff_nxv8f64_allones_mask(ptr %ptr, i
; CHECK-NEXT: vle64ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x double>, i32 } %load
}
-declare { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr, <vscale x 1 x i1>, i32)
-
define { <vscale x 1 x bfloat>, i32 } @vploadff_nxv1bf16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1bf16:
; CHECK: # %bb.0:
@@ -990,14 +840,10 @@ define { <vscale x 1 x bfloat>, i32 } @vploadff_nxv1bf16_allones_mask(ptr %ptr,
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
- %load = call { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+ %load = call { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 1 x bfloat>, i32 } %load
}
-declare { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr, <vscale x 2 x i1>, i32)
-
define { <vscale x 2 x bfloat>, i32 } @vploadff_nxv2bf16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv2bf16:
; CHECK: # %bb.0:
@@ -1016,14 +862,10 @@ define { <vscale x 2 x bfloat>, i32 } @vploadff_nxv2bf16_allones_mask(ptr %ptr,
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+ %load = call { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 2 x bfloat>, i32 } %load
}
-declare { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr, <vscale x 4 x i1>, i32)
-
define { <vscale x 4 x bfloat>, i32 } @vploadff_nxv4bf16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv4bf16:
; CHECK: # %bb.0:
@@ -1042,14 +884,10 @@ define { <vscale x 4 x bfloat>, i32 } @vploadff_nxv4bf16_allones_mask(ptr %ptr,
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %load = call { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+ %load = call { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 4 x bfloat>, i32 } %load
}
-declare { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr, <vscale x 8 x i1>, i32)
-
define { <vscale x 8 x bfloat>, i32 } @vploadff_nxv8bf16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv8bf16:
; CHECK: # %bb.0:
@@ -1068,14 +906,10 @@ define { <vscale x 8 x bfloat>, i32 } @vploadff_nxv8bf16_allones_mask(ptr %ptr,
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
- %load = call { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+ %load = call { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 8 x bfloat>, i32 } %load
}
-declare { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr, <vscale x 16 x i1>, i32)
-
define { <vscale x 16 x bfloat>, i32 } @vploadff_nxv16bf16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv16bf16:
; CHECK: # %bb.0:
@@ -1094,14 +928,10 @@ define { <vscale x 16 x bfloat>, i32 } @vploadff_nxv16bf16_allones_mask(ptr %ptr
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
- %load = call { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+ %load = call { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 16 x bfloat>, i32 } %load
}
-declare { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr, <vscale x 32 x i1>, i32)
-
define { <vscale x 32 x bfloat>, i32 } @vploadff_nxv32bf16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv32bf16:
; CHECK: # %bb.0:
@@ -1120,8 +950,6 @@ define { <vscale x 32 x bfloat>, i32 } @vploadff_nxv32bf16_allones_mask(ptr %ptr
; CHECK-NEXT: vle16ff.v v8, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: ret
- %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
- %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
- %load = call { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+ %load = call { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 32 x bfloat>, i32 } %load
}
>From 93d6ab797ca53b5b85487200e2d448749dd971b7 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Mon, 3 Mar 2025 19:03:34 -0800
Subject: [PATCH 4/7] fixup! Add SplitVectorRes support
---
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 50 +++++++++++++++++++
.../RISCV/rvv/fixed-vectors-vploadff.ll | 38 ++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vploadff.ll | 42 ++++++++++++++++
4 files changed, 131 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 74d7210743372..f3aa5340defac 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -958,6 +958,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9d42ec2fdf859..1d94a931c1743 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1163,6 +1163,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
break;
+ case ISD::VP_LOAD_FF:
+ SplitVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N), Lo, Hi);
+ break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
break;
@@ -2232,6 +2235,53 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
ReplaceValueWith(SDValue(LD, 1), Ch);
}
+void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ Align Alignment = LD->getOriginalAlign();
+ SDValue Mask = LD->getMask();
+ SDValue EVL = LD->getVectorLength();
+ EVT MemoryVT = LD->getMemoryVT();
+
+ EVT LoMemVT, HiMemVT; // NOTE(review): these and HiIsEmpty are never read below.
+ bool HiIsEmpty = false;
+ std::tie(LoMemVT, HiMemVT) =
+ DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);
+
+ // Split the mask operand. Only MaskLo feeds the load created below.
+ SDValue MaskLo, MaskHi;
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
+
+ // Split the EVL operand. Only EVLLo feeds the load created below.
+ SDValue EVLLo, EVLHi;
+ std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ LD->getPointerInfo(), MachineMemOperand::MOLoad,
+ LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
+ LD->getRanges());
+
+ Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO);
+
+ // Nothing is loaded for the upper half: it becomes an UNDEF node (standing in for poison).
+ Hi = DAG.getUNDEF(HiVT);
+
+ ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1));
+ ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2));
+}
+
void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
SDValue &Lo, SDValue &Hi) {
assert(SLD->isUnindexed() &&
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
index 474f859e9789d..84b531c0d85b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
@@ -272,6 +272,44 @@ define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %ev
ret { <8 x i64>, i32 } %load
}
+define { <32 x i64>, i32 } @vploadff_v32i64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: bltu a2, a3, .LBB24_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: .LBB24_2:
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a1), v0.t
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: sw a1, 256(a0)
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+ %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> %m, i32 %evl)
+ ret { <32 x i64>, i32 } %load
+}
+
+define { <32 x i64>, i32 } @vploadff_v32i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v32i64_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a3, 16
+; CHECK-NEXT: bltu a2, a3, .LBB25_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: li a2, 16
+; CHECK-NEXT: .LBB25_2:
+; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-NEXT: vle64ff.v v8, (a1)
+; CHECK-NEXT: csrr a1, vl
+; CHECK-NEXT: sw a1, 256(a0)
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vse64.v v8, (a0)
+; CHECK-NEXT: ret
+ %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> splat (i1 true), i32 %evl)
+ ret { <32 x i64>, i32 } %load
+}
+
define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v2f16:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
index ae439fd0ce3eb..bbf0c27118793 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
@@ -162,6 +162,48 @@ define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32
ret { <vscale x 64 x i8>, i32 } %load
}
+define <vscale x 128 x i8> @vploadff_nxv128i8(ptr %ptr, ptr %evl_out, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: bltu a2, a3, .LBB14_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB14_2:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: sw a0, 0(a1)
+; CHECK-NEXT: ret
+ %load = call { <vscale x 128 x i8>, i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, <vscale x 128 x i1> %m, i32 %evl)
+ %result0 = extractvalue { <vscale x 128 x i8>, i32 } %load, 0
+ %result1 = extractvalue { <vscale x 128 x i8>, i32 } %load, 1
+ store i32 %result1, ptr %evl_out
+ ret <vscale x 128 x i8> %result0
+}
+
+define <vscale x 128 x i8> @vploadff_nxv128i8_allones_mask(ptr %ptr, ptr %evl_out, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv128i8_allones_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: bltu a2, a3, .LBB15_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a3
+; CHECK-NEXT: .LBB15_2:
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0)
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: sw a0, 0(a1)
+; CHECK-NEXT: ret
+ %load = call { <vscale x 128 x i8>, i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, <vscale x 128 x i1> splat (i1 true), i32 %evl)
+ %result0 = extractvalue { <vscale x 128 x i8>, i32 } %load, 0
+ %result1 = extractvalue { <vscale x 128 x i8>, i32 } %load, 1
+ store i32 %result1, ptr %evl_out
+ ret <vscale x 128 x i8> %result0
+}
+
define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_nxv1i16:
; CHECK: # %bb.0:
>From 5f0f3cccb0286c21c3eb8dcf1830049a9e5e5add Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 4 Mar 2025 11:34:10 -0800
Subject: [PATCH 5/7] fixup! Use LocationSize::beforeOrAfterPointer().
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f2e234f98a943..5cf13f16fec5d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8493,7 +8493,7 @@ void SelectionDAGBuilder::visitVPLoadFF(
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges);
LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1));
>From 6720314499cb8aed0e6043bf01f187af3a932d2c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Tue, 4 Mar 2025 12:10:01 -0800
Subject: [PATCH 6/7] fixup! Add to AddNodeIDCustom.
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d9bca40567165..f90ffb99999a3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -846,6 +846,14 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ELD->getMemOperand()->getFlags());
break;
}
+ case ISD::VP_LOAD_FF: {
+ const VPLoadFFSDNode *LD = cast<VPLoadFFSDNode>(N);
+ ID.AddInteger(LD->getMemoryVT().getRawBits());
+ ID.AddInteger(LD->getRawSubclassData());
+ ID.AddInteger(LD->getPointerInfo().getAddrSpace());
+ ID.AddInteger(LD->getMemOperand()->getFlags());
+ break;
+ }
case ISD::VP_STORE: {
const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
ID.AddInteger(EST->getMemoryVT().getRawBits());
>From 3a8289fbb3aaf73afccd135e98b6f6d606da65ed Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 5 Mar 2025 22:15:52 -0800
Subject: [PATCH 7/7] fixup! Add WidenVecRes_VP_LOAD_FF
---
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 26 +++++++++++++++++++
.../RISCV/rvv/fixed-vectors-vploadff.ll | 11 ++++++++
llvm/test/CodeGen/RISCV/rvv/vploadff.ll | 11 ++++++++
4 files changed, 49 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index f3aa5340defac..fde49c2aebea9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1061,6 +1061,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+ SDValue WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
SDValue WidenVecRes_VECTOR_COMPRESS(SDNode *N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 1d94a931c1743..c8db247551c7e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4649,6 +4649,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VP_LOAD:
Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
break;
+ case ISD::VP_LOAD_FF:
+ Res = WidenVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N));
+ break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
break;
@@ -6113,6 +6116,29 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
return Res;
}
+/// Widen the vector result of a VP_LOAD_FF node. A new fault-only-first VP
+/// load is built with the widened result type and the widened mask; the chain,
+/// base pointer, EVL and memory operand are taken unchanged from \p N.
+/// Returns the new node's value; the remaining results of \p N are rewired
+/// here.
+SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Mask = N->getMask();
+ SDValue EVL = N->getVectorLength();
+ SDLoc dl(N);
+
+ // The mask should be widened as well.
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen VP_LOAD_FF mask");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
+ .getVectorElementCount() &&
+ "Unable to widen vector load");
+
+ SDValue Res = DAG.getLoadFFVP(WidenVT, dl, N->getChain(), N->getBasePtr(),
+ Mask, EVL, N->getMemOperand());
+ // Res itself is returned to the caller. Users of results 1 and 2 of the old
+ // node are switched over to the same-numbered results of the new node.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
SDLoc DL(N);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
index 84b531c0d85b8..5b01976dbbebd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
@@ -573,3 +573,14 @@ define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext
%load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
ret { <8 x bfloat>, i32 } %load
}
+
+; Masked vp.load.ff of <7 x i8>, a non-power-of-2 element count. The expected
+; code is a single masked e8/mf2 vle8ff.v followed by a read of vl.
+; NOTE(review): added in the same fixup as WidenVecRes_VP_LOAD_FF, so this
+; presumably covers vector result widening - confirm.
+define { <7 x i8>, i32 } @vploadff_v7i8(ptr %ptr, <7 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v7i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <7 x i8>, i32 } @llvm.vp.load.ff.v7i8.p0(ptr %ptr, <7 x i1> %m, i32 %evl)
+ ret { <7 x i8>, i32 } %load
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
index bbf0c27118793..9e08938a9fe6c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
@@ -995,3 +995,14 @@ define { <vscale x 32 x bfloat>, i32 } @vploadff_nxv32bf16_allones_mask(ptr %ptr
%load = call { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret { <vscale x 32 x bfloat>, i32 } %load
}
+
+; Masked vp.load.ff of <vscale x 3 x i8>, a non-power-of-2 minimum element
+; count. The expected code is a single masked e8/mf2 vle8ff.v followed by a
+; read of vl.
+; NOTE(review): added in the same fixup as WidenVecRes_VP_LOAD_FF, so this
+; presumably covers scalable vector result widening - confirm.
+define { <vscale x 3 x i8>, i32 } @vploadff_nxv3i8(ptr %ptr, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv3i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT: csrr a0, vl
+; CHECK-NEXT: ret
+ %load = call { <vscale x 3 x i8>, i32 } @llvm.vp.load.ff.nxv3i8.p0(ptr %ptr, <vscale x 3 x i1> %m, i32 %evl)
+ ret { <vscale x 3 x i8>, i32 } %load
+}
More information about the llvm-commits
mailing list