[llvm] [RISCV] Emit VP strided loads/stores in RISCVGatherScatterLowering (PR #98111)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 19:50:12 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/98111
>From e223af289ac94052f8414582ff5e5d6110dcc133 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 9 Jul 2024 12:47:30 +0800
Subject: [PATCH] [RISCV] Emit VP strided loads/stores in
RISCVGatherScatterLowering
RISCVGatherScatterLowering is the last user of riscv_masked_strided_{load,store} after #98131 and #98112, so this patch changes it to emit the VP equivalents instead. This allows us to remove the masked_strided intrinsics so that we only have one lowering path.
riscv_masked_strided_{load,store} didn't have AVL operands and were always VLMAX, so this passes the fixed or scalable element count as the EVL instead, which RISCVVectorPeephole should now convert back to VLMAX after #97800.
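
As a rough sketch of the transform (value names like %base, %stride, %mask and %passthru are illustrative), for a fixed-length gather the pass now emits

  %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr %base, i64 %stride, <8 x i1> %mask, i32 8)
  %res = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %mask, <8 x i32> %load, <8 x i32> %passthru, i32 8)

in place of the old

  %res = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> %passthru, ptr %base, i64 %stride, <8 x i1> %mask)

The vp.select is needed because the VP strided load has no passthru operand; for scalable vectors the EVL is computed from @llvm.vscale instead of a constant. Stores are the same shape but need no select.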
---
llvm/include/llvm/IR/IntrinsicsRISCV.td | 14 --
.../RISCV/RISCVGatherScatterLowering.cpp | 18 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 159 ------------------
llvm/test/CodeGen/RISCV/pr89833.ll | 16 --
.../rvv/fixed-vectors-strided-load-store.ll | 106 +++++++-----
.../CodeGen/RISCV/rvv/mscatter-combine.ll | 2 +-
.../rvv/strided-load-store-intrinsics.ll | 133 ---------------
.../CodeGen/RISCV/rvv/strided-load-store.ll | 54 ++++--
8 files changed, 115 insertions(+), 387 deletions(-)
delete mode 100644 llvm/test/CodeGen/RISCV/pr89833.ll
delete mode 100644 llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 2da154c300344..57702aa50774b 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1710,20 +1710,6 @@ let TargetPrefix = "riscv" in {
defm vsuxseg # nf : RISCVISegStore<nf>;
}
- // Strided loads/stores for fixed vectors.
- def int_riscv_masked_strided_load
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, llvm_anyptr_ty,
- llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [NoCapture<ArgIndex<1>>, IntrReadMem]>;
- def int_riscv_masked_strided_store
- : DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty, llvm_anyptr_ty,
- llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
- [NoCapture<ArgIndex<1>>, IntrWriteMem]>;
-
// Segment loads/stores for fixed vectors.
foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
def int_riscv_seg # nf # _load
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index d9971791a2cfa..881be28bfe79e 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -515,17 +515,23 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
Builder.SetInsertPoint(II);
+ Value *EVL = Builder.CreateElementCount(
+ IntegerType::get(Ctx, 32), cast<VectorType>(DataType)->getElementCount());
+
CallInst *Call;
- if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ if (II->getIntrinsicID() == Intrinsic::masked_gather) {
Call = Builder.CreateIntrinsic(
- Intrinsic::riscv_masked_strided_load,
+ Intrinsic::experimental_vp_strided_load,
{DataType, BasePtr->getType(), Stride->getType()},
- {II->getArgOperand(3), BasePtr, Stride, II->getArgOperand(2)});
- else
+ {BasePtr, Stride, II->getArgOperand(2), EVL});
+ Call = Builder.CreateIntrinsic(
+ Intrinsic::vp_select, {DataType},
+ {II->getOperand(2), Call, II->getArgOperand(3), EVL});
+ } else
Call = Builder.CreateIntrinsic(
- Intrinsic::riscv_masked_strided_store,
+ Intrinsic::experimental_vp_strided_store,
{DataType, BasePtr->getType(), Stride->getType()},
- {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3)});
+ {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3), EVL});
Call->takeName(II);
II->replaceAllUsesWith(Call);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8b5e56bff4097..23185ec5116de 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1621,12 +1621,6 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
- case Intrinsic::riscv_masked_strided_load:
- return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
- /*IsUnitStrided*/ false);
- case Intrinsic::riscv_masked_strided_store:
- return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
- /*IsUnitStrided*/ false);
case Intrinsic::riscv_seg2_load:
case Intrinsic::riscv_seg3_load:
case Intrinsic::riscv_seg4_load:
@@ -9401,81 +9395,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
switch (IntNo) {
default:
break;
- case Intrinsic::riscv_masked_strided_load: {
- SDLoc DL(Op);
- MVT XLenVT = Subtarget.getXLenVT();
-
- // If the mask is known to be all ones, optimize to an unmasked intrinsic;
- // the selection of the masked intrinsics doesn't do this for us.
- SDValue Mask = Op.getOperand(5);
- bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
-
- MVT VT = Op->getSimpleValueType(0);
- MVT ContainerVT = VT;
- if (VT.isFixedLengthVector())
- ContainerVT = getContainerForFixedLengthVector(VT);
-
- SDValue PassThru = Op.getOperand(2);
- if (!IsUnmasked) {
- MVT MaskVT = getMaskTypeFor(ContainerVT);
- if (VT.isFixedLengthVector()) {
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
- PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
- }
- }
-
- auto *Load = cast<MemIntrinsicSDNode>(Op);
- SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
- SDValue Ptr = Op.getOperand(3);
- SDValue Stride = Op.getOperand(4);
- SDValue Result, Chain;
-
- // TODO: We restrict this to unmasked loads currently in consideration of
- // the complexity of handling all falses masks.
- MVT ScalarVT = ContainerVT.getVectorElementType();
- if (IsUnmasked && isNullConstant(Stride) && ContainerVT.isInteger()) {
- SDValue ScalarLoad =
- DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Load->getChain(), Ptr,
- ScalarVT, Load->getMemOperand());
- Chain = ScalarLoad.getValue(1);
- Result = lowerScalarSplat(SDValue(), ScalarLoad, VL, ContainerVT, DL, DAG,
- Subtarget);
- } else if (IsUnmasked && isNullConstant(Stride) && isTypeLegal(ScalarVT)) {
- SDValue ScalarLoad = DAG.getLoad(ScalarVT, DL, Load->getChain(), Ptr,
- Load->getMemOperand());
- Chain = ScalarLoad.getValue(1);
- Result = DAG.getSplat(ContainerVT, DL, ScalarLoad);
- } else {
- SDValue IntID = DAG.getTargetConstant(
- IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
- XLenVT);
-
- SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
- if (IsUnmasked)
- Ops.push_back(DAG.getUNDEF(ContainerVT));
- else
- Ops.push_back(PassThru);
- Ops.push_back(Ptr);
- Ops.push_back(Stride);
- if (!IsUnmasked)
- Ops.push_back(Mask);
- Ops.push_back(VL);
- if (!IsUnmasked) {
- SDValue Policy =
- DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
- Ops.push_back(Policy);
- }
-
- SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
- Result =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
- Load->getMemoryVT(), Load->getMemOperand());
- Chain = Result.getValue(1);
- }
- if (VT.isFixedLengthVector())
- Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
- return DAG.getMergeValues({Result, Chain}, DL);
- }
case Intrinsic::riscv_seg2_load:
case Intrinsic::riscv_seg3_load:
case Intrinsic::riscv_seg4_load:
@@ -9555,47 +9474,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
switch (IntNo) {
default:
break;
- case Intrinsic::riscv_masked_strided_store: {
- SDLoc DL(Op);
- MVT XLenVT = Subtarget.getXLenVT();
-
- // If the mask is known to be all ones, optimize to an unmasked intrinsic;
- // the selection of the masked intrinsics doesn't do this for us.
- SDValue Mask = Op.getOperand(5);
- bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
-
- SDValue Val = Op.getOperand(2);
- MVT VT = Val.getSimpleValueType();
- MVT ContainerVT = VT;
- if (VT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
- }
- if (!IsUnmasked) {
- MVT MaskVT = getMaskTypeFor(ContainerVT);
- if (VT.isFixedLengthVector())
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
- }
-
- SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
-
- SDValue IntID = DAG.getTargetConstant(
- IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
- XLenVT);
-
- auto *Store = cast<MemIntrinsicSDNode>(Op);
- SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
- Ops.push_back(Val);
- Ops.push_back(Op.getOperand(3)); // Ptr
- Ops.push_back(Op.getOperand(4)); // Stride
- if (!IsUnmasked)
- Ops.push_back(Mask);
- Ops.push_back(VL);
-
- return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
- Ops, Store->getMemoryVT(),
- Store->getMemOperand());
- }
case Intrinsic::riscv_seg2_store:
case Intrinsic::riscv_seg3_store:
case Intrinsic::riscv_seg4_store:
@@ -17509,43 +17387,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// By default we do not combine any intrinsic.
default:
return SDValue();
- case Intrinsic::riscv_masked_strided_load: {
- MVT VT = N->getSimpleValueType(0);
- auto *Load = cast<MemIntrinsicSDNode>(N);
- SDValue PassThru = N->getOperand(2);
- SDValue Base = N->getOperand(3);
- SDValue Stride = N->getOperand(4);
- SDValue Mask = N->getOperand(5);
-
- // If the stride is equal to the element size in bytes, we can use
- // a masked.load.
- const unsigned ElementSize = VT.getScalarStoreSize();
- if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
- StrideC && StrideC->getZExtValue() == ElementSize)
- return DAG.getMaskedLoad(VT, DL, Load->getChain(), Base,
- DAG.getUNDEF(XLenVT), Mask, PassThru,
- Load->getMemoryVT(), Load->getMemOperand(),
- ISD::UNINDEXED, ISD::NON_EXTLOAD);
- return SDValue();
- }
- case Intrinsic::riscv_masked_strided_store: {
- auto *Store = cast<MemIntrinsicSDNode>(N);
- SDValue Value = N->getOperand(2);
- SDValue Base = N->getOperand(3);
- SDValue Stride = N->getOperand(4);
- SDValue Mask = N->getOperand(5);
-
- // If the stride is equal to the element size in bytes, we can use
- // a masked.store.
- const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
- if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
- StrideC && StrideC->getZExtValue() == ElementSize)
- return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
- DAG.getUNDEF(XLenVT), Mask,
- Value.getValueType(), Store->getMemOperand(),
- ISD::UNINDEXED, false);
- return SDValue();
- }
case Intrinsic::riscv_vcpop:
case Intrinsic::riscv_vcpop_mask:
case Intrinsic::riscv_vfirst:
diff --git a/llvm/test/CodeGen/RISCV/pr89833.ll b/llvm/test/CodeGen/RISCV/pr89833.ll
deleted file mode 100644
index 54a985040e758..0000000000000
--- a/llvm/test/CodeGen/RISCV/pr89833.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
-
-declare void @llvm.riscv.masked.strided.store.nxv16i8.p0.i64(<vscale x 16 x i8>, ptr, i64, <vscale x 16 x i1>)
-
-define void @test(<vscale x 16 x i16> %value, <vscale x 16 x i1> %mask) {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
-; CHECK-NEXT: vse8.v v12, (zero), v0.t
-; CHECK-NEXT: ret
- %trunc = trunc <vscale x 16 x i16> %value to <vscale x 16 x i8>
- call void @llvm.riscv.masked.strided.store.nxv16i8.p0.i64(<vscale x 16 x i8> %trunc, ptr null, i64 1, <vscale x 16 x i1> %mask)
- ret void
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
index ab5885a604443..d723c2f6df1af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
@@ -16,7 +16,8 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B)
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 32)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> undef, i32 32)
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
@@ -58,7 +59,8 @@ define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> [[MASKEDOFF:%.*]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, i32 32)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32)
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
@@ -100,7 +102,8 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 155, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 -5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 32)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> undef, i32 32)
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
@@ -142,7 +145,8 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 0, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 32)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> undef, i32 32)
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
@@ -190,9 +194,10 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 32)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> undef, i32 32)
; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]]
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 32)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
@@ -232,9 +237,10 @@ define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture read
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> [[MASKEDOFF:%.*]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, i32 32)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32)
; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]]
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, i32 32)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160
; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
@@ -276,7 +282,8 @@ define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonl
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP1]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1
; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
@@ -321,7 +328,8 @@ define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 [[TMP0]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP2]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1
; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
@@ -418,9 +426,10 @@ define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readon
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP1]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]]
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32
; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
@@ -468,8 +477,10 @@ define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[B]], i64 [[VEC_IND_SCALAR1]], i32 1
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP2]], <8 x i32> undef, i32 8)
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP3]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 4
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr [[I2]], i64 8
@@ -537,29 +548,37 @@ define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture reado
; CHECK-NEXT: [[VEC_IND_SCALAR11:%.*]] = phi i64 [ 12, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR12:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR13:%.*]] = phi i64 [ 3, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR14:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP0]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP1]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP1]], <8 x i32> undef, i32 8)
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP3]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER52]], [[WIDE_MASKED_GATHER]]
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP1]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP2]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP3]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP5]], <8 x i32> undef, i32 8)
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP7]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER54]], [[WIDE_MASKED_GATHER53]]
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP3]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP4]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP5]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP9]], <8 x i32> undef, i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]]
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP11]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER56]], [[WIDE_MASKED_GATHER55]]
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP5]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP6]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.riscv.masked.strided.load.v8i32.p0.i64(<8 x i32> undef, ptr [[TMP7]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]]
+; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP13]], <8 x i32> undef, i32 8)
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]]
+; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> [[TMP15]], <8 x i32> undef, i32 8)
; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER58]], [[WIDE_MASKED_GATHER57]]
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP7]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 128
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 32
@@ -636,8 +655,10 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ]
; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG1:%.*]], i64 [[I3_SCALAR]]
; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[I3_SCALAR1]]
-; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.riscv.masked.strided.load.v2p0.p0.i64(<2 x ptr> undef, ptr [[TMP0]], i64 40, <2 x i1> <i1 true, i1 true>)
-; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.riscv.masked.strided.load.v2p0.p0.i64(<2 x ptr> undef, ptr [[TMP1]], i64 40, <2 x i1> <i1 true, i1 true>)
+; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> <i1 true, i1 true>, i32 2)
+; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> <i1 true, i1 true>, <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2)
+; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> <i1 true, i1 true>, i32 2)
+; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> <i1 true, i1 true>, <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2)
; V-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]]
; V-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8
; V-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2
@@ -717,8 +738,8 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
; V-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8
; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i64 [[I3_SCALAR]]
; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG]], i64 [[I3_SCALAR1]]
-; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> <i1 true, i1 true>)
-; V-NEXT: call void @llvm.riscv.masked.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> <i1 true, i1 true>)
+; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> <i1 true, i1 true>, i32 2)
+; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> <i1 true, i1 true>, i32 2)
; V-NEXT: [[I15]] = add nuw i64 [[I]], 4
; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20
; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20
@@ -801,7 +822,8 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
; CHECK-NEXT: [[I17_SCALAR:%.*]] = phi i64 [ [[START]], [[BB9]] ], [ [[I28_SCALAR:%.*]], [[BB15]] ]
; CHECK-NEXT: [[I18:%.*]] = add i64 [[I16]], [[I4]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I17_SCALAR]]
-; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 32)
+; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> undef, i32 32)
; CHECK-NEXT: [[I22:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I18]]
; CHECK-NEXT: [[I24:%.*]] = load <32 x i8>, ptr [[I22]], align 1
; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[I21]]
@@ -909,7 +931,8 @@ define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr
; CHECK-NEXT: [[I5:%.*]] = phi i64 [ [[I13:%.*]], [[BB4]] ], [ 0, [[BB2]] ]
; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I6_SCALAR]]
-; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.riscv.masked.strided.load.v16i8.p0.i64(<16 x i8> undef, ptr [[TMP0]], i64 5, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 16)
+; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> [[TMP1]], <16 x i8> undef, i32 16)
; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I5]]
; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, ptr [[I10]], align 1
; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[I9]]
@@ -951,8 +974,9 @@ bb16: ; preds = %bb4, %bb
define <8 x i8> @broadcast_ptr_base(ptr %a) {
; CHECK-LABEL: @broadcast_ptr_base(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.riscv.masked.strided.load.v8i8.p0.i64(<8 x i8> poison, ptr [[A:%.*]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: ret <8 x i8> [[TMP0]]
+; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> [[TMP0]], <8 x i8> poison, i32 8)
+; CHECK-NEXT: ret <8 x i8> [[TMP1]]
;
entry:
%0 = insertelement <8 x ptr> poison, ptr %a, i64 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
index 1c3b429202adf..71293aa5619b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll
@@ -116,7 +116,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
; RV64: # %bb.0:
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vs1r.v v8, (a1)
+; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: ret
%step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
%gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
deleted file mode 100644
index f8a5b0f9d03a6..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v | FileCheck %s
-
-declare <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8>, ptr, i64, <32 x i1>)
-declare <2 x i64> @llvm.riscv.masked.strided.load.v2i64.p0.i64(<2 x i64>, ptr, i64, <2 x i1>)
-
-declare void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8>, ptr, i64, <32 x i1>)
-declare void @llvm.riscv.masked.strided.store.v2i64.p0.i64(<2 x i64>, ptr, i64, <2 x i1>)
-
-define <32 x i8> @strided_load_i8(ptr %p, i64 %stride, <32 x i1> %m) {
-; CHECK-LABEL: strided_load_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), a1, v0.t
-; CHECK-NEXT: ret
- %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 %stride, <32 x i1> %m)
- ret <32 x i8> %res
-}
-
-define <2 x i64> @strided_load_i64(ptr %p, i64 %stride, <2 x i1> %m) {
-; CHECK-LABEL: strided_load_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
-; CHECK-NEXT: ret
- %res = call <2 x i64> @llvm.riscv.masked.strided.load.v2i64.p0.i64(<2 x i64> undef, ptr %p, i64 %stride, <2 x i1> %m)
- ret <2 x i64> %res
-}
-
-define <32 x i8> @strided_load_i8_splat(ptr %p, <32 x i1> %m) {
-; CHECK-LABEL: strided_load_i8_splat:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), zero, v0.t
-; CHECK-NEXT: ret
- %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 0, <32 x i1> %m)
- ret <32 x i8> %res
-}
-
-define <32 x i8> @strided_load_i8_reverse(ptr %p, <32 x i1> %m) {
-; CHECK-LABEL: strided_load_i8_reverse:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: li a2, -1
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), a2, v0.t
-; CHECK-NEXT: ret
- %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 -1, <32 x i1> %m)
- ret <32 x i8> %res
-}
-
-define <32 x i8> @strided_load_i8_nostride(ptr %p, <32 x i1> %m) {
-; CHECK-LABEL: strided_load_i8_nostride:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0), v0.t
-; CHECK-NEXT: ret
- %res = call <32 x i8> @llvm.riscv.masked.strided.load.v32i8.p0.i64(<32 x i8> undef, ptr %p, i64 1, <32 x i1> %m)
- ret <32 x i8> %res
-}
-
-
-define void @strided_store_i8(ptr %p, <32 x i8> %v, i64 %stride, <32 x i1> %m) {
-; CHECK-LABEL: strided_store_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a2, 32
-; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT: vsse8.v v8, (a0), a1, v0.t
-; CHECK-NEXT: ret
- call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 %stride, <32 x i1> %m)
- ret void
-}
-
-define void @strided_store_i8_zero(ptr %p, <32 x i8> %v, <32 x i1> %m) {
-; CHECK-LABEL: strided_store_i8_zero:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vsse8.v v8, (a0), zero, v0.t
-; CHECK-NEXT: ret
- call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 0, <32 x i1> %m)
- ret void
-}
-
-define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) {
-; CHECK-LABEL: strided_store_i8_nostride:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0), v0.t
-; CHECK-NEXT: ret
- call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m)
- ret void
-}
-
-define void @strided_store_i8_reverse(ptr %p, <32 x i8> %v, <32 x i1> %m) {
-; CHECK-LABEL: strided_store_i8_reverse:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: li a2, -1
-; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vsse8.v v8, (a0), a2, v0.t
-; CHECK-NEXT: ret
- call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 -1, <32 x i1> %m)
- ret void
-}
-
-declare void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64>, ptr, i64, <vscale x 1 x i1>)
-
-declare <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64>, ptr, i64, <vscale x 1 x i1>)
-
-define <vscale x 1 x i64> @strided_load_vscale_i64(ptr %p, i64 %stride, <vscale x 1 x i1> %m) {
-; CHECK-LABEL: strided_load_vscale_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t
-; CHECK-NEXT: ret
- %res = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> undef, ptr %p, i64 %stride, <vscale x 1 x i1> %m)
- ret <vscale x 1 x i64> %res
-}
-
-define void @strided_store_vscale_i64(ptr %p, <vscale x 1 x i64> %v, i64 %stride, <vscale x 1 x i1> %m) {
-; CHECK-LABEL: strided_store_vscale_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsse64.v v8, (a0), a1, v0.t
-; CHECK-NEXT: ret
- call void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> %v, ptr %p, i64 %stride, <vscale x 1 x i1> %m)
- ret void
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll
index 70412de1d0e91..381d1183995be 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll
@@ -16,12 +16,14 @@ define <vscale x 1 x i64> @gather(ptr %a, i32 %len) {
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> undef, ptr [[TMP1]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]])
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP2]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
@@ -59,7 +61,9 @@ define <vscale x 1 x i64> @gather_disjoint_or(ptr %a, i64 %len) {
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP0]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]])
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 2
@@ -111,11 +115,12 @@ define void @scatter(ptr %a, i32 %len) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP1]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP1]], i64 16, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
;
@@ -144,7 +149,9 @@ for.cond.cleanup: ; preds = %vector.body
define <vscale x 1 x i64> @gather_loopless(ptr %p, i64 %stride) {
; CHECK-LABEL: @gather_loopless(
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
-; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]])
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -164,7 +171,9 @@ define <vscale x 1 x i64> @gather_loopless(ptr %p, i64 %stride) {
define <vscale x 1 x i64> @straightline_offset_add(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_add(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
-; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 4, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]])
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -184,7 +193,9 @@ define <vscale x 1 x i64> @straightline_offset_add(ptr %p, i64 %offset) {
define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_disjoint_or(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]])
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -202,7 +213,9 @@ define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset)
define <vscale x 1 x i64> @straightline_offset_shl(ptr %p) {
; CHECK-LABEL: @straightline_offset_shl(
-; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[P:%.*]], i64 32, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]])
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -245,7 +258,9 @@ define <vscale x 1 x i64> @straightline_offset_shl_nonc(ptr %p, i64 %shift) {
; CHECK-LABEL: @straightline_offset_shl_nonc(
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 1, [[SHIFT:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[P:%.*]], i64 [[TMP2]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP3]])
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> poison, i32 [[TMP3]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -265,7 +280,8 @@ define <vscale x 1 x i64> @straightline_offset_shl_nonc(ptr %p, i64 %shift) {
define void @scatter_loopless(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
; CHECK-LABEL: @scatter_loopless(
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
-; CHECK-NEXT: call void @llvm.riscv.masked.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP2]])
; CHECK-NEXT: ret void
;
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -301,8 +317,10 @@ define void @constant_stride(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @vector_base_scalar_offset(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
-; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[PTRS2OFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]])
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
%step = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
@@ -319,8 +337,10 @@ define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
define <vscale x 1 x i64> @splat_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @splat_base_scalar_offset(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
-; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.riscv.masked.strided.load.nxv1i64.p0.i64(<vscale x 1 x i64> poison, ptr [[TMP1]], i64 0, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer))
+; CHECK-NEXT: [[PTRSOFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, <vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), i32 [[TMP1]])
+; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT: ret <vscale x 1 x i64> [[X]]
;
%head = insertelement <vscale x 1 x ptr> poison, ptr %p, i32 0
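
For readers skimming the updated CHECK lines, the shape these tests now
verify is: a masked gather/scatter over strided addresses becomes a VP
strided memory op whose EVL is the vector's element count (here
@llvm.vscale.i32() for <vscale x 1 x ...> types), followed by a vp.select
that reapplies the mask and passthru. A minimal before/after sketch
distilled from the test output above; %mask, %passthru, %offsets and
%stride are placeholder names, not identifiers from the patch:

  ; before: strided addresses fed into a masked gather
  %ptrs = getelementptr i64, ptr %p, <vscale x 1 x i64> %offsets
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
           <vscale x 1 x ptr> %ptrs, i32 8,
           <vscale x 1 x i1> %mask, <vscale x 1 x i64> %passthru)

  ; after: EVL set to the element count of the vector type
  %evl = call i32 @llvm.vscale.i32()
  %v = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(
           ptr %p, i64 %stride, <vscale x 1 x i1> %mask, i32 %evl)
  %x = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(
           <vscale x 1 x i1> %mask, <vscale x 1 x i64> %v,
           <vscale x 1 x i64> %passthru, i32 %evl)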