[llvm] [VP][RISCV] Add llvm.experimental.vp.reverse. (PR #70405)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 27 14:41:15 PDT 2023
https://github.com/topperc updated https://github.com/llvm/llvm-project/pull/70405
>From 87a782fa5cdaaad92a1952cf26aa61f3724c3641 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 26 Oct 2023 18:00:25 -0700
Subject: [PATCH 1/3] [VP][RISCV] Add llvm.experimental.vp.reverse.
This is similar to vector.reverse, but only reverses the first
EVL elements.
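For example, with an all-ones mask and an EVL of 3, an illustrative sketch
of the intended semantics (matching the LangRef wording added below):

  llvm.experimental.vp.reverse(<A,B,C,D>, <1,1,1,1>, 3) ==> <C, B, A, poison>

Lanes disabled by the mask and elements at or past EVL are poison.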
I extracted this code from our downstream. Some of it may have
come from https://repo.hca.bsc.es/gitlab/rferrer/llvm-epi/ originally.
---
llvm/docs/LangRef.rst | 36 ++
llvm/include/llvm/IR/Intrinsics.td | 7 +
llvm/include/llvm/IR/VPIntrinsics.def | 6 +
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 54 ++
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 152 ++++-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 1 +
.../rvv/vp-reverse-float-fixed-vectors.ll | 68 ++
.../CodeGen/RISCV/rvv/vp-reverse-float.ll | 266 ++++++++
.../RISCV/rvv/vp-reverse-int-fixed-vectors.ll | 134 ++++
llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll | 595 ++++++++++++++++++
.../rvv/vp-reverse-mask-fixed-vectors.ll | 155 +++++
.../test/CodeGen/RISCV/rvv/vp-reverse-mask.ll | 292 +++++++++
llvm/unittests/IR/VPIntrinsicTest.cpp | 1 +
14 files changed, 1765 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index c97a7ae372bc6eb..6d90c1bf7e4aa48 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -21547,6 +21547,42 @@ Examples:
llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, -2, 3, 2); ==> <B, C, poison, poison> trailing elements
+.. _int_experimental_vp_reverse:
+
+'``llvm.experimental.vp.reverse``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+ declare <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %vec, <2 x i1> %mask, i32 %evl)
+ declare <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.vp.reverse.*``' intrinsic is the vector length
+predicated version of the '``llvm.experimental.vector.reverse.*``' intrinsic.
+
+Arguments:
+""""""""""
+
+The result and the first argument ``vec`` are vectors with the same type.
+The second argument ``mask`` is a vector mask with the same number of
+elements as the result. The third argument ``evl`` is the explicit vector
+length of the operation.
+
+Semantics:
+""""""""""
+
+This intrinsic reverses the order of the first ``evl`` elements of ``vec``.
+Lanes in the result vector that are disabled by ``mask`` and elements past
+``evl`` are ``poison``.
+
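+Examples:
+"""""""""
+
+An illustration with an all-ones mask and an ``evl`` of 3::
+
+      llvm.experimental.vp.reverse(<A,B,C,D>, <1,1,1,1>, 3); ==> <C, B, A, poison>
+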
.. _int_vp_load:
'``llvm.vp.load``' Intrinsic
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 80c3d8d403d9115..8ad08ce8a3082a0 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -2176,6 +2176,13 @@ def int_experimental_vp_splice:
llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
+def int_experimental_vp_reverse:
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
def int_vp_is_fpclass:
DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[ llvm_anyvector_ty,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 55a68ff5768ddb0..0b6e266f7dbc509 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -702,6 +702,12 @@ BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(experimental_vector_splice)
END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
+// llvm.experimental.vp.reverse(x,mask,vlen)
+BEGIN_REGISTER_VP(experimental_vp_reverse, 1, 2,
+ EXPERIMENTAL_VP_REVERSE, -1)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(experimental_vector_reverse)
+END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
+
///// } Shuffles
#undef BEGIN_REGISTER_VP
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ee4278ceb729b61..d8b2792cc05f931 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -888,6 +888,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 88e61ff3a3c6f89..ba81c540536186c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1209,6 +1209,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
+ case ISD::EXPERIMENTAL_VP_REVERSE:
+ SplitVecRes_VP_REVERSE(N, Lo, Hi);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2857,6 +2860,57 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
}
+void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ SDValue Val = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ SDLoc DL(N);
+
+  // Fall back to a VP_STRIDED_STORE to the stack followed by a VP_LOAD.
+ Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
+
+ EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorElementCount());
+ SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
+ EVT PtrVT = StackPtr.getValueType();
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+ MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ Alignment);
+ MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ Alignment);
+
+ unsigned EltWidth = VT.getScalarSizeInBits() / 8;
+ SDValue NumElemMinus1 =
+ DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT),
+ DAG.getConstant(1, DL, PtrVT));
+ SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1,
+ DAG.getConstant(EltWidth, DL, PtrVT));
+ SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset);
+ SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT);
+
+ SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
+ SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr,
+ DAG.getUNDEF(PtrVT), Stride, TrueMask,
+ EVL, MemVT, StoreMMO, ISD::UNINDEXED);
+
+ SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load,
+ DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
+}
+
void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index beb371063f89b2d..dabff24eef7023e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -597,7 +597,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
- ISD::VP_ABS};
+ ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
@@ -609,7 +609,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
- ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS};
+ ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
+ ISD::EXPERIMENTAL_VP_REVERSE};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -694,6 +695,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
setOperationPromotedToType(
ISD::VECTOR_SPLICE, VT,
MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
@@ -1064,6 +1067,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
ISD::VP_SETCC, ISD::VP_TRUNCATE},
VT, Custom);
+
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
continue;
}
@@ -1315,7 +1320,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
- ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
+ ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
+ ISD::EXPERIMENTAL_VP_REVERSE});
if (Subtarget.hasVendorXTHeadMemPair())
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
@@ -6406,6 +6412,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
!Subtarget.hasVInstructionsF16()))
return SplitVPOp(Op, DAG);
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+ case ISD::EXPERIMENTAL_VP_REVERSE:
+ return lowerVPReverseExperimental(Op, DAG);
}
}
@@ -10223,6 +10231,144 @@ SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
return convertFromScalableVector(VT, Result, DAG, Subtarget);
}
+SDValue
+RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ SDValue Op1 = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue EVL = Op.getOperand(2);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ MVT GatherVT = ContainerVT;
+ MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
+ // Check if we are working with mask vectors
+ bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
+ if (IsMaskVector) {
+    switch (ContainerVT.getVectorElementCount().getKnownMinValue()) {
+ default: llvm_unreachable("Invalid factor size");
+ case 1: IndicesVT = MVT::i64; break;
+ case 2: IndicesVT = MVT::i32; break;
+ case 4: IndicesVT = MVT::i16; break;
+ case 8:
+ case 16:
+ case 32:
+ case 64: IndicesVT = MVT::i8; break;
+ }
+ GatherVT = IndicesVT = ContainerVT.changeVectorElementType(IndicesVT);
+
+ // Expand input operand
+ SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT),
+ DAG.getConstant(1, DL, XLenVT), EVL);
+ SDValue VMV0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT),
+ DAG.getConstant(0, DL, XLenVT), EVL);
+ Op1 = DAG.getNode(RISCVISD::VSELECT_VL, DL, IndicesVT, Op1, SplatOne, VMV0,
+ EVL);
+ }
+
+ unsigned EltSize = GatherVT.getScalarSizeInBits();
+ unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
+ unsigned MaxVLMAX = 0;
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ if (VectorBitsMax != 0)
+ MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+ unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
+ // If this is SEW=8 and VLMAX is unknown or more than 256, we need
+ // to use vrgatherei16.vv.
+ // TODO: It's also possible to use vrgatherei16.vv for other types to
+ // decrease register width for the index calculation.
+ // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
+ if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
+ // If this is LMUL=8, we have to split before using vrgatherei16.vv.
+ // Split the vector in half and reverse each half using a full register
+ // reverse.
+ // Swap the halves and concatenate them.
+ // Slide the concatenated result by (VLMax - VL).
+ if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(GatherVT);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op1, DL);
+
+ SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
+ SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
+
+ // Reassemble the low and high pieces reversed.
+      // NOTE: This Result is unmasked (because we do not need masks for
+      // shuffles). If in the future this has to change, we can use a
+      // SELECT_VL between Result and UNDEF using the mask originally passed
+      // to VP_REVERSE.
+ SDValue Result =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
+
+ // Slide off any elements from past EVL that were reversed into the low
+ // elements.
+ unsigned MinElts = GatherVT.getVectorMinNumElements();
+ SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
+ DAG.getConstant(MinElts, DL, XLenVT));
+ SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
+
+ SDValue TrueMask = getAllOnesMask(ContainerVT, EVL, DL, DAG);
+ Result =
+ getVSlidedown(DAG, Subtarget, DL, GatherVT, DAG.getUNDEF(GatherVT),
+ Result, Diff, TrueMask, EVL);
+
+ if (IsMaskVector) {
+ // Truncate Result back to a mask vector
+ Result =
+ DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
+ {Result, DAG.getConstant(0, DL, GatherVT),
+ DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+ }
+
+ // Just promote the int type to i16 which will double the LMUL.
+ IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
+ GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
+ }
+
+ SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
+ SDValue VecLen =
+ DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
+ SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
+ DAG.getUNDEF(IndicesVT), VecLen, EVL);
+ SDValue VRSUB =
+ DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
+ DAG.getUNDEF(IndicesVT), Mask, EVL);
+ SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
+ DAG.getUNDEF(GatherVT), Mask, EVL);
+
+ if (IsMaskVector) {
+ // Truncate Result back to a mask vector
+ Result = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, ContainerVT,
+ {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5ca6376f858c44d..8c10c9839e16d47 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -895,6 +895,7 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll
new file mode 100644
index 000000000000000..cc13a97ddce0e83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN: < %s | FileCheck %s
+
+define <2 x double> @test_vp_reverse_v2f64_masked(<2 x double> %src, <2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v2f64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %src, <2 x i1> %mask, i32 %evl)
+ ret <2 x double> %dst
+}
+
+define <2 x double> @test_vp_reverse_v2f64(<2 x double> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+
+ %dst = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %src, <2 x i1> %allones, i32 %evl)
+ ret <2 x double> %dst
+}
+
+define <4 x float> @test_vp_reverse_v4f32_masked(<4 x float> %src, <4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4f32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> %src, <4 x i1> %mask, i32 %evl)
+ ret <4 x float> %dst
+}
+
+define <4 x float> @test_vp_reverse_v4f32(<4 x float> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+
+ %dst = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> %src, <4 x i1> %allones, i32 %evl)
+ ret <4 x float> %dst
+}
+
+declare <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double>,<2 x i1>,i32)
+declare <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float>,<4 x i1>,i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
new file mode 100644
index 000000000000000..adc1ca6c8586838
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x double> @test_vp_reverse_nxv1f64_masked(<vscale x 1 x double> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1f64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 1 x double> @llvm.experimental.vp.reverse.nxv1f64(<vscale x 1 x double> %src, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x double> %dst
+}
+
+define <vscale x 1 x double> @test_vp_reverse_nxv1f64(<vscale x 1 x double> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+
+ %dst = call <vscale x 1 x double> @llvm.experimental.vp.reverse.nxv1f64(<vscale x 1 x double> %src, <vscale x 1 x i1> %allones, i32 %evl)
+ ret <vscale x 1 x double> %dst
+}
+
+define <vscale x 2 x float> @test_vp_reverse_nxv2f32_masked(<vscale x 2 x float> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2f32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x float> %dst
+}
+
+define <vscale x 2 x float> @test_vp_reverse_nxv2f32(<vscale x 2 x float> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+ %dst = call <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float> %src, <vscale x 2 x i1> %allones, i32 %evl)
+ ret <vscale x 2 x float> %dst
+}
+
+define <vscale x 2 x double> @test_vp_reverse_nxv2f64_masked(<vscale x 2 x double> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2f64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x double> %dst
+}
+
+define <vscale x 2 x double> @test_vp_reverse_nxv2f64(<vscale x 2 x double> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+ %dst = call <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double> %src, <vscale x 2 x i1> %allones, i32 %evl)
+ ret <vscale x 2 x double> %dst
+}
+
+define <vscale x 4 x float> @test_vp_reverse_nxv4f32_masked(<vscale x 4 x float> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4f32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x float> %dst
+}
+
+define <vscale x 4 x float> @test_vp_reverse_nxv4f32(<vscale x 4 x float> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+
+ %dst = call <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float> %src, <vscale x 4 x i1> %allones, i32 %evl)
+ ret <vscale x 4 x float> %dst
+}
+
+define <vscale x 4 x double> @test_vp_reverse_nxv4f64_masked(<vscale x 4 x double> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4f64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x double> %dst
+}
+
+define <vscale x 4 x double> @test_vp_reverse_nxv4f64(<vscale x 4 x double> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+
+ %dst = call <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double> %src, <vscale x 4 x i1> %allones, i32 %evl)
+ ret <vscale x 4 x double> %dst
+}
+
+define <vscale x 8 x float> @test_vp_reverse_nxv8f32_masked(<vscale x 8 x float> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8f32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x float> %dst
+}
+
+define <vscale x 8 x float> @test_vp_reverse_nxv8f32(<vscale x 8 x float> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x float> %dst
+}
+
+define <vscale x 8 x double> @test_vp_reverse_nxv8f64_masked(<vscale x 8 x double> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8f64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x double> %dst
+}
+
+define <vscale x 8 x double> @test_vp_reverse_nxv8f64(<vscale x 8 x double> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x double> %dst
+}
+
+define <vscale x 16 x float> @test_vp_reverse_nxv16f32_masked(<vscale x 16 x float> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16f32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x float> %dst
+}
+
+define <vscale x 16 x float> @test_vp_reverse_nxv16f32(<vscale x 16 x float> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+
+ %dst = call <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float> %src, <vscale x 16 x i1> %allones, i32 %evl)
+ ret <vscale x 16 x float> %dst
+}
+
+; LMUL = 1
+declare <vscale x 1 x double> @llvm.experimental.vp.reverse.nxv1f64(<vscale x 1 x double>,<vscale x 1 x i1>,i32)
+declare <vscale x 2 x float> @llvm.experimental.vp.reverse.nxv2f32(<vscale x 2 x float>,<vscale x 2 x i1>,i32)
+
+; LMUL = 2
+declare <vscale x 2 x double> @llvm.experimental.vp.reverse.nxv2f64(<vscale x 2 x double>,<vscale x 2 x i1>,i32)
+declare <vscale x 4 x float> @llvm.experimental.vp.reverse.nxv4f32(<vscale x 4 x float>,<vscale x 4 x i1>,i32)
+
+; LMUL = 4
+declare <vscale x 4 x double> @llvm.experimental.vp.reverse.nxv4f64(<vscale x 4 x double>,<vscale x 4 x i1>,i32)
+declare <vscale x 8 x float> @llvm.experimental.vp.reverse.nxv8f32(<vscale x 8 x float>,<vscale x 8 x i1>,i32)
+
+; LMUL = 8
+declare <vscale x 8 x double> @llvm.experimental.vp.reverse.nxv8f64(<vscale x 8 x double>,<vscale x 8 x i1>,i32)
+declare <vscale x 16 x float> @llvm.experimental.vp.reverse.nxv16f32(<vscale x 16 x float>,<vscale x 16 x i1>,i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll
new file mode 100644
index 000000000000000..d7fc8838f430d18
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN: < %s | FileCheck %s
+
+define <2 x i64> @test_vp_reverse_v2i64_masked(<2 x i64> %src, <2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v2i64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> %src, <2 x i1> %mask, i32 %evl)
+ ret <2 x i64> %dst
+}
+
+define <2 x i64> @test_vp_reverse_v2i64(<2 x i64> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+
+ %dst = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> %src, <2 x i1> %allones, i32 %evl)
+ ret <2 x i64> %dst
+}
+
+define <4 x i32> @test_vp_reverse_v4i32_masked(<4 x i32> %src, <4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4i32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> %src, <4 x i1> %mask, i32 %evl)
+ ret <4 x i32> %dst
+}
+
+define <4 x i32> @test_vp_reverse_v4i32(<4 x i32> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+
+ %dst = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> %src, <4 x i1> %allones, i32 %evl)
+ ret <4 x i32> %dst
+}
+
+define <8 x i16> @test_vp_reverse_v8i16_masked(<8 x i16> %src, <8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v8i16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> %src, <8 x i1> %mask, i32 %evl)
+ ret <8 x i16> %dst
+}
+
+define <8 x i16> @test_vp_reverse_v8i16(<8 x i16> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+
+ %dst = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> %src, <8 x i1> %allones, i32 %evl)
+ ret <8 x i16> %dst
+}
+
+define <16 x i8> @test_vp_reverse_v16i8_masked(<16 x i8> %src, <16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v16i8_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> %src, <16 x i1> %mask, i32 %evl)
+ ret <16 x i8> %dst
+}
+
+define <16 x i8> @test_vp_reverse_v16i8(<16 x i8> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v10, v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+
+ %dst = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> %src, <16 x i1> %allones, i32 %evl)
+ ret <16 x i8> %dst
+}
+
+declare <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64>,<2 x i1>,i32)
+declare <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32>,<4 x i1>,i32)
+declare <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16>,<8 x i1>,i32)
+declare <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8>,<16 x i1>,i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll
new file mode 100644
index 000000000000000..61b493f401e40cf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll
@@ -0,0 +1,595 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i64> @test_vp_reverse_nxv1i64_masked(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1i64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %dst
+}
+
+define <vscale x 1 x i64> @test_vp_reverse_nxv1i64(<vscale x 1 x i64> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+
+ %dst = call <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64> %src, <vscale x 1 x i1> %allones, i32 %evl)
+ ret <vscale x 1 x i64> %dst
+}
+
+define <vscale x 2 x i32> @test_vp_reverse_nxv2i32_masked(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x i32> %dst
+}
+
+define <vscale x 2 x i32> @test_vp_reverse_nxv2i32(<vscale x 2 x i32> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+ %dst = call <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32> %src, <vscale x 2 x i1> %allones, i32 %evl)
+ ret <vscale x 2 x i32> %dst
+}
+
+define <vscale x 4 x i16> @test_vp_reverse_nxv4i16_masked(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t
+; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x i16> %dst
+}
+
+define <vscale x 4 x i16> @test_vp_reverse_nxv4i16(<vscale x 4 x i16> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v10, v9, a1
+; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+
+ %dst = call <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16> %src, <vscale x 4 x i1> %allones, i32 %evl)
+ ret <vscale x 4 x i16> %dst
+}
+
+define <vscale x 8 x i8> @test_vp_reverse_nxv8i8_masked(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i8_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i8> %dst
+}
+
+define <vscale x 8 x i8> @test_vp_reverse_nxv8i8(<vscale x 8 x i8> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v10, v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v9, v8, v10
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x i8> %dst
+}
+
+define <vscale x 2 x i64> @test_vp_reverse_nxv2i64_masked(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x i64> %dst
+}
+
+define <vscale x 2 x i64> @test_vp_reverse_nxv2i64(<vscale x 2 x i64> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+ %dst = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> %src, <vscale x 2 x i1> %allones, i32 %evl)
+ ret <vscale x 2 x i64> %dst
+}
+
+define <vscale x 4 x i32> @test_vp_reverse_nxv4i32_masked(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x i32> %dst
+}
+
+define <vscale x 4 x i32> @test_vp_reverse_nxv4i32(<vscale x 4 x i32> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+
+ %dst = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> %src, <vscale x 4 x i1> %allones, i32 %evl)
+ ret <vscale x 4 x i32> %dst
+}
+
+define <vscale x 8 x i16> @test_vp_reverse_nxv8i16_masked(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i16> %dst
+}
+
+define <vscale x 8 x i16> @test_vp_reverse_nxv8i16(<vscale x 8 x i16> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vrsub.vx v12, v10, a1
+; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x i16> %dst
+}
+
+define <vscale x 16 x i8> @test_vp_reverse_nxv16i8_masked(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i8_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x i8> %dst
+}
+
+define <vscale x 16 x i8> @test_vp_reverse_nxv16i8(<vscale x 16 x i8> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v12, v12, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+
+ %dst = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> %src, <vscale x 16 x i1> %allones, i32 %evl)
+ ret <vscale x 16 x i8> %dst
+}
+
+define <vscale x 4 x i64> @test_vp_reverse_nxv4i64_masked(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x i64> %dst
+}
+
+define <vscale x 4 x i64> @test_vp_reverse_nxv4i64(<vscale x 4 x i64> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+
+ %dst = call <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64> %src, <vscale x 4 x i1> %allones, i32 %evl)
+ ret <vscale x 4 x i64> %dst
+}
+
+define <vscale x 8 x i32> @test_vp_reverse_nxv8i32_masked(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i32> %dst
+}
+
+define <vscale x 8 x i32> @test_vp_reverse_nxv8i32(<vscale x 8 x i32> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x i32> %dst
+}
+
+define <vscale x 16 x i16> @test_vp_reverse_nxv16i16_masked(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t
+; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x i16> %dst
+}
+
+define <vscale x 16 x i16> @test_vp_reverse_nxv16i16(<vscale x 16 x i16> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vrsub.vx v16, v12, a1
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+
+ %dst = call <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16> %src, <vscale x 16 x i1> %allones, i32 %evl)
+ ret <vscale x 16 x i16> %dst
+}
+
+define <vscale x 32 x i8> @test_vp_reverse_nxv32i8_masked(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i8_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> %mask, i32 %evl)
+ ret <vscale x 32 x i8> %dst
+}
+
+define <vscale x 32 x i8> @test_vp_reverse_nxv32i8(<vscale x 32 x i8> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v16, v16, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+
+ %dst = call <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8> %src, <vscale x 32 x i1> %allones, i32 %evl)
+ ret <vscale x 32 x i8> %dst
+}
+
+define <vscale x 8 x i64> @test_vp_reverse_nxv8i64_masked(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i64_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i64> %dst
+}
+
+define <vscale x 8 x i64> @test_vp_reverse_nxv8i64(<vscale x 8 x i64> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x i64> %dst
+}
+
+define <vscale x 16 x i32> @test_vp_reverse_nxv16i32_masked(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i32_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x i32> %dst
+}
+
+define <vscale x 16 x i32> @test_vp_reverse_nxv16i32(<vscale x 16 x i32> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+
+ %dst = call <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32> %src, <vscale x 16 x i1> %allones, i32 %evl)
+ ret <vscale x 16 x i32> %dst
+}
+
+define <vscale x 32 x i16> @test_vp_reverse_nxv32i16_masked(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i16_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> %mask, i32 %evl)
+ ret <vscale x 32 x i16> %dst
+}
+
+define <vscale x 32 x i16> @test_vp_reverse_nxv32i16(<vscale x 32 x i16> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a1
+; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+
+ %dst = call <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16> %src, <vscale x 32 x i1> %allones, i32 %evl)
+ ret <vscale x 32 x i16> %dst
+}
+
+define <vscale x 64 x i8> @test_vp_reverse_nxv64i8_masked(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv64i8_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v16, v16, a2
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v28, v8, v16
+; CHECK-NEXT: vrgatherei16.vv v24, v12, v16
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a1, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v24, a1
+; CHECK-NEXT: ret
+ %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> %mask, i32 %evl)
+ ret <vscale x 64 x i8> %dst
+}
+
+define <vscale x 64 x i8> @test_vp_reverse_nxv64i8(<vscale x 64 x i8> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v16, v16, a2
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v28, v8, v16
+; CHECK-NEXT: vrgatherei16.vv v24, v12, v16
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a1, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v24, a1
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+
+ %dst = call <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8> %src, <vscale x 64 x i1> %allones, i32 %evl)
+ ret <vscale x 64 x i8> %dst
+}
+
+define <vscale x 128 x i8> @test_vp_reverse_nxv128i8(<vscale x 128 x i8> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv128i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: mv a2, a0
+; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: mv a2, a1
+; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: addi sp, sp, -80
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: addi s0, sp, 80
+; CHECK-NEXT: .cfi_def_cfa s0, 0
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 4
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: andi sp, sp, -64
+; CHECK-NEXT: addi a3, sp, 64
+; CHECK-NEXT: add a4, a0, a3
+; CHECK-NEXT: addi a4, a4, -1
+; CHECK-NEXT: li a5, -1
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vsse8.v v8, (a4), a5
+; CHECK-NEXT: sub a4, a4, a2
+; CHECK-NEXT: sub a6, a0, a1
+; CHECK-NEXT: sltu a0, a0, a6
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: and a0, a0, a6
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vsse8.v v16, (a4), a5
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: vle8.v v16, (a1)
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a3)
+; CHECK-NEXT: addi sp, s0, -80
+; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 80
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 128 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 128 x i1> %head, <vscale x 128 x i1> undef, <vscale x 128 x i32> zeroinitializer
+
+ %dst = call <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8> %src, <vscale x 128 x i1> %allones, i32 %evl)
+ ret <vscale x 128 x i8> %dst
+}
+
+; LMUL = 1
+declare <vscale x 1 x i64> @llvm.experimental.vp.reverse.nxv1i64(<vscale x 1 x i64>,<vscale x 1 x i1>,i32)
+declare <vscale x 2 x i32> @llvm.experimental.vp.reverse.nxv2i32(<vscale x 2 x i32>,<vscale x 2 x i1>,i32)
+declare <vscale x 4 x i16> @llvm.experimental.vp.reverse.nxv4i16(<vscale x 4 x i16>,<vscale x 4 x i1>,i32)
+declare <vscale x 8 x i8> @llvm.experimental.vp.reverse.nxv8i8(<vscale x 8 x i8>,<vscale x 8 x i1>,i32)
+
+; LMUL = 2
+declare <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i1>,i32)
+declare <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i1>,i32)
+declare <vscale x 8 x i16> @llvm.experimental.vp.reverse.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i1>,i32)
+declare <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i1>,i32)
+
+; LMUL = 4
+declare <vscale x 4 x i64> @llvm.experimental.vp.reverse.nxv4i64(<vscale x 4 x i64>,<vscale x 4 x i1>,i32)
+declare <vscale x 8 x i32> @llvm.experimental.vp.reverse.nxv8i32(<vscale x 8 x i32>,<vscale x 8 x i1>,i32)
+declare <vscale x 16 x i16> @llvm.experimental.vp.reverse.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i1>,i32)
+declare <vscale x 32 x i8> @llvm.experimental.vp.reverse.nxv32i8(<vscale x 32 x i8>,<vscale x 32 x i1>,i32)
+
+; LMUL = 8
+declare <vscale x 8 x i64> @llvm.experimental.vp.reverse.nxv8i64(<vscale x 8 x i64>,<vscale x 8 x i1>,i32)
+declare <vscale x 16 x i32> @llvm.experimental.vp.reverse.nxv16i32(<vscale x 16 x i32>,<vscale x 16 x i1>,i32)
+declare <vscale x 32 x i16> @llvm.experimental.vp.reverse.nxv32i16(<vscale x 32 x i16>,<vscale x 32 x i1>,i32)
+declare <vscale x 64 x i8> @llvm.experimental.vp.reverse.nxv64i8(<vscale x 64 x i8>,<vscale x 64 x i1>,i32)
+
+declare <vscale x 128 x i8> @llvm.experimental.vp.reverse.nxv128i8(<vscale x 128 x i8>,<vscale x 128 x i1>,i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll
new file mode 100644
index 000000000000000..d84ad058fbf1cf5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 \
+; RUN: < %s | FileCheck %s
+
+define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v2i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> %src, <2 x i1> %mask, i32 %evl)
+ ret <2 x i1> %dst
+}
+
+define <2 x i1> @test_vp_reverse_v2i1(<2 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgather.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+
+ %dst = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> %src, <2 x i1> %allones, i32 %evl)
+ ret <2 x i1> %dst
+}
+
+define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, <4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> %src, <4 x i1> %mask, i32 %evl)
+ ret <4 x i1> %dst
+}
+
+define <4 x i1> @test_vp_reverse_v4i1(<4 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgather.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+
+ %dst = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> %src, <4 x i1> %allones, i32 %evl)
+ ret <4 x i1> %dst
+}
+
+define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v8i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> %src, <8 x i1> %mask, i32 %evl)
+ ret <8 x i1> %dst
+}
+
+define <8 x i1> @test_vp_reverse_v8i1(<8 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgather.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+
+ %dst = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> %src, <8 x i1> %allones, i32 %evl)
+ ret <8 x i1> %dst
+}
+
+define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v16i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> %src, <16 x i1> %mask, i32 %evl)
+ ret <16 x i1> %dst
+}
+
+define <16 x i1> @test_vp_reverse_v16i1(<16 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v11, v10, v8
+; CHECK-NEXT: vmsne.vi v0, v11, 0
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+
+ %dst = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> %src, <16 x i1> %allones, i32 %evl)
+ ret <16 x i1> %dst
+}
+
+declare <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1>,<2 x i1>,i32)
+declare <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1>,<4 x i1>,i32)
+declare <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1>,<8 x i1>,i32)
+declare <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1>,<16 x i1>,i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
new file mode 100644
index 000000000000000..8f165fb8f17794f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll
@@ -0,0 +1,292 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i1> @test_vp_reverse_nxv1i1_masked(<vscale x 1 x i1> %src, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1> %src, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i1> %dst
+}
+
+define <vscale x 1 x i1> @test_vp_reverse_nxv1i1(<vscale x 1 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgather.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+
+ %dst = call <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1> %src, <vscale x 1 x i1> %allones, i32 %evl)
+ ret <vscale x 1 x i1> %dst
+}
+
+define <vscale x 2 x i1> @test_vp_reverse_nxv2i1_masked(<vscale x 2 x i1> %src, <vscale x 2 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %src, <vscale x 2 x i1> %mask, i32 %evl)
+ ret <vscale x 2 x i1> %dst
+}
+
+define <vscale x 2 x i1> @test_vp_reverse_nxv2i1(<vscale x 2 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgather.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+
+ %dst = call <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1> %src, <vscale x 2 x i1> %allones, i32 %evl)
+ ret <vscale x 2 x i1> %dst
+}
+
+define <vscale x 4 x i1> @test_vp_reverse_nxv4i1_masked(<vscale x 4 x i1> %src, <vscale x 4 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vrgather.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> %src, <vscale x 4 x i1> %mask, i32 %evl)
+ ret <vscale x 4 x i1> %dst
+}
+
+define <vscale x 4 x i1> @test_vp_reverse_nxv4i1(<vscale x 4 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vrgather.vv v10, v9, v8
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+
+ %dst = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> %src, <vscale x 4 x i1> %allones, i32 %evl)
+ ret <vscale x 4 x i1> %dst
+}
+
+define <vscale x 8 x i1> @test_vp_reverse_nxv8i1_masked(<vscale x 8 x i1> %src, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 0
+; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v10, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t
+; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t
+; CHECK-NEXT: ret
+ %dst = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> %src, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i1> %dst
+}
+
+define <vscale x 8 x i1> @test_vp_reverse_nxv8i1(<vscale x 8 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v11, v10, v8
+; CHECK-NEXT: vmsne.vi v0, v11, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+
+ %dst = call <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1> %src, <vscale x 8 x i1> %allones, i32 %evl)
+ ret <vscale x 8 x i1> %dst
+}
+
+define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v12, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v16, v10, v12, v0.t
+; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 %evl)
+ ret <vscale x 16 x i1> %dst
+}
+
+define <vscale x 16 x i1> @test_vp_reverse_nxv16i1(<vscale x 16 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v14, v12, v8
+; CHECK-NEXT: vmsne.vi v0, v14, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+
+ %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %allones, i32 %evl)
+ ret <vscale x 16 x i1> %dst
+}
+
+define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vid.v v16, v0.t
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v24, v12, v16, v0.t
+; CHECK-NEXT: vmsne.vi v8, v24, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 %evl)
+ ret <vscale x 32 x i1> %dst
+}
+
+define <vscale x 32 x i1> @test_vp_reverse_nxv32i1(<vscale x 32 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, -1
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vrsub.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: vrgatherei16.vv v20, v16, v8
+; CHECK-NEXT: vmsne.vi v0, v20, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+
+ %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %allones, i32 %evl)
+ ret <vscale x 32 x i1> %dst
+}
+
+define <vscale x 64 x i1> @test_vp_reverse_nxv64i1_masked(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv64i1_masked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v24
+; CHECK-NEXT: vrsub.vx v0, v24, a2
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v28, v16, v0
+; CHECK-NEXT: vrgatherei16.vv v24, v20, v0
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a1, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v16, v24, a1
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: ret
+ %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 %evl)
+ ret <vscale x 64 x i1> %dst
+}
+
+define <vscale x 64 x i1> @test_vp_reverse_nxv64i1(<vscale x 64 x i1> %src, i32 zeroext %evl) {
+; CHECK-LABEL: test_vp_reverse_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: addi a2, a2, -1
+; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vrsub.vx v24, v16, a2
+; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
+; CHECK-NEXT: vrgatherei16.vv v20, v8, v24
+; CHECK-NEXT: vrgatherei16.vv v16, v12, v24
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub a1, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v16, a1
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> undef, i1 1, i32 0
+ %allones = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+
+ %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %allones, i32 %evl)
+ ret <vscale x 64 x i1> %dst
+}
+
+declare <vscale x 1 x i1> @llvm.experimental.vp.reverse.nxv1i1(<vscale x 1 x i1>,<vscale x 1 x i1>,i32)
+declare <vscale x 2 x i1> @llvm.experimental.vp.reverse.nxv2i1(<vscale x 2 x i1>,<vscale x 2 x i1>,i32)
+declare <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1>,<vscale x 4 x i1>,i32)
+declare <vscale x 8 x i1> @llvm.experimental.vp.reverse.nxv8i1(<vscale x 8 x i1>,<vscale x 8 x i1>,i32)
+declare <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1>,<vscale x 16 x i1>,i32)
+declare <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1>,<vscale x 32 x i1>,i32)
+declare <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1>,<vscale x 64 x i1>,i32)
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index 9590d9b068ecfb1..c757654ddd5f393 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -148,6 +148,7 @@ class VPIntrinsicTest : public testing::Test {
Str << " declare <8 x i1> @llvm.vp.icmp.v8i16"
<< "(<8 x i16>, <8 x i16>, metadata, <8 x i1>, i32) ";
+ Str << " declare <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32>, <8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.abs.v8i16"
<< "(<8 x i16>, i1 immarg, <8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.bitreverse.v8i16"
>From 28872355b4b749a828ebb2e857311aabe2cc00b5 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 26 Oct 2023 20:37:10 -0700
Subject: [PATCH 2/3] clang-format
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 5 ++---
llvm/unittests/IR/VPIntrinsicTest.cpp | 3 ++-
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dabff24eef7023e..7ddffd02257c3e2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10350,9 +10350,8 @@ RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
DAG.getUNDEF(IndicesVT), VecLen, EVL);
- SDValue VRSUB =
- DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
- DAG.getUNDEF(IndicesVT), Mask, EVL);
+ SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
+ DAG.getUNDEF(IndicesVT), Mask, EVL);
SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
DAG.getUNDEF(GatherVT), Mask, EVL);
diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp
index c757654ddd5f393..a3bef3d42adb0e0 100644
--- a/llvm/unittests/IR/VPIntrinsicTest.cpp
+++ b/llvm/unittests/IR/VPIntrinsicTest.cpp
@@ -148,7 +148,8 @@ class VPIntrinsicTest : public testing::Test {
Str << " declare <8 x i1> @llvm.vp.icmp.v8i16"
<< "(<8 x i16>, <8 x i16>, metadata, <8 x i1>, i32) ";
- Str << " declare <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32>, <8 x i1>, i32) ";
+ Str << " declare <8 x i32> @llvm.experimental.vp.reverse.v8i32(<8 x i32>, "
+ "<8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.abs.v8i16"
<< "(<8 x i16>, i1 immarg, <8 x i1>, i32) ";
Str << " declare <8 x i16> @llvm.vp.bitreverse.v8i16"
>From 2ef59e456653e9631de03a1bdff7aed19ecbc8a4 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Fri, 27 Oct 2023 14:40:48 -0700
Subject: [PATCH 3/3] Remove VP_PROPERTY_FUNCTIONAL_INTRINSIC
---
llvm/include/llvm/IR/VPIntrinsics.def | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 0b6e266f7dbc509..5ab4b98dd805c3e 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -705,7 +705,7 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
// llvm.experimental.vp.reverse(x,mask,vlen)
BEGIN_REGISTER_VP(experimental_vp_reverse, 1, 2,
EXPERIMENTAL_VP_REVERSE, -1)
-VP_PROPERTY_FUNCTIONAL_INTRINSIC(experimental_vector_reverse)
+VP_PROPERTY_NO_FUNCTIONAL
END_REGISTER_VP(experimental_vp_reverse, EXPERIMENTAL_VP_REVERSE)
///// } Shuffles