[llvm] 6f17613 - [RISCV][VP] Lower VP ISD nodes to RVV instructions
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Wed May 5 04:40:02 PDT 2021
Author: Fraser Cormack
Date: 2021-05-05T12:32:24+01:00
New Revision: 6f17613bfb95583f96a35ed589b67f07c5b028ab
URL: https://github.com/llvm/llvm-project/commit/6f17613bfb95583f96a35ed589b67f07c5b028ab
DIFF: https://github.com/llvm/llvm-project/commit/6f17613bfb95583f96a35ed589b67f07c5b028ab.diff
LOG: [RISCV][VP] Lower VP ISD nodes to RVV instructions
This patch supports all of the current set of VP integer binary
intrinsics by lowering them to RVV instructions. It does so by using
the existing RISCVISD *_VL custom nodes as an intermediate layer. Both
scalable and fixed-length vectors are supported via this method.
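For example (taken from the new fixed-vectors-vadd-vp.ll test below), a
masked call such as
  %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
is lowered via RISCVISD::ADD_VL and selected to a vsetvli/vadd.vv pair,
with the vector length taken from the i32 EVL operand and the mask
applied through v0.t:
  vsetvli a0, a0, e8,m1,ta,mu
  vadd.vv v8, v8, v9, v0.t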
One notable change to the existing vector codegen strategy is that
scalable all-ones and all-zeros mask SPLAT_VECTORs are now lowered to
RISCVISD VMSET_VL and VMCLR_VL nodes to match their fixed-length
BUILD_VECTOR counterparts. This allows them to reuse the existing
"all-ones" VL patterns.
To reduce the size of the Phabricator diff, some tests are intentionally
left out and will be added later if the patch is accepted.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D101826
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 613ec56bf0ea2..b50d3df63bd78 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -411,6 +411,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ static unsigned IntegerVPOps[] = {
+ ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
+ ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR,
+ ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};
+
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
@@ -496,6 +501,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ for (unsigned VPOpc : IntegerVPOps) {
+ setOperationAction(VPOpc, VT, Custom);
+ // RV64 must custom-legalize the i32 EVL parameter.
+ if (Subtarget.is64Bit())
+ setOperationAction(VPOpc, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
@@ -695,6 +707,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+
+ for (unsigned VPOpc : IntegerVPOps) {
+ setOperationAction(VPOpc, VT, Custom);
+ // RV64 must custom-legalize the i32 EVL parameter.
+ if (Subtarget.is64Bit())
+ setOperationAction(VPOpc, MVT::i32, Custom);
+ }
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -2367,6 +2386,32 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
return lowerSET_ROUNDING(Op, DAG);
+ case ISD::VP_ADD:
+ return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
+ case ISD::VP_SUB:
+ return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
+ case ISD::VP_MUL:
+ return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
+ case ISD::VP_SDIV:
+ return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
+ case ISD::VP_UDIV:
+ return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
+ case ISD::VP_SREM:
+ return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
+ case ISD::VP_UREM:
+ return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
+ case ISD::VP_AND:
+ return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
+ case ISD::VP_OR:
+ return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
+ case ISD::VP_XOR:
+ return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
+ case ISD::VP_ASHR:
+ return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
+ case ISD::VP_LSHR:
+ return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
+ case ISD::VP_SHL:
+ return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
}
}
@@ -2828,12 +2873,18 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// legal equivalently-sized i8 type, so we can use that as a go-between.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
SelectionDAG &DAG) const {
- SDValue SplatVal = Op.getOperand(0);
- // All-zeros or all-ones splats are handled specially.
- if (isa<ConstantSDNode>(SplatVal))
- return Op;
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
+ SDValue SplatVal = Op.getOperand(0);
+ // All-zeros or all-ones splats are handled specially.
+ if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
+ SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
+ return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
+ }
+ if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
+ SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
+ return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
+ }
MVT XLenVT = Subtarget.getXLenVT();
assert(SplatVal.getValueType() == XLenVT &&
"Unexpected type for i1 splat value");
@@ -4215,6 +4266,50 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}
+// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
+// * Operands of each node are assumed to be in the same order.
+// * The EVL operand is promoted from i32 to i64 on RV64.
+// * Fixed-length vectors are converted to their scalable-vector container
+// types.
+SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
+ unsigned RISCVISDOpc) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ Optional<unsigned> EVLIdx = ISD::getVPExplicitVectorLengthIdx(Op.getOpcode());
+
+ SmallVector<SDValue, 4> Ops;
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ for (const auto &OpIdx : enumerate(Op->ops())) {
+ SDValue V = OpIdx.value();
+ if ((unsigned)OpIdx.index() == EVLIdx) {
+ Ops.push_back(DAG.getZExtOrTrunc(V, DL, XLenVT));
+ continue;
+ }
+ assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
+ // Pass through operands which aren't fixed-length vectors.
+ if (!V.getValueType().isFixedLengthVector()) {
+ Ops.push_back(V);
+ continue;
+ }
+ // "cast" fixed length vector to a scalable vector.
+ MVT OpVT = V.getSimpleValueType();
+ MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
+ assert(useRVVForFixedLengthVectorVT(OpVT) &&
+ "Only fixed length vectors are supported!");
+ Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
+ }
+
+ if (!VT.isFixedLengthVector())
+ return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
+
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
+
+ return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
+}
+
// Custom lower MGATHER to a legalized form for RVV. It will then be matched to
// a RVV indexed load. The RVV indexed load instructions only support the
// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index b0cdbbc27ba07..5fc82ec6abdce 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -543,6 +543,7 @@ class RISCVTargetLowering : public TargetLowering {
SelectionDAG &DAG) const;
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
bool HasMask = true) const;
+ SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const;
SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
unsigned ExtendOpc) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index a3ec02b87d608..36e29a0f28411 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -28,6 +28,14 @@ def SDTSplatI64 : SDTypeProfile<1, 1, [
def rv32_splat_i64 : SDNode<"RISCVISD::SPLAT_VECTOR_I64", SDTSplatI64>;
+def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>,
+ SDTCisVT<1, XLenVT>]>;
+def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
+def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
+
+def rvv_vnot : PatFrag<(ops node:$in),
+ (xor node:$in, (riscv_vmset_vl (XLenVT srcvalue)))>;
+
// Give explicit Complexity to prefer simm5/uimm5.
def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [splat_vector, rv32_splat_i64], [], 1>;
def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", [splat_vector, rv32_splat_i64], [], 2>;
@@ -503,25 +511,25 @@ foreach mti = AllMasks in {
(!cast<Instruction>("PseudoVMXOR_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (vnot (and VR:$rs1, VR:$rs2))),
+ def : Pat<(mti.Mask (rvv_vnot (and VR:$rs1, VR:$rs2))),
(!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (vnot (or VR:$rs1, VR:$rs2))),
+ def : Pat<(mti.Mask (rvv_vnot (or VR:$rs1, VR:$rs2))),
(!cast<Instruction>("PseudoVMNOR_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (vnot (xor VR:$rs1, VR:$rs2))),
+ def : Pat<(mti.Mask (rvv_vnot (xor VR:$rs1, VR:$rs2))),
(!cast<Instruction>("PseudoVMXNOR_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (and VR:$rs1, (vnot VR:$rs2))),
+ def : Pat<(mti.Mask (and VR:$rs1, (rvv_vnot VR:$rs2))),
(!cast<Instruction>("PseudoVMANDNOT_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (or VR:$rs1, (vnot VR:$rs2))),
+ def : Pat<(mti.Mask (or VR:$rs1, (rvv_vnot VR:$rs2))),
(!cast<Instruction>("PseudoVMORNOT_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- // Handle vnot the same as the vnot.mm pseudoinstruction.
- def : Pat<(mti.Mask (vnot VR:$rs)),
+ // Handle rvv_vnot the same as the vnot.mm pseudoinstruction.
+ def : Pat<(mti.Mask (rvv_vnot VR:$rs)),
(!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
}
@@ -725,13 +733,6 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX)
simm5:$rs1, vti.AVL, vti.Log2SEW)>;
}
-
-foreach mti = AllMasks in {
- def : Pat<(mti.Mask immAllOnesV),
- (!cast<Instruction>("PseudoVMSET_M_"#mti.BX) mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask immAllZerosV),
- (!cast<Instruction>("PseudoVMCLR_M_"#mti.BX) mti.AVL, mti.Log2SEW)>;
-}
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 90dc35a1bc4cc..719e8d8b384d0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -188,11 +188,6 @@ def riscv_vmand_vl : SDNode<"RISCVISD::VMAND_VL", SDT_RISCVMaskBinOp_VL, [SDNPCo
def riscv_vmor_vl : SDNode<"RISCVISD::VMOR_VL", SDT_RISCVMaskBinOp_VL, [SDNPCommutative]>;
def riscv_vmxor_vl : SDNode<"RISCVISD::VMXOR_VL", SDT_RISCVMaskBinOp_VL, [SDNPCommutative]>;
-def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i1>,
- SDTCisVT<1, XLenVT>]>;
-def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
-def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
-
def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
@@ -243,38 +238,49 @@ def sew16uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<16>", []>;
def sew32uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<32>", []>;
def sew64uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<64>", []>;
-class VPatBinaryVL_VV<SDNode vop,
- string instruction_name,
- ValueType result_type,
- ValueType op_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg RetClass,
- VReg op_reg_class> :
- Pat<(result_type (vop
- (op_type op_reg_class:$rs1),
- (op_type op_reg_class:$rs2),
- (mask_type true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
+multiclass VPatBinaryVL_VV<SDNode vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType op_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg RetClass,
+ VReg op_reg_class> {
+ def : Pat<(result_type (vop
+ (op_type op_reg_class:$rs1),
+ (op_type op_reg_class:$rs2),
+ (mask_type true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
+ op_reg_class:$rs1,
+ op_reg_class:$rs2,
+ GPR:$vl, sew)>;
+ def : Pat<(result_type (vop
+ (op_type op_reg_class:$rs1),
+ (op_type op_reg_class:$rs2),
+ (mask_type VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX#"_MASK")
+ (op_type (IMPLICIT_DEF)),
op_reg_class:$rs1,
op_reg_class:$rs2,
- GPR:$vl, sew)>;
+ VMV0:$vm, GPR:$vl, sew)>;
+}
-class VPatBinaryVL_XI<SDNode vop,
- string instruction_name,
- string suffix,
- ValueType result_type,
- ValueType vop_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg RetClass,
- VReg vop_reg_class,
- ComplexPattern SplatPatKind,
- DAGOperand xop_kind> :
- Pat<(result_type (vop
+multiclass VPatBinaryVL_XI<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType vop_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg RetClass,
+ VReg vop_reg_class,
+ ComplexPattern SplatPatKind,
+ DAGOperand xop_kind> {
+ def : Pat<(result_type (vop
(vop_type vop_reg_class:$rs1),
(vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
(mask_type true_mask),
@@ -283,34 +289,45 @@ class VPatBinaryVL_XI<SDNode vop,
vop_reg_class:$rs1,
xop_kind:$rs2,
GPR:$vl, sew)>;
+ def : Pat<(result_type (vop
+ (vop_type vop_reg_class:$rs1),
+ (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
+ (mask_type VMV0:$vm),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX#"_MASK")
+ (vop_type (IMPLICIT_DEF)),
+ vop_reg_class:$rs1,
+ xop_kind:$rs2,
+ VMV0:$vm, GPR:$vl, sew)>;
+}
multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
- def : VPatBinaryVL_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
- def : VPatBinaryVL_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- SplatPat, GPR>;
+ defm : VPatBinaryVL_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass,
+ SplatPat, GPR>;
}
}
multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
Operand ImmType = simm5> {
foreach vti = AllIntegerVectors in {
- def : VPatBinaryVL_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
- def : VPatBinaryVL_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- SplatPat, GPR>;
- def : VPatBinaryVL_XI<vop, instruction_name, "VI",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- !cast<ComplexPattern>(SplatPat#_#ImmType),
- ImmType>;
+ defm : VPatBinaryVL_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass,
+ SplatPat, GPR>;
+ defm : VPatBinaryVL_XI<vop, instruction_name, "VI",
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass,
+ !cast<ComplexPattern>(SplatPat#_#ImmType),
+ ImmType>;
}
}
@@ -335,9 +352,9 @@ class VPatBinaryVL_VF<SDNode vop,
multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
- def : VPatBinaryVL_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryVL_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass>;
def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
vti.LMul, vti.RegClass, vti.RegClass,
@@ -587,11 +604,23 @@ foreach vti = AllIntegerVectors in {
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
(vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, simm5:$rs2,
+ VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
}
// 12.3. Vector Integer Extension
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 0ee7649ca3a67..9da0a95808a89 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -131,6 +131,14 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
bool isLegalMaskedScatter(Type *DataType, Align Alignment) {
return isLegalMaskedGatherScatter(DataType, Alignment);
}
+
+ /// \returns How the target needs this vector-predicated operation to be
+ /// transformed.
+ TargetTransformInfo::VPLegalization
+ getVPLegalizationStrategy(const VPIntrinsic &PI) const {
+ using VPLegalization = TargetTransformInfo::VPLegalization;
+ return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
+ }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
new file mode 100644
index 0000000000000..3e3f940af772f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -0,0 +1,1333 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vadd_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vadd_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vadd_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vadd_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vadd_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vadd_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.add.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vadd_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vadd_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vadd_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vadd_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vadd_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.add.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vadd_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vadd_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vadd_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vadd_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vadd_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vadd_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.add.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vadd_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vadd_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vadd_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vadd_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vadd_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.add.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vadd_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vadd_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vadd_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vadd_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vadd_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vadd_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.add.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vadd_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vadd_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vadd_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vadd_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vadd_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vadd_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.add.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vadd_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vadd_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vadd_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vadd_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vadd_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vadd_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.add.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vadd_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vadd_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vadd_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vadd_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vadd_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vadd_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.add.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vadd_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vadd_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vadd_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vadd_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vadd_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vadd_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vadd_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vadd_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vadd_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vadd_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vadd_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vadd_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.add.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vadd_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vadd_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vadd_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vadd_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vadd_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vadd_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.add.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vadd_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vadd_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vadd_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vadd_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vadd_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vadd_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.add.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vadd_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vadd_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
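+; On RV32 there is no 64-bit GPR, so the i64 scalar operand below is spilled
+; to the stack and splatted with a zero-stride vlse64.v before the add;
+; RV64 can pass it straight to vadd.vx.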
+define <2 x i64> @vadd_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v25, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vadd_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vadd_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.add.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vadd_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vadd_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vadd_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v26, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vadd_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vadd_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.add.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vadd_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vadd_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vadd_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v28, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vadd_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vadd_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.add.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vadd_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
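
Every "_unmasked" test in these files builds its all-true mask with the same
insertelement/shufflevector splat idiom, which codegen recognizes and matches
to the unmasked (no v0.t) instruction forms. A minimal sketch of the idiom,
using a hypothetical function name:

declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

define <4 x i32> @vp_add_allones_sketch(<4 x i32> %x, <4 x i32> %y, i32 zeroext %evl) {
  ; Splat i1 true into an all-ones <4 x i1> mask.
  %head = insertelement <4 x i1> undef, i1 true, i32 0
  %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
  ; With an all-ones mask, the lowering should emit plain vadd.vv (no v0.t).
  %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}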
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
new file mode 100644
index 0000000000000..dda1beffeeeab
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
@@ -0,0 +1,981 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
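+; There is no @llvm.vp.rsub intrinsic, so these tests call @llvm.vp.sub with
+; the splatted operand on the left-hand side; codegen is expected to match
+; that commuted form to vrsub.vx (scalar) and vrsub.vi (immediate).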
+declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vrsub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vrsub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vrsub_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vrsub_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.sub.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vrsub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vrsub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vrsub_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vrsub_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.sub.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vrsub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vrsub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vrsub_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vrsub_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.sub.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vrsub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vrsub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vrsub_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vrsub_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.sub.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vrsub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vrsub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vrsub_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vrsub_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.sub.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vrsub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vrsub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vrsub_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vrsub_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.sub.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vrsub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vrsub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vrsub_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vrsub_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.sub.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vrsub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vrsub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vrsub_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vrsub_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.sub.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vrsub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vrsub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vrsub_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vrsub_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vrsub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vrsub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vrsub_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vrsub_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vrsub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vrsub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vrsub_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vrsub_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.sub.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vrsub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vrsub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vrsub_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vrsub_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.sub.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
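+; On RV32 the i64 splat for the reversed subtract is materialized via the
+; stack, and vsub.vv is used with the splat as the first source; RV64 can
+; select vrsub.vx directly.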
+define <2 x i64> @vrsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v25, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vrsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v25, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vrsub_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vrsub_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.sub.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vrsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v26, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vrsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v26, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vrsub_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vrsub_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.sub.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vrsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v28, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vrsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v28, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vrsub_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vrsub_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.sub.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vrsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vrsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v16, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vrsub_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vrsub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
new file mode 100644
index 0000000000000..06a16e3d1a0f7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
@@ -0,0 +1,1333 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vsra_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsra_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vsra_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsra_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsra_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsra_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsra_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsra_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vsra_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsra_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsra_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsra_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsra_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsra_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vsra_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsra_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsra_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsra_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsra_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsra_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vsra_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsra_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsra_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsra_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsra_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsra_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vsra_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsra_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsra_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsra_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsra_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsra_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vsra_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsra_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsra_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsra_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsra_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsra_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vsra_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsra_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsra_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsra_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsra_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsra_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vsra_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsra_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsra_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsra_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsra_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsra_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vsra_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsra_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsra_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsra_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsra_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsra_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vsra_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsra_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsra_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsra_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsra_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsra_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vsra_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsra_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsra_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsra_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsra_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsra_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vsra_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsra_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsra_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v25, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsra_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsra_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsra_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vsra_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsra_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsra_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v26, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsra_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsra_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsra_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vsra_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsra_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsra_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v28, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsra_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsra_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsra_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vsra_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsra_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsra_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsra_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsra_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsra_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
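A brief note on the idiom these tests rely on: each VP intrinsic takes an
explicit <N x i1> mask and an i32 explicit vector length %evl, and lanes that
are masked off or indexed at or beyond %evl are simply not computed (their
results are undefined). The "unmasked" variants do not drop the mask operand;
they splat an all-true mask via insertelement/shufflevector, which the
lowering recognizes and matches to the maskless RVV instruction forms. Note
also the RV32 output for the i64 .vx cases above: an i64 scalar occupies a
GPR pair on RV32, so the splat is done by storing both halves to the stack
and reloading with a zero-stride vlse64.v. A minimal sketch of the
all-true-mask idiom follows (the function name is hypothetical, not part of
the patch):

; Sketch only: vp.sub over the first %evl lanes, with an all-true splatted mask.
declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)

define <4 x i32> @demo_vp_sub(<4 x i32> %a, <4 x i32> %b, i32 zeroext %evl) {
  ; Splat i1 true from lane 0 across the whole mask vector.
  %head = insertelement <4 x i1> undef, i1 true, i32 0
  %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
  ; Lanes [0, %evl) hold %a - %b; the remaining lanes are undefined.
  %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl)
  ret <4 x i32> %v
}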
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
new file mode 100644
index 0000000000000..d30e4313f8f03
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
@@ -0,0 +1,917 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
+
+define <2 x i8> @vsub_vv_v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsub_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+define <2 x i8> @vsub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.sub.v4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
+
+define <4 x i8> @vsub_vv_v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsub_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+define <4 x i8> @vsub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.sub.v8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
+
+define <8 x i8> @vsub_vv_v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsub_vx_v8i8(<8 x i8> %va, i8 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+define <8 x i8> @vsub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.sub.v16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
+
+define <16 x i8> @vsub_vv_v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsub_vx_v16i8(<16 x i8> %va, i8 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+define <16 x i8> @vsub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.sub.v2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
+
+define <2 x i16> @vsub_vv_v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsub_vx_v2i16(<2 x i16> %va, i16 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+define <2 x i16> @vsub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.sub.v4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
+
+define <4 x i16> @vsub_vv_v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsub_vx_v4i16(<4 x i16> %va, i16 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+define <4 x i16> @vsub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.sub.v8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
+
+define <8 x i16> @vsub_vv_v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsub_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+define <8 x i16> @vsub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.sub.v16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
+
+define <16 x i16> @vsub_vv_v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsub_vx_v16i16(<16 x i16> %va, i16 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+define <16 x i16> @vsub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.sub.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
+
+define <2 x i32> @vsub_vv_v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsub_vx_v2i32(<2 x i32> %va, i32 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+define <2 x i32> @vsub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define <4 x i32> @vsub_vv_v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsub_vx_v4i32(<4 x i32> %va, i32 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+define <4 x i32> @vsub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.sub.v8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+
+define <8 x i32> @vsub_vv_v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsub_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+define <8 x i32> @vsub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.sub.v16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
+
+define <16 x i32> @vsub_vv_v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsub_vx_v16i32(<16 x i32> %va, i32 %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+define <16 x i32> @vsub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_v16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.sub.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
+
+define <2 x i64> @vsub_vv_v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsub_vx_v2i64(<2 x i64> %va, i64 %b, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v25, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+define <2 x i64> @vsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer
+ %head = insertelement <2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+ %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.sub.v4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
+
+define <4 x i64> @vsub_vv_v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsub_vx_v4i64(<4 x i64> %va, i64 %b, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v26, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+define <4 x i64> @vsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer
+ %head = insertelement <4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer
+ %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.sub.v8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
+
+define <8 x i64> @vsub_vv_v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsub_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v28, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+define <8 x i64> @vsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer
+ %head = insertelement <8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer
+ %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.sub.v16i64(<16 x i64>, <16 x i64>, <16 x i1>, i32)
+
+define <16 x i64> @vsub_vv_v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_v16i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsub_vx_v16i64(<16 x i64> %va, i64 %b, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v16i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v16i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @vsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_v16i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_v16i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %head = insertelement <16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x i64> %v
+}
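The vadd-vp.ll tests that follow switch from fixed-length to scalable vector
types, so the RUN lines drop -riscv-v-vector-bits-min and the element counts
are spelled <vscale x N x ...>; the register-group size scales with the type,
from mf8 for <vscale x 1 x i8> up through m1 at <vscale x 8 x i8> and beyond.
The vadd_vi variants splat the constant -1, which fits vadd.vi's 5-bit signed
immediate (-16 to 15); a splatted constant outside that range would instead
be materialized in a scalar register and selected as vadd.vx. A minimal
sketch of the scalable splat-constant pattern (hypothetical function name,
not from the patch):

; Sketch only: scalable vp.add of a splatted immediate, foldable into vadd.vi.
declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @demo_vp_add_imm(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %evl) {
  ; Splat i32 -1 across the scalable vector.
  %e = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
  %splat = shufflevector <vscale x 2 x i32> %e, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
  ; Enabled lanes in [0, %evl) compute %a + (-1); others are undefined.
  %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %splat, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}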
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
new file mode 100644
index 0000000000000..053ed605756c1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -0,0 +1,1789 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vadd_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vadd_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vadd_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vadd_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vadd_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vadd_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vadd_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vadd_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vadd_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vadd_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vadd_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vadd_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vadd_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vadd_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vadd_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vadd_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vadd_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vadd_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vadd_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vadd_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vadd_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vadd_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vadd_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vadd_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vadd_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vadd_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vadd_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vadd_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vadd_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vadd_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vadd_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vadd_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vadd_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vadd_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 -1, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vadd_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vadd_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vadd_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vadd_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vadd_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vadd_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
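
(Note how the vsetvli operands track the vector type: SEW follows the element width (e8, e16, and so on) and LMUL scales with the element count, so a type that fills one register uses m1, larger types use register groups (m2/m4/m8), and the narrow types fall back to fractional LMUL (mf2, mf4). A few pairings from the tests above, for reference:

    element type            vsetvli operands
    <vscale x 8 x i8>       e8,m1
    <vscale x 64 x i8>      e8,m8
    <vscale x 1 x i16>      e16,mf4

)
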
+declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vadd_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vadd_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vadd_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vadd_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vadd_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vadd_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vadd_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vadd_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vadd_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vadd_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vadd_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vadd_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vadd_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vadd_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vadd_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vadd_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vadd_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vadd_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vadd_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vadd_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vadd_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vadd_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vadd_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vadd_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vadd_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vadd_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vadd_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vadd_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vadd_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vadd_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 -1, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vadd_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vadd_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vadd_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vadd_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vadd_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vadd_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vadd_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vadd_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vadd_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vadd_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vadd_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vadd_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vadd_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vadd_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vadd_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vadd_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vadd_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vadd_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vadd_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vadd_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vadd_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vadd_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vadd_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vadd_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vadd_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vadd_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vadd_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vadd_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vadd_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vadd_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 -1, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vadd_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vadd_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v25, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
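
(The RV32 sequence above shows how an i64 scalar is splatted when XLEN is only 32 bits: the two 32-bit halves of %b, passed in a0/a1, are stored to the stack, a zero-stride vlse64.v broadcasts the reassembled 64-bit value into every element, and a vadd.vv then stands in for the vadd.vx that RV64 can use directly. A rough C model of the zero-stride broadcast, as a sketch for illustration; the function name is a placeholder, not actual codegen:

    #include <stddef.h>
    #include <stdint.h>

    /* Zero-stride load: every element reads the same address, so the
       destination ends up holding a splat of *src. */
    static void zero_stride_splat_model(uint64_t *dst, const uint64_t *src,
                                        size_t vl) {
        for (size_t i = 0; i < vl; ++i)
            dst[i] = *src;
    }

The same RV32/RV64 split repeats for every i64 vector-scalar test that follows.)
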
+define <vscale x 1 x i64> @vadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vadd_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vadd_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
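+
+; Note: the splat of -1 above folds into vadd.vi because the immediate fits
+; the 5-bit signed (simm5) range of the RVV integer-add immediate form;
+; splats outside -16..15 would instead go through a scalar register and
+; vadd.vx.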
+
+declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vadd_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vadd_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v26, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vadd_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vadd_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vadd_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vadd_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v28, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vadd_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vadd_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vadd_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vadd_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vadd_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vadd.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vadd_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vadd.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vadd_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vadd_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vi_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vadd.vi v8, v8, -1
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 -1, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
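+
+; Note on the RV32 i64 tests above: with no 64-bit GPRs, the scalar is split
+; across two registers, stored to the stack, and broadcast with a zero-stride
+; vlse64.v under a VLMAX vsetvli, before a second vsetvli installs the EVL
+; for the add itself; RV64 folds the scalar straight into vadd.vx.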
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
new file mode 100644
index 0000000000000..869608dfd3425
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
@@ -0,0 +1,1305 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
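+; There is no vrsub.vv form; reverse-subtract exists only as vrsub.vx and
+; vrsub.vi. The tests below therefore call llvm.vp.sub with the operands
+; swapped so the splat is the minuend, and expect codegen to commute this
+; into vrsub.vx/vrsub.vi rather than materializing the splat vector.
+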
+declare <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vrsub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vrsub_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vrsub_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vrsub_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vrsub_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vrsub_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vrsub_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vrsub_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vrsub_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %vb, <vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vrsub_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %vb, <vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vrsub_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %vb, <vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vrsub_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %vb, <vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vrsub_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %vb, <vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vrsub_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %vb, <vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vrsub_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %vb, <vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vrsub_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %vb, <vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vrsub_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %vb, <vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vrsub_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %vb, <vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vrsub_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %vb, <vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vrsub_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %vb, <vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vrsub_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %vb, <vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vrsub_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %vb, <vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vrsub_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %vb, <vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vrsub_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %vb, <vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vrsub_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %vb, <vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vrsub_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %vb, <vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vrsub_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %vb, <vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vrsub_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 2, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %vb, <vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vrsub_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %vb, <vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vrsub_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %vb, <vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vrsub_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %vb, <vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vrsub_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %vb, <vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vrsub_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %vb, <vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vrsub_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %vb, <vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vrsub_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %vb, <vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vrsub_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %vb, <vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vrsub_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %vb, <vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vrsub_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %vb, <vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vrsub_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %vb, <vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vrsub_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %vb, <vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vrsub_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %vb, <vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vrsub_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %vb, <vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vrsub_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %vb, <vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vrsub_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %vb, <vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vrsub_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %vb, <vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vrsub_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %vb, <vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vrsub_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %vb, <vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vrsub_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %vb, <vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vrsub_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %vb, <vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vrsub_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %vb, <vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vrsub_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %vb, <vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vrsub_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 2, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %vb, <vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vrsub_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %vb, <vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vrsub_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %vb, <vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vrsub_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %vb, <vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vrsub_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %vb, <vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vrsub_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %vb, <vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vrsub_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %vb, <vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vrsub_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %vb, <vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vrsub_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %vb, <vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vrsub_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %vb, <vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vrsub_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %vb, <vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vrsub_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %vb, <vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vrsub_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %vb, <vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vrsub_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %vb, <vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vrsub_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %vb, <vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vrsub_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %vb, <vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vrsub_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %vb, <vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vrsub_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %vb, <vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vrsub_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vrsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %vb, <vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vrsub_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %vb, <vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vrsub_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 2, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %vb, <vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
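+; With i64 elements the scalar operand no longer fits in a single GPR on
+; RV32, so its two halves are stored to the stack and broadcast with a
+; zero-stride vlse64.v before a plain vsub.vv is used; RV64 can still use
+; vrsub.vx directly.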
+declare <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vrsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v25, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %vb, <vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vrsub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v25, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %vb, <vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vrsub_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %vb, <vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vrsub_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %vb, <vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vrsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v26, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %vb, <vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vrsub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v26, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %vb, <vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vrsub_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %vb, <vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vrsub_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %vb, <vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vrsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v28, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %vb, <vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vrsub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v28, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %vb, <vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vrsub_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %vb, <vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vrsub_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %vb, <vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vrsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %vb, <vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vrsub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vrsub_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v16, v8
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vrsub_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vrsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %vb, <vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vrsub_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %vb, <vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vrsub_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vrsub_vi_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vrsub.vi v8, v8, 2
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %vb, <vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
new file mode 100644
index 0000000000000..6ce734adc24ab
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
@@ -0,0 +1,1789 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
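+; Each shift below is tested in .vv, .vx and .vi form, both masked and
+; unmasked. The i32 %evl operand becomes the AVL of the preceding vsetvli,
+; and the unmasked tests splat an all-true mask, which codegen folds away
+; so that no v0.t operand is emitted.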
+
+declare <vscale x 1 x i8> @llvm.vp.ashr.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vsra_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.vp.ashr.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsra_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ashr.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsra_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ashr.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsra_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ashr.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
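+; Splat shift amounts that fit the unsigned 5-bit immediate (5 here) are
+; matched into the vsra.vi form.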
+define <vscale x 1 x i8> @vsra_vi_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ashr.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsra_vi_nxv1i8_unmasked(<vscale x 1 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.ashr.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.ashr.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vsra_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.ashr.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsra_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ashr.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsra_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ashr.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsra_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ashr.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsra_vi_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ashr.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsra_vi_nxv2i8_unmasked(<vscale x 2 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.ashr.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.ashr.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vsra_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vp.ashr.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsra_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ashr.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsra_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ashr.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsra_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ashr.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsra_vi_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ashr.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsra_vi_nxv4i8_unmasked(<vscale x 4 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.ashr.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.ashr.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vsra_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.ashr.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsra_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ashr.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsra_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ashr.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsra_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ashr.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsra_vi_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ashr.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsra_vi_nxv8i8_unmasked(<vscale x 8 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.ashr.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vsra_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsra_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsra_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsra_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsra_vi_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsra_vi_nxv16i8_unmasked(<vscale x 16 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.ashr.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vsra_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.vp.ashr.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsra_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ashr.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsra_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ashr.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsra_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ashr.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsra_vi_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ashr.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsra_vi_nxv32i8_unmasked(<vscale x 32 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.ashr.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.ashr.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vsra_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.vp.ashr.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsra_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ashr.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsra_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ashr.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsra_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ashr.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsra_vi_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ashr.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsra_vi_nxv64i8_unmasked(<vscale x 64 x i8> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.ashr.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsra_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsra_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsra_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsra_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsra_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vsra_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsra_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsra_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsra_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsra_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsra_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vsra_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsra_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsra_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsra_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsra_vi_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsra_vi_nxv4i16_unmasked(<vscale x 4 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
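+; From here the operands span multi-register groups: RVV requires a register
+; group to start at a register number that is a multiple of its LMUL, so the
+; second vv source moves from v9 to v10 (m2), v12 (m4) and v16 (m8).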
+declare <vscale x 8 x i16> @llvm.vp.ashr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vsra_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.ashr.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsra_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ashr.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsra_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ashr.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsra_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ashr.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsra_vi_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ashr.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsra_vi_nxv8i16_unmasked(<vscale x 8 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.ashr.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.ashr.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vsra_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.ashr.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsra_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ashr.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsra_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ashr.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsra_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ashr.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsra_vi_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ashr.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsra_vi_nxv16i16_unmasked(<vscale x 16 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.ashr.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.ashr.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vsra_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.vp.ashr.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsra_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ashr.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsra_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ashr.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsra_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ashr.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsra_vi_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ashr.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsra_vi_nxv32i16_unmasked(<vscale x 32 x i16> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.ashr.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsra_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsra_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsra_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsra_vi_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsra_vi_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.ashr.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vsra_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.ashr.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsra_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ashr.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsra_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ashr.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsra_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ashr.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsra_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ashr.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsra_vi_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.ashr.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vsra_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsra_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsra_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsra_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsra_vi_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsra_vi_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.ashr.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vsra_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsra_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsra_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsra_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsra_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsra_vi_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.ashr.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.ashr.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vsra_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.ashr.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsra_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ashr.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsra_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ashr.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsra_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vsra.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ashr.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsra_vi_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ashr.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsra_vi_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.ashr.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
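+; The i64 tests need separate RV32 and RV64 check prefixes for the vx form:
+; on RV32 the 64-bit scalar arrives as a GPR pair, so it is spilled to the
+; stack and splat with a zero-stride vlse64.v before a vv shift, whereas RV64
+; can use vsra.vx directly. The vi form still selects vsra.vi on both targets.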
+declare <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vsra_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsra_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsra_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v25, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsra_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsra_vi_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsra_vi_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
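+; At larger LMULs the RV32 splat sequence is unchanged apart from the scratch
+; register group holding the splat: v26 at m2, v28 at m4 and v16 at m8.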
+declare <vscale x 2 x i64> @llvm.vp.ashr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vsra_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i64> @llvm.vp.ashr.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsra_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ashr.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsra_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v26, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ashr.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsra_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ashr.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsra_vi_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ashr.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsra_vi_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.ashr.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.ashr.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vsra_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i64> @llvm.vp.ashr.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsra_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ashr.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsra_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v28, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ashr.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsra_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ashr.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsra_vi_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ashr.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsra_vi_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.ashr.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.ashr.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vsra_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.ashr.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsra_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ashr.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsra_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ashr.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsra_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsra_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsra.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsra_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsra.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ashr.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsra_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ashr.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsra_vi_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vsra_vi_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsra.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 5, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.ashr.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
new file mode 100644
index 0000000000000..c17e76c7b6b29
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
@@ -0,0 +1,1217 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
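+; Both runs share the common CHECK prefix; RV32/RV64-specific checks
+; only appear for tests that splat an i64 scalar, where the two targets
+; generate different code.
+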
+declare <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i8> @vsub_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
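+
+; The i32 %evl argument is passed straight through as the AVL operand of
+; the vsetvli: the requested element count in a0 is turned into the
+; granted VL, written back to a0, and governs the vsub.vv that follows.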
+
+define <vscale x 1 x i8> @vsub_vv_nxv1i8_unmasked(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vsub_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i8> %v
+}
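+
+; The _unmasked variants build an all-true mask splat in the IR; codegen
+; recognizes it and emits the unmasked instruction (no v0.t operand)
+; instead of a masked one.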
+
+declare <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vsub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsub_vv_nxv2i8_unmasked(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsub_vx_nxv2i8(<vscale x 2 x i8> %va, i8 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vsub_vx_nxv2i8_unmasked(<vscale x 2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i8> @vsub_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsub_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsub_vx_nxv4i8(<vscale x 4 x i8> %va, i8 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vsub_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv4i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,mf2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.vp.sub.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i8> @vsub_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsub_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsub_vx_nxv8i8(<vscale x 8 x i8> %va, i8 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv8i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vsub_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv8i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.vp.sub.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i8> @vsub_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsub_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsub_vx_nxv16i8(<vscale x 16 x i8> %va, i8 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vsub_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv16i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i8> %v
+}
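+
+; From LMUL=2 upwards the operands occupy vector register groups, so the
+; second source moves from v9 to v10 (m2), v12 (m4), and v16 (m8);
+; a group must start at a register number aligned to the group size.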
+
+declare <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vsub_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsub_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsub_vx_nxv32i8(<vscale x 32 x i8> %va, i8 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vsub_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv32i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.vp.sub.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
+
+define <vscale x 64 x i8> @vsub_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsub_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e8,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsub_vx_nxv64i8(<vscale x 64 x i8> %va, i8 %b, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv64i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vsub_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv64i8_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e8,m8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %head = insertelement <vscale x 64 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.vp.sub.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i16> @vsub_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsub_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsub_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vsub_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.vp.sub.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vsub_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsub_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsub_vx_nxv2i16(<vscale x 2 x i16> %va, i16 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vsub_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,mf2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.vp.sub.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i16> @vsub_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsub_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsub_vx_nxv4i16(<vscale x 4 x i16> %va, i16 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vsub_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv4i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.vp.sub.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i16> @vsub_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsub_vv_nxv8i16_unmasked(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsub_vx_nxv8i16(<vscale x 8 x i16> %va, i16 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vsub_vx_nxv8i16_unmasked(<vscale x 8 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv8i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.vp.sub.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i16> @vsub_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsub_vv_nxv16i16_unmasked(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsub_vx_nxv16i16(<vscale x 16 x i16> %va, i16 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vsub_vx_nxv16i16_unmasked(<vscale x 16 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv16i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.vp.sub.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i16> @vsub_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsub_vv_nxv32i16_unmasked(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsub_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vsub_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv32i16_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e16,m8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %head = insertelement <vscale x 32 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.vp.sub.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i32> @vsub_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsub_vv_nxv1i32_unmasked(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsub_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vsub_vx_nxv1i32_unmasked(<vscale x 1 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv1i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.vp.sub.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vsub_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsub_vv_nxv2i32_unmasked(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsub_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vsub_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv2i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.vp.sub.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i32> @vsub_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsub_vv_nxv4i32_unmasked(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsub_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vsub_vx_nxv4i32_unmasked(<vscale x 4 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv4i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.vp.sub.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i32> @vsub_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsub_vv_nxv8i32_unmasked(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsub_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vsub_vx_nxv8i32_unmasked(<vscale x 8 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv8i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.vp.sub.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x i32> @vsub_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsub_vv_nxv16i32_unmasked(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsub_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vsub_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vx_nxv16i32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, a1, e32,m8,ta,mu
+; CHECK-NEXT: vsub.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %head = insertelement <vscale x 16 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.vp.sub.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i64> @vsub_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsub_vv_nxv1i64_unmasked(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv1i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv1i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v25, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv1i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vsub_vx_nxv1i64_unmasked(<vscale x 1 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv1i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv1i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m1,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
+ %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i64> @vsub_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsub_vv_nxv2i64_unmasked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv2i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %b, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv2i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v26, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv2i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vsub_vx_nxv2i64_unmasked(<vscale x 2 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv2i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v26
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv2i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m2,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i64> @llvm.vp.sub.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i64> %v
+}
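[Another editorial aside: the i32 zeroext %evl parameter becomes the AVL operand of the final vsetvli in each test (a2 in the RV32 vx tests, since a0/a1 carry the split i64; a0 or a1 elsewhere), and VL is the requested AVL clamped to VLMAX. The earlier "vsetvli a0, zero, ..." in the RV32 sequences requests VLMAX instead, because the splat load must fill the whole register group regardless of %evl. The RUN lines sit at the top of these generated files, outside this excerpt; the sketch below shows the usual shape of such lines, and the exact -mattr string is an assumption.

  ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
  ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
  ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
  ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
]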
+
+declare <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x i64> @vsub_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsub_vv_nxv4i64_unmasked(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv4i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %b, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv4i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v28, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv4i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vsub_vx_nxv4i64_unmasked(<vscale x 4 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv4i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v28, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v28
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv4i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m4,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %head = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i64> @llvm.vp.sub.nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x i64> @vsub_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsub_vv_nxv8i64_unmasked(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, i32 zeroext %evl) {
+; CHECK-LABEL: vsub_vv_nxv8i64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %b, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv8i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv8i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vsub_vx_nxv8i64_unmasked(<vscale x 8 x i64> %va, i64 %b, i32 zeroext %evl) {
+; RV32-LABEL: vsub_vx_nxv8i64_unmasked:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; RV32-NEXT: vsub.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vsub_vx_nxv8i64_unmasked:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, a1, e64,m8,ta,mu
+; RV64-NEXT: vsub.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+ %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
+ %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i64> %v
+}
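[A final aside on the shared idiom: every _unmasked variant materializes a constant all-true mask with an insertelement/shufflevector splat, and the CHECK lines confirm that the mask operand (the v0.t suffix) drops off the selected vsub. The LMUL in each vsetvli tracks the element count at SEW=64, from m1 for nxv1i64 up to m8 for nxv8i64, which is why the nxv8i64 tests operate on whole register groups (v8 and v16). The idiom in isolation, as a self-contained sketch with a hypothetical name:

  ; All-true mask splat, as used by the _unmasked tests above; the check
  ; lines show the selected instruction loses its v0.t mask operand.
  define <vscale x 8 x i1> @true_mask() {
    %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
    %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
    ret <vscale x 8 x i1> %m
  }
]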