[llvm] b73d151 - [RISCV] Add DAG combines to transform ADD_VL/SUB_VL into widening add/sub.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 2 10:15:02 PST 2022
Author: Craig Topper
Date: 2022-02-02T10:03:08-08:00
New Revision: b73d151a118ffacffae772c9ddca8e276be2af43
URL: https://github.com/llvm/llvm-project/commit/b73d151a118ffacffae772c9ddca8e276be2af43
DIFF: https://github.com/llvm/llvm-project/commit/b73d151a118ffacffae772c9ddca8e276be2af43.diff
LOG: [RISCV] Add DAG combines to transform ADD_VL/SUB_VL into widening add/sub.
This adds or reuses ISD opcodes for vwadd.wv, vwaddu.wv, vwadd.vv and
vwaddu.vv, and a similar set for sub.
I've included support for narrowing scalar splats that have known
sign/zero bits, similar to what was done for MUL_VL.
The conversion to vwadd.vv proceeds in two phases. First we'll form
a vwadd.wv by narrowing one of the operands. Then we'll visit the
vwadd.wv to try to narrow the other operand. This turned out to be
simpler than catching all the cases in one step. The forming of
vwadd.wv can happen for either operand of an add, but only the right
hand side of a sub since sub isn't commutable.
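As a rough illustration of the end result (this only mirrors the shape of
the fixed-vector tests added in this patch; the function name is just for
the example), an add whose operands are both sign extended should now go
ADD_VL -> VWADD_W_VL -> VWADD_VL and select a single vwadd.vv:

; Illustrative only: both operands are sign extended from i8, so the first
; combine narrows one operand (forming the .wv shaped node) and the second
; visit narrows the other, ending up as vwadd.vv.
define <8 x i16> @example_vwadd_vv(<8 x i8>* %x, <8 x i8>* %y) {
  %a = load <8 x i8>, <8 x i8>* %x
  %b = load <8 x i8>, <8 x i8>* %y
  %asext = sext <8 x i8> %a to <8 x i16>
  %bsext = sext <8 x i8> %b to <8 x i16>
  %sum = add <8 x i16> %asext, %bsext
  ret <8 x i16> %sum
}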
An interesting quirk is that ADD_VL and VZEXT_VL/VSEXT_VL are formed
during vector op legalization, but VMV_V_X_VL usually isn't formed
until op legalization when BUILD_VECTORs are handled. This leads to
VWADD_W_VL forming in one DAG combine round, and then a later DAG combine
round sees the VMV_V_X_VL and needs to commute the operands to get the
splat in position. This alone necessitated a VWADD_W_VL combine function,
which made forming vwadd.vv in two stages an easy choice.
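The splat case has the same shape as the vwadd_vx_* tests added below
(the function name here is again only for illustration): a scalar that is
known to be sign extended can be re-splatted at the narrow element width
so the whole expression selects vwadd.vx.

; Illustrative only: the i16 scalar load is sign extended to i32 and then
; splatted, so the splat can be narrowed back to i16 and combined with the
; sign-extended vector operand into vwadd.vx.
define <4 x i32> @example_vwadd_vx(<4 x i16>* %x, i16* %y) {
  %v = load <4 x i16>, <4 x i16>* %x
  %s = load i16, i16* %y
  %sext = sext i16 %s to i32
  %head = insertelement <4 x i32> undef, i32 %sext, i32 0
  %splat = shufflevector <4 x i32> %head, <4 x i32> undef, <4 x i32> zeroinitializer
  %vext = sext <4 x i16> %v to <4 x i32>
  %sum = add <4 x i32> %splat, %vext
  ret <4 x i32> %sum
}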
I've left out trying hard to form vwadd.wx instructions for now. It would
only save an extend in the scalar domain, which isn't as interesting.
I might need to review the test coverage a bit. Most of the vwadd.wv
instructions are coming from vXi64 tests on rv64. The tests were
copied from the existing multiply tests.
Reviewed By: rogfer01
Differential Revision: https://reviews.llvm.org/D117954
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 97d24c8e9c0bb..73f89ce676530 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7363,6 +7363,129 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
return SDValue(N, 0);
}
+// Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
+// vwadd(u).vv/vx or vwsub(u).vv/vx.
+static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
+ bool Commute = false) {
+ assert((N->getOpcode() == RISCVISD::ADD_VL ||
+ N->getOpcode() == RISCVISD::SUB_VL) &&
+ "Unexpected opcode");
+ bool IsAdd = N->getOpcode() == RISCVISD::ADD_VL;
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Commute)
+ std::swap(Op0, Op1);
+
+ MVT VT = N->getSimpleValueType(0);
+
+ // Determine the narrow size for a widening add/sub.
+ unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+ MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
+ VT.getVectorElementCount());
+
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ SDLoc DL(N);
+
+ // If the RHS is a sext or zext, we can form a widening op.
+ if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
+ Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
+ Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
+ unsigned ExtOpc = Op1.getOpcode();
+ Op1 = Op1.getOperand(0);
+ // Re-introduce narrower extends if needed.
+ if (Op1.getValueType() != NarrowVT)
+ Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
+
+ unsigned WOpc;
+ if (ExtOpc == RISCVISD::VSEXT_VL)
+ WOpc = IsAdd ? RISCVISD::VWADD_W_VL : RISCVISD::VWSUB_W_VL;
+ else
+ WOpc = IsAdd ? RISCVISD::VWADDU_W_VL : RISCVISD::VWSUBU_W_VL;
+
+ return DAG.getNode(WOpc, DL, VT, Op0, Op1, Mask, VL);
+ }
+
+ // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
+ // sext/zext?
+
+ return SDValue();
+}
+
+// Try to convert vwadd(u).wv/wx or vwsub(u).wv/wx to vwadd(u).vv/vx or
+// vwsub(u).vv/vx.
+static SDValue combineVWADD_W_VL_VWSUB_W_VL(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ MVT VT = N->getSimpleValueType(0);
+ MVT NarrowVT = Op1.getSimpleValueType();
+ unsigned NarrowSize = NarrowVT.getScalarSizeInBits();
+
+ unsigned VOpc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VWADD_W_VL: VOpc = RISCVISD::VWADD_VL; break;
+ case RISCVISD::VWSUB_W_VL: VOpc = RISCVISD::VWSUB_VL; break;
+ case RISCVISD::VWADDU_W_VL: VOpc = RISCVISD::VWADDU_VL; break;
+ case RISCVISD::VWSUBU_W_VL: VOpc = RISCVISD::VWSUBU_VL; break;
+ }
+
+ bool IsSigned = N->getOpcode() == RISCVISD::VWADD_W_VL ||
+ N->getOpcode() == RISCVISD::VWSUB_W_VL;
+
+ SDLoc DL(N);
+
+ // If the LHS is a sext or zext, we can narrow this op to the same size as
+ // the RHS.
+ if (((Op0.getOpcode() == RISCVISD::VZEXT_VL && !IsSigned) ||
+ (Op0.getOpcode() == RISCVISD::VSEXT_VL && IsSigned)) &&
+ Op0.hasOneUse() && Op0.getOperand(1) == Mask && Op0.getOperand(2) == VL) {
+ unsigned ExtOpc = Op0.getOpcode();
+ Op0 = Op0.getOperand(0);
+ // Re-introduce narrower extends if needed.
+ if (Op0.getValueType() != NarrowVT)
+ Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
+ return DAG.getNode(VOpc, DL, VT, Op0, Op1, Mask, VL);
+ }
+
+ bool IsAdd = N->getOpcode() == RISCVISD::VWADD_W_VL ||
+ N->getOpcode() == RISCVISD::VWADDU_W_VL;
+
+ // Look for splats on the left hand side of a vwadd(u).wv. We might be able
+ // to commute and use a vwadd(u).vx instead.
+ if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL &&
+ Op0.getOperand(1) == VL) {
+ Op0 = Op0.getOperand(0);
+
+ // See if have enough sign bits or zero bits in the scalar to use a
+ // widening add/sub by splatting to smaller element size.
+ unsigned EltBits = VT.getScalarSizeInBits();
+ unsigned ScalarBits = Op0.getValueSizeInBits();
+ // Make sure we're getting all element bits from the scalar register.
+ // FIXME: Support implicit sign extension of vmv.v.x?
+ if (ScalarBits < EltBits)
+ return SDValue();
+
+ if (IsSigned) {
+ if (DAG.ComputeNumSignBits(Op0) <= (ScalarBits - NarrowSize))
+ return SDValue();
+ } else {
+ APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
+ if (!DAG.MaskedValueIsZero(Op0, Mask))
+ return SDValue();
+ }
+
+ Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op0, VL);
+ return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL);
+ }
+
+ return SDValue();
+}
+
// Try to form VWMUL, VWMULU or VWMULSU.
// TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
@@ -8018,6 +8141,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case RISCVISD::ADD_VL:
+ if (SDValue V = combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ false))
+ return V;
+ return combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ true);
+ case RISCVISD::SUB_VL:
+ return combineADDSUB_VLToVWADDSUB_VL(N, DAG);
+ case RISCVISD::VWADD_W_VL:
+ case RISCVISD::VWADDU_W_VL:
+ case RISCVISD::VWSUB_W_VL:
+ case RISCVISD::VWSUBU_W_VL:
+ return combineVWADD_W_VL_VWSUB_W_VL(N, DAG);
case RISCVISD::MUL_VL:
if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
return V;
@@ -10222,7 +10356,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
NODE_NAME_CASE(VWMULSU_VL)
+ NODE_NAME_CASE(VWADD_VL)
NODE_NAME_CASE(VWADDU_VL)
+ NODE_NAME_CASE(VWSUB_VL)
+ NODE_NAME_CASE(VWSUBU_VL)
+ NODE_NAME_CASE(VWADD_W_VL)
+ NODE_NAME_CASE(VWADDU_W_VL)
+ NODE_NAME_CASE(VWSUB_W_VL)
+ NODE_NAME_CASE(VWSUBU_W_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
NODE_NAME_CASE(VP_MERGE_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 840a821870a79..39bfcfa8b818b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -246,7 +246,14 @@ enum NodeType : unsigned {
VWMUL_VL,
VWMULU_VL,
VWMULSU_VL,
+ VWADD_VL,
VWADDU_VL,
+ VWSUB_VL,
+ VWSUBU_VL,
+ VWADD_W_VL,
+ VWADDU_W_VL,
+ VWSUB_W_VL,
+ VWSUBU_W_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
// operand is VL.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index e71c498fd5f49..138c4001f1ec7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -229,7 +229,22 @@ def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWBinOp_VL>;
+def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+
+def SDT_RISCVVWBinOpW_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameNumEltsAs<1, 2>,
+ SDTCisOpSmallerThanOp<2, 1>,
+ SDTCisSameNumEltsAs<1, 3>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisVT<4, XLenVT>]>;
+def riscv_vwadd_w_vl : SDNode<"RISCVISD::VWADD_W_VL", SDT_RISCVVWBinOpW_VL>;
+def riscv_vwaddu_w_vl : SDNode<"RISCVISD::VWADDU_W_VL", SDT_RISCVVWBinOpW_VL>;
+def riscv_vwsub_w_vl : SDNode<"RISCVISD::VWSUB_W_VL", SDT_RISCVVWBinOpW_VL>;
+def riscv_vwsubu_w_vl : SDNode<"RISCVISD::VWSUBU_W_VL", SDT_RISCVVWBinOpW_VL>;
def SDTRVVVecReduce : SDTypeProfile<1, 5, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
@@ -267,32 +282,35 @@ def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>;
def sew32simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<32>", []>;
def sew64simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<64>", []>;
-multiclass VPatBinaryVL_VV<SDNode vop,
- string instruction_name,
- ValueType result_type,
- ValueType op_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg op_reg_class> {
+multiclass VPatBinaryVL_V<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg op1_reg_class,
+ VReg op2_reg_class> {
def : Pat<(result_type (vop
- (op_type op_reg_class:$rs1),
- (op_type op_reg_class:$rs2),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_reg_class:$rs2),
(mask_type true_mask),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
- op_reg_class:$rs1,
- op_reg_class:$rs2,
+ (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX)
+ op1_reg_class:$rs1,
+ op2_reg_class:$rs2,
GPR:$vl, sew)>;
def : Pat<(result_type (vop
- (op_type op_reg_class:$rs1),
- (op_type op_reg_class:$rs2),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_reg_class:$rs2),
(mask_type V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX#"_MASK")
+ (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_MASK")
(result_type (IMPLICIT_DEF)),
- op_reg_class:$rs1,
- op_reg_class:$rs2,
+ op1_reg_class:$rs1,
+ op2_reg_class:$rs2,
(mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
}
@@ -300,7 +318,8 @@ multiclass VPatBinaryVL_XI<SDNode vop,
string instruction_name,
string suffix,
ValueType result_type,
- ValueType vop_type,
+ ValueType vop1_type,
+ ValueType vop2_type,
ValueType mask_type,
int sew,
LMULInfo vlmul,
@@ -308,8 +327,8 @@ multiclass VPatBinaryVL_XI<SDNode vop,
ComplexPattern SplatPatKind,
DAGOperand xop_kind> {
def : Pat<(result_type (vop
- (vop_type vop_reg_class:$rs1),
- (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
+ (vop1_type vop_reg_class:$rs1),
+ (vop2_type (SplatPatKind (XLenVT xop_kind:$rs2))),
(mask_type true_mask),
VLOpFrag)),
(!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX)
@@ -317,8 +336,8 @@ multiclass VPatBinaryVL_XI<SDNode vop,
xop_kind:$rs2,
GPR:$vl, sew)>;
def : Pat<(result_type (vop
- (vop_type vop_reg_class:$rs1),
- (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
+ (vop1_type vop_reg_class:$rs1),
+ (vop2_type (SplatPatKind (XLenVT xop_kind:$rs2))),
(mask_type V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX#"_MASK")
@@ -330,12 +349,12 @@ multiclass VPatBinaryVL_XI<SDNode vop,
multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
- defm : VPatBinaryVL_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass>;
+ defm : VPatBinaryVL_V<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, SplatPat, GPR>;
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, SplatPat, GPR>;
}
}
@@ -344,8 +363,8 @@ multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
: VPatBinaryVL_VV_VX<vop, instruction_name> {
foreach vti = AllIntegerVectors in {
defm : VPatBinaryVL_XI<vop, instruction_name, "VI",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
!cast<ComplexPattern>(SplatPat#_#ImmType),
ImmType>;
}
@@ -355,12 +374,26 @@ multiclass VPatBinaryWVL_VV_VX<SDNode vop, string instruction_name> {
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
- defm : VPatBinaryVL_VV<vop, instruction_name,
- wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass>;
+ defm : VPatBinaryVL_V<vop, instruction_name, "VV",
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
- wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, SplatPat, GPR>;
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, SplatPat, GPR>;
+ }
+}
+multiclass VPatBinaryWVL_VV_VX_WV_WX<SDNode vop, SDNode vop_w,
+ string instruction_name>
+ : VPatBinaryWVL_VV_VX<vop, instruction_name> {
+ foreach VtiToWti = AllWidenableIntVectors in {
+ defvar vti = VtiToWti.Vti;
+ defvar wti = VtiToWti.Wti;
+ defm : VPatBinaryVL_V<vop_w, instruction_name, "WV",
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass>;
+ defm : VPatBinaryVL_XI<vop_w, instruction_name, "WX",
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, SplatPat, GPR>;
}
}
@@ -394,9 +427,9 @@ multiclass VPatBinaryVL_VF<SDNode vop,
multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
- defm : VPatBinaryVL_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass>;
+ defm : VPatBinaryVL_V<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
defm : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
vti.LMul, vti.RegClass, vti.ScalarRegClass>;
@@ -723,7 +756,10 @@ foreach vti = AllIntegerVectors in {
}
// 12.2. Vector Widening Integer Add/Subtract
-defm : VPatBinaryWVL_VV_VX<riscv_vwaddu_vl, "PseudoVWADDU">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwadd_vl, riscv_vwadd_w_vl, "PseudoVWADD">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwaddu_vl, riscv_vwaddu_w_vl, "PseudoVWADDU">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwsub_vl, riscv_vwsub_w_vl, "PseudoVWSUB">;
+defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwsubu_vl, riscv_vwsubu_w_vl, "PseudoVWSUBU">;
// 12.3. Vector Integer Extension
defm : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF2",
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
new file mode 100644
index 0000000000000..bb7105d84667f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
@@ -0,0 +1,866 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <2 x i16> @vwadd_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwadd_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = sext <2 x i8> %a to <2 x i16>
+ %d = sext <2 x i8> %b to <2 x i16>
+ %e = add <2 x i16> %c, %d
+ ret <2 x i16> %e
+}
+
+define <4 x i16> @vwadd_v4i16(<4 x i8>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwadd_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = sext <4 x i8> %a to <4 x i16>
+ %d = sext <4 x i8> %b to <4 x i16>
+ %e = add <4 x i16> %c, %d
+ ret <4 x i16> %e
+}
+
+define <2 x i32> @vwadd_v2i32(<2 x i16>* %x, <2 x i16>* %y) {
+; CHECK-LABEL: vwadd_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = load <2 x i16>, <2 x i16>* %y
+ %c = sext <2 x i16> %a to <2 x i32>
+ %d = sext <2 x i16> %b to <2 x i32>
+ %e = add <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <8 x i16> @vwadd_v8i16(<8 x i8>* %x, <8 x i8>* %y) {
+; CHECK-LABEL: vwadd_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load <8 x i8>, <8 x i8>* %y
+ %c = sext <8 x i8> %a to <8 x i16>
+ %d = sext <8 x i8> %b to <8 x i16>
+ %e = add <8 x i16> %c, %d
+ ret <8 x i16> %e
+}
+
+define <4 x i32> @vwadd_v4i32(<4 x i16>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwadd_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = sext <4 x i16> %a to <4 x i32>
+ %d = sext <4 x i16> %b to <4 x i32>
+ %e = add <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <2 x i64> @vwadd_v2i64(<2 x i32>* %x, <2 x i32>* %y) {
+; CHECK-LABEL: vwadd_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v10, (a1)
+; CHECK-NEXT: vwadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load <2 x i32>, <2 x i32>* %y
+ %c = sext <2 x i32> %a to <2 x i64>
+ %d = sext <2 x i32> %b to <2 x i64>
+ %e = add <2 x i64> %c, %d
+ ret <2 x i64> %e
+}
+
+define <16 x i16> @vwadd_v16i16(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: vwadd_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vle8.v v11, (a1)
+; CHECK-NEXT: vwadd.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %c = sext <16 x i8> %a to <16 x i16>
+ %d = sext <16 x i8> %b to <16 x i16>
+ %e = add <16 x i16> %c, %d
+ ret <16 x i16> %e
+}
+
+define <8 x i32> @vwadd_v8i32(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: vwadd_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v11, (a1)
+; CHECK-NEXT: vwadd.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %c = sext <8 x i16> %a to <8 x i32>
+ %d = sext <8 x i16> %b to <8 x i32>
+ %e = add <8 x i32> %c, %d
+ ret <8 x i32> %e
+}
+
+define <4 x i64> @vwadd_v4i64(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: vwadd_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v11, (a1)
+; CHECK-NEXT: vwadd.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %c = sext <4 x i32> %a to <4 x i64>
+ %d = sext <4 x i32> %b to <4 x i64>
+ %e = add <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <32 x i16> @vwadd_v32i16(<32 x i8>* %x, <32 x i8>* %y) {
+; CHECK-LABEL: vwadd_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vle8.v v14, (a1)
+; CHECK-NEXT: vwadd.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %c = sext <32 x i8> %a to <32 x i16>
+ %d = sext <32 x i8> %b to <32 x i16>
+ %e = add <32 x i16> %c, %d
+ ret <32 x i16> %e
+}
+
+define <16 x i32> @vwadd_v16i32(<16 x i16>* %x, <16 x i16>* %y) {
+; CHECK-LABEL: vwadd_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
+; CHECK-NEXT: vwadd.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %c = sext <16 x i16> %a to <16 x i32>
+ %d = sext <16 x i16> %b to <16 x i32>
+ %e = add <16 x i32> %c, %d
+ ret <16 x i32> %e
+}
+
+define <8 x i64> @vwadd_v8i64(<8 x i32>* %x, <8 x i32>* %y) {
+; CHECK-LABEL: vwadd_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vle32.v v14, (a1)
+; CHECK-NEXT: vwadd.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %c = sext <8 x i32> %a to <8 x i64>
+ %d = sext <8 x i32> %b to <8 x i64>
+ %e = add <8 x i64> %c, %d
+ ret <8 x i64> %e
+}
+
+define <64 x i16> @vwadd_v64i16(<64 x i8>* %x, <64 x i8>* %y) {
+; CHECK-LABEL: vwadd_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v20, (a1)
+; CHECK-NEXT: vwadd.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = load <64 x i8>, <64 x i8>* %y
+ %c = sext <64 x i8> %a to <64 x i16>
+ %d = sext <64 x i8> %b to <64 x i16>
+ %e = add <64 x i16> %c, %d
+ ret <64 x i16> %e
+}
+
+define <32 x i32> @vwadd_v32i32(<32 x i16>* %x, <32 x i16>* %y) {
+; CHECK-LABEL: vwadd_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v20, (a1)
+; CHECK-NEXT: vwadd.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = load <32 x i16>, <32 x i16>* %y
+ %c = sext <32 x i16> %a to <32 x i32>
+ %d = sext <32 x i16> %b to <32 x i32>
+ %e = add <32 x i32> %c, %d
+ ret <32 x i32> %e
+}
+
+define <16 x i64> @vwadd_v16i64(<16 x i32>* %x, <16 x i32>* %y) {
+; CHECK-LABEL: vwadd_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v20, (a1)
+; CHECK-NEXT: vwadd.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = load <16 x i32>, <16 x i32>* %y
+ %c = sext <16 x i32> %a to <16 x i64>
+ %d = sext <16 x i32> %b to <16 x i64>
+ %e = add <16 x i64> %c, %d
+ ret <16 x i64> %e
+}
+
+define <128 x i16> @vwadd_v128i16(<128 x i8>* %x, <128 x i8>* %y) nounwind {
+; CHECK-LABEL: vwadd_v128i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v24, (a1)
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT: vwadd.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwadd.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <128 x i8>, <128 x i8>* %x
+ %b = load <128 x i8>, <128 x i8>* %y
+ %c = sext <128 x i8> %a to <128 x i16>
+ %d = sext <128 x i8> %b to <128 x i16>
+ %e = add <128 x i16> %c, %d
+ ret <128 x i16> %e
+}
+
+define <64 x i32> @vwadd_v64i32(<64 x i16>* %x, <64 x i16>* %y) nounwind {
+; CHECK-LABEL: vwadd_v64i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v24, (a1)
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vwadd.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwadd.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <64 x i16>, <64 x i16>* %x
+ %b = load <64 x i16>, <64 x i16>* %y
+ %c = sext <64 x i16> %a to <64 x i32>
+ %d = sext <64 x i16> %b to <64 x i32>
+ %e = add <64 x i32> %c, %d
+ ret <64 x i32> %e
+}
+
+define <32 x i64> @vwadd_v32i64(<32 x i32>* %x, <32 x i32>* %y) nounwind {
+; CHECK-LABEL: vwadd_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v24, (a1)
+; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v16, 16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vi v0, v24, 16
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vwadd.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwadd.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <32 x i32>, <32 x i32>* %x
+ %b = load <32 x i32>, <32 x i32>* %y
+ %c = sext <32 x i32> %a to <32 x i64>
+ %d = sext <32 x i32> %b to <32 x i64>
+ %e = add <32 x i64> %c, %d
+ ret <32 x i64> %e
+}
+
+define <2 x i32> @vwadd_v2i32_v2i8(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwadd_v2i32_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vsext.vf2 v11, v9
+; CHECK-NEXT: vwadd.vv v8, v11, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = sext <2 x i8> %a to <2 x i32>
+ %d = sext <2 x i8> %b to <2 x i32>
+ %e = add <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <4 x i32> @vwadd_v4i32_v4i8_v4i16(<4 x i8>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwadd_v4i32_v4i8_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vwadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = sext <4 x i8> %a to <4 x i32>
+ %d = sext <4 x i16> %b to <4 x i32>
+ %e = add <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <4 x i64> @vwadd_v4i64_v4i32_v4i8(<4 x i32>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwadd_v4i64_v4i32_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vsext.vf4 v11, v8
+; CHECK-NEXT: vwadd.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = sext <4 x i32> %a to <4 x i64>
+ %d = sext <4 x i8> %b to <4 x i64>
+ %e = add <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <2 x i16> @vwadd_vx_v2i16(<2 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwadd_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwadd.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = insertelement <2 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
+ %d = sext <2 x i8> %a to <2 x i16>
+ %e = sext <2 x i8> %c to <2 x i16>
+ %f = add <2 x i16> %d, %e
+ ret <2 x i16> %f
+}
+
+define <4 x i16> @vwadd_vx_v4i16(<4 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwadd_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwadd.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = insertelement <4 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
+ %d = sext <4 x i8> %a to <4 x i16>
+ %e = sext <4 x i8> %c to <4 x i16>
+ %f = add <4 x i16> %d, %e
+ ret <4 x i16> %f
+}
+
+define <2 x i32> @vwadd_vx_v2i32(<2 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwadd_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwadd.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = insertelement <2 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
+ %d = sext <2 x i16> %a to <2 x i32>
+ %e = sext <2 x i16> %c to <2 x i32>
+ %f = add <2 x i32> %d, %e
+ ret <2 x i32> %f
+}
+
+define <8 x i16> @vwadd_vx_v8i16(<8 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwadd_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwadd.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = insertelement <8 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
+ %d = sext <8 x i8> %a to <8 x i16>
+ %e = sext <8 x i8> %c to <8 x i16>
+ %f = add <8 x i16> %d, %e
+ ret <8 x i16> %f
+}
+
+define <4 x i32> @vwadd_vx_v4i32(<4 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwadd_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwadd.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = insertelement <4 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
+ %d = sext <4 x i16> %a to <4 x i32>
+ %e = sext <4 x i16> %c to <4 x i32>
+ %f = add <4 x i32> %d, %e
+ ret <4 x i32> %f
+}
+
+define <2 x i64> @vwadd_vx_v2i64(<2 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwadd_vx_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vwadd.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = insertelement <2 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
+ %d = sext <2 x i32> %a to <2 x i64>
+ %e = sext <2 x i32> %c to <2 x i64>
+ %f = add <2 x i64> %d, %e
+ ret <2 x i64> %f
+}
+
+define <16 x i16> @vwadd_vx_v16i16(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwadd_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vwadd.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = sext <16 x i8> %a to <16 x i16>
+ %e = sext <16 x i8> %c to <16 x i16>
+ %f = add <16 x i16> %d, %e
+ ret <16 x i16> %f
+}
+
+define <8 x i32> @vwadd_vx_v8i32(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwadd_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vwadd.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = sext <8 x i16> %a to <8 x i32>
+ %e = sext <8 x i16> %c to <8 x i32>
+ %f = add <8 x i32> %d, %e
+ ret <8 x i32> %f
+}
+
+define <4 x i64> @vwadd_vx_v4i64(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwadd_vx_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vwadd.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = sext <4 x i32> %a to <4 x i64>
+ %e = sext <4 x i32> %c to <4 x i64>
+ %f = add <4 x i64> %d, %e
+ ret <4 x i64> %f
+}
+
+define <32 x i16> @vwadd_vx_v32i16(<32 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwadd_vx_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vwadd.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = insertelement <32 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ %d = sext <32 x i8> %a to <32 x i16>
+ %e = sext <32 x i8> %c to <32 x i16>
+ %f = add <32 x i16> %d, %e
+ ret <32 x i16> %f
+}
+
+define <16 x i32> @vwadd_vx_v16i32(<16 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwadd_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vwadd.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = insertelement <16 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ %d = sext <16 x i16> %a to <16 x i32>
+ %e = sext <16 x i16> %c to <16 x i32>
+ %f = add <16 x i32> %d, %e
+ ret <16 x i32> %f
+}
+
+define <8 x i64> @vwadd_vx_v8i64(<8 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwadd_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vwadd.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = insertelement <8 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ %d = sext <8 x i32> %a to <8 x i64>
+ %e = sext <8 x i32> %c to <8 x i64>
+ %f = add <8 x i64> %d, %e
+ ret <8 x i64> %f
+}
+
+define <64 x i16> @vwadd_vx_v64i16(<64 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwadd_vx_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vwadd.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = insertelement <64 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %d = sext <64 x i8> %a to <64 x i16>
+ %e = sext <64 x i8> %c to <64 x i16>
+ %f = add <64 x i16> %d, %e
+ ret <64 x i16> %f
+}
+
+define <32 x i32> @vwadd_vx_v32i32(<32 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwadd_vx_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vwadd.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = insertelement <32 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %d = sext <32 x i16> %a to <32 x i32>
+ %e = sext <32 x i16> %c to <32 x i32>
+ %f = add <32 x i32> %d, %e
+ ret <32 x i32> %f
+}
+
+define <16 x i64> @vwadd_vx_v16i64(<16 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwadd_vx_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vwadd.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = insertelement <16 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %d = sext <16 x i32> %a to <16 x i64>
+ %e = sext <16 x i32> %c to <16 x i64>
+ %f = add <16 x i64> %d, %e
+ ret <16 x i64> %f
+}
+
+define <8 x i16> @vwadd_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwadd_vx_v8i16_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: lb a0, 0(a1)
+; CHECK-NEXT: vwadd.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i16
+ %d = insertelement <8 x i16> undef, i16 %c, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = sext <8 x i8> %a to <8 x i16>
+ %g = add <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <8 x i16> @vwadd_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
+; CHECK-LABEL: vwadd_vx_v8i16_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vlse16.v v10, (a1), zero
+; CHECK-NEXT: vwadd.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i16, i16* %y
+ %d = insertelement <8 x i16> undef, i16 %b, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = sext <8 x i8> %a to <8 x i16>
+ %g = add <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <4 x i32> @vwadd_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
+; CHECK-LABEL: vwadd_vx_v4i32_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lb a0, 0(a1)
+; CHECK-NEXT: vwadd.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = add <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwadd_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
+; CHECK-LABEL: vwadd_vx_v4i32_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lh a0, 0(a1)
+; CHECK-NEXT: vwadd.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i16, i16* %y
+ %c = sext i16 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = add <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwadd_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
+; CHECK-LABEL: vwadd_vx_v4i32_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vlse32.v v10, (a1), zero
+; CHECK-NEXT: vwadd.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i32, i32* %y
+ %d = insertelement <4 x i32> undef, i32 %b, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = add <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <2 x i64> @vwadd_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
+; RV32-LABEL: vwadd_vx_v2i64_i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lb a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_v2i64_i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: lb a0, 0(a1)
+; RV64-NEXT: vwadd.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwadd_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
+; RV32-LABEL: vwadd_vx_v2i64_i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_v2i64_i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: lh a0, 0(a1)
+; RV64-NEXT: vwadd.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i16, i16* %y
+ %c = sext i16 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwadd_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
+; RV32-LABEL: vwadd_vx_v2i64_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_v2i64_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: lw a0, 0(a1)
+; RV64-NEXT: vwadd.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i32, i32* %y
+ %c = sext i32 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwadd_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
+; RV32-LABEL: vwadd_vx_v2i64_i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw a2, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwadd.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwadd_vx_v2i64_i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vlse64.v v10, (a1), zero
+; RV64-NEXT: vwadd.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i64, i64* %y
+ %d = insertelement <2 x i64> undef, i64 %b, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
new file mode 100644
index 0000000000000..d2d2a035c0364
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -0,0 +1,863 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <2 x i16> @vwaddu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = zext <2 x i8> %a to <2 x i16>
+ %d = zext <2 x i8> %b to <2 x i16>
+ %e = add <2 x i16> %c, %d
+ ret <2 x i16> %e
+}
+
+define <4 x i16> @vwaddu_v4i16(<4 x i8>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = zext <4 x i8> %a to <4 x i16>
+ %d = zext <4 x i8> %b to <4 x i16>
+ %e = add <4 x i16> %c, %d
+ ret <4 x i16> %e
+}
+
+define <2 x i32> @vwaddu_v2i32(<2 x i16>* %x, <2 x i16>* %y) {
+; CHECK-LABEL: vwaddu_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = load <2 x i16>, <2 x i16>* %y
+ %c = zext <2 x i16> %a to <2 x i32>
+ %d = zext <2 x i16> %b to <2 x i32>
+ %e = add <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <8 x i16> @vwaddu_v8i16(<8 x i8>* %x, <8 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load <8 x i8>, <8 x i8>* %y
+ %c = zext <8 x i8> %a to <8 x i16>
+ %d = zext <8 x i8> %b to <8 x i16>
+ %e = add <8 x i16> %c, %d
+ ret <8 x i16> %e
+}
+
+define <4 x i32> @vwaddu_v4i32(<4 x i16>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwaddu_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = zext <4 x i16> %a to <4 x i32>
+ %d = zext <4 x i16> %b to <4 x i32>
+ %e = add <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <2 x i64> @vwaddu_v2i64(<2 x i32>* %x, <2 x i32>* %y) {
+; CHECK-LABEL: vwaddu_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v10, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load <2 x i32>, <2 x i32>* %y
+ %c = zext <2 x i32> %a to <2 x i64>
+ %d = zext <2 x i32> %b to <2 x i64>
+ %e = add <2 x i64> %c, %d
+ ret <2 x i64> %e
+}
+
+define <16 x i16> @vwaddu_v16i16(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vle8.v v11, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %c = zext <16 x i8> %a to <16 x i16>
+ %d = zext <16 x i8> %b to <16 x i16>
+ %e = add <16 x i16> %c, %d
+ ret <16 x i16> %e
+}
+
+define <8 x i32> @vwaddu_v8i32(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: vwaddu_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v11, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %c = zext <8 x i16> %a to <8 x i32>
+ %d = zext <8 x i16> %b to <8 x i32>
+ %e = add <8 x i32> %c, %d
+ ret <8 x i32> %e
+}
+
+define <4 x i64> @vwaddu_v4i64(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: vwaddu_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v11, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %c = zext <4 x i32> %a to <4 x i64>
+ %d = zext <4 x i32> %b to <4 x i64>
+ %e = add <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <32 x i16> @vwaddu_v32i16(<32 x i8>* %x, <32 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vle8.v v14, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %c = zext <32 x i8> %a to <32 x i16>
+ %d = zext <32 x i8> %b to <32 x i16>
+ %e = add <32 x i16> %c, %d
+ ret <32 x i16> %e
+}
+
+define <16 x i32> @vwaddu_v16i32(<16 x i16>* %x, <16 x i16>* %y) {
+; CHECK-LABEL: vwaddu_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %c = zext <16 x i16> %a to <16 x i32>
+ %d = zext <16 x i16> %b to <16 x i32>
+ %e = add <16 x i32> %c, %d
+ ret <16 x i32> %e
+}
+
+define <8 x i64> @vwaddu_v8i64(<8 x i32>* %x, <8 x i32>* %y) {
+; CHECK-LABEL: vwaddu_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vle32.v v14, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %c = zext <8 x i32> %a to <8 x i64>
+ %d = zext <8 x i32> %b to <8 x i64>
+ %e = add <8 x i64> %c, %d
+ ret <8 x i64> %e
+}
+
+define <64 x i16> @vwaddu_v64i16(<64 x i8>* %x, <64 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v20, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = load <64 x i8>, <64 x i8>* %y
+ %c = zext <64 x i8> %a to <64 x i16>
+ %d = zext <64 x i8> %b to <64 x i16>
+ %e = add <64 x i16> %c, %d
+ ret <64 x i16> %e
+}
+
+define <32 x i32> @vwaddu_v32i32(<32 x i16>* %x, <32 x i16>* %y) {
+; CHECK-LABEL: vwaddu_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v20, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = load <32 x i16>, <32 x i16>* %y
+ %c = zext <32 x i16> %a to <32 x i32>
+ %d = zext <32 x i16> %b to <32 x i32>
+ %e = add <32 x i32> %c, %d
+ ret <32 x i32> %e
+}
+
+define <16 x i64> @vwaddu_v16i64(<16 x i32>* %x, <16 x i32>* %y) {
+; CHECK-LABEL: vwaddu_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v20, (a1)
+; CHECK-NEXT: vwaddu.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = load <16 x i32>, <16 x i32>* %y
+ %c = zext <16 x i32> %a to <16 x i64>
+ %d = zext <16 x i32> %b to <16 x i64>
+ %e = add <16 x i64> %c, %d
+ ret <16 x i64> %e
+}
+
+define <128 x i16> @vwaddu_v128i16(<128 x i8>* %x, <128 x i8>* %y) nounwind {
+; CHECK-LABEL: vwaddu_v128i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v24, (a1)
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT: vwaddu.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwaddu.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <128 x i8>, <128 x i8>* %x
+ %b = load <128 x i8>, <128 x i8>* %y
+ %c = zext <128 x i8> %a to <128 x i16>
+ %d = zext <128 x i8> %b to <128 x i16>
+ %e = add <128 x i16> %c, %d
+ ret <128 x i16> %e
+}
+
+define <64 x i32> @vwaddu_v64i32(<64 x i16>* %x, <64 x i16>* %y) nounwind {
+; CHECK-LABEL: vwaddu_v64i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v24, (a1)
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vwaddu.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwaddu.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <64 x i16>, <64 x i16>* %x
+ %b = load <64 x i16>, <64 x i16>* %y
+ %c = zext <64 x i16> %a to <64 x i32>
+ %d = zext <64 x i16> %b to <64 x i32>
+ %e = add <64 x i32> %c, %d
+ ret <64 x i32> %e
+}
+
+define <32 x i64> @vwaddu_v32i64(<32 x i32>* %x, <32 x i32>* %y) nounwind {
+; CHECK-LABEL: vwaddu_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v24, (a1)
+; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v16, 16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vi v0, v24, 16
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vwaddu.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwaddu.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <32 x i32>, <32 x i32>* %x
+ %b = load <32 x i32>, <32 x i32>* %y
+ %c = zext <32 x i32> %a to <32 x i64>
+ %d = zext <32 x i32> %b to <32 x i64>
+ %e = add <32 x i64> %c, %d
+ ret <32 x i64> %e
+}
+
+define <2 x i32> @vwaddu_v2i32_v2i8(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v2i32_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v11, v9
+; CHECK-NEXT: vwaddu.vv v8, v11, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = zext <2 x i8> %a to <2 x i32>
+ %d = zext <2 x i8> %b to <2 x i32>
+ %e = add <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <4 x i32> @vwaddu_v4i32_v4i8_v4i16(<4 x i8>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwaddu_v4i32_v4i8_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vwaddu.vv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = zext <4 x i8> %a to <4 x i32>
+ %d = zext <4 x i16> %b to <4 x i32>
+ %e = add <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <4 x i64> @vwaddu_v4i64_v4i32_v4i8(<4 x i32>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwaddu_v4i64_v4i32_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vzext.vf4 v11, v8
+; CHECK-NEXT: vwaddu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = zext <4 x i32> %a to <4 x i64>
+ %d = zext <4 x i8> %b to <4 x i64>
+ %e = add <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <2 x i16> @vwaddu_vx_v2i16(<2 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwaddu_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = insertelement <2 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
+ %d = zext <2 x i8> %a to <2 x i16>
+ %e = zext <2 x i8> %c to <2 x i16>
+ %f = add <2 x i16> %d, %e
+ ret <2 x i16> %f
+}
+
+define <4 x i16> @vwaddu_vx_v4i16(<4 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwaddu_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = insertelement <4 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
+ %d = zext <4 x i8> %a to <4 x i16>
+ %e = zext <4 x i8> %c to <4 x i16>
+ %f = add <4 x i16> %d, %e
+ ret <4 x i16> %f
+}
+
+define <2 x i32> @vwaddu_vx_v2i32(<2 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwaddu_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = insertelement <2 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
+ %d = zext <2 x i16> %a to <2 x i32>
+ %e = zext <2 x i16> %c to <2 x i32>
+ %f = add <2 x i32> %d, %e
+ ret <2 x i32> %f
+}
+
+define <8 x i16> @vwaddu_vx_v8i16(<8 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwaddu_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = insertelement <8 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
+ %d = zext <8 x i8> %a to <8 x i16>
+ %e = zext <8 x i8> %c to <8 x i16>
+ %f = add <8 x i16> %d, %e
+ ret <8 x i16> %f
+}
+
+define <4 x i32> @vwaddu_vx_v4i32(<4 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwaddu_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = insertelement <4 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
+ %d = zext <4 x i16> %a to <4 x i32>
+ %e = zext <4 x i16> %c to <4 x i32>
+ %f = add <4 x i32> %d, %e
+ ret <4 x i32> %f
+}
+
+define <2 x i64> @vwaddu_vx_v2i64(<2 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwaddu_vx_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = insertelement <2 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
+ %d = zext <2 x i32> %a to <2 x i64>
+ %e = zext <2 x i32> %c to <2 x i64>
+ %f = add <2 x i64> %d, %e
+ ret <2 x i64> %f
+}
+
+define <16 x i16> @vwaddu_vx_v16i16(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwaddu_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = zext <16 x i8> %a to <16 x i16>
+ %e = zext <16 x i8> %c to <16 x i16>
+ %f = add <16 x i16> %d, %e
+ ret <16 x i16> %f
+}
+
+define <8 x i32> @vwaddu_vx_v8i32(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwaddu_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = zext <8 x i16> %a to <8 x i32>
+ %e = zext <8 x i16> %c to <8 x i32>
+ %f = add <8 x i32> %d, %e
+ ret <8 x i32> %f
+}
+
+define <4 x i64> @vwaddu_vx_v4i64(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwaddu_vx_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = zext <4 x i32> %a to <4 x i64>
+ %e = zext <4 x i32> %c to <4 x i64>
+ %f = add <4 x i64> %d, %e
+ ret <4 x i64> %f
+}
+
+define <32 x i16> @vwaddu_vx_v32i16(<32 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwaddu_vx_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = insertelement <32 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ %d = zext <32 x i8> %a to <32 x i16>
+ %e = zext <32 x i8> %c to <32 x i16>
+ %f = add <32 x i16> %d, %e
+ ret <32 x i16> %f
+}
+
+define <16 x i32> @vwaddu_vx_v16i32(<16 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwaddu_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = insertelement <16 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ %d = zext <16 x i16> %a to <16 x i32>
+ %e = zext <16 x i16> %c to <16 x i32>
+ %f = add <16 x i32> %d, %e
+ ret <16 x i32> %f
+}
+
+define <8 x i64> @vwaddu_vx_v8i64(<8 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwaddu_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = insertelement <8 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ %d = zext <8 x i32> %a to <8 x i64>
+ %e = zext <8 x i32> %c to <8 x i64>
+ %f = add <8 x i64> %d, %e
+ ret <8 x i64> %f
+}
+
+define <64 x i16> @vwaddu_vx_v64i16(<64 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwaddu_vx_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = insertelement <64 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %d = zext <64 x i8> %a to <64 x i16>
+ %e = zext <64 x i8> %c to <64 x i16>
+ %f = add <64 x i16> %d, %e
+ ret <64 x i16> %f
+}
+
+define <32 x i32> @vwaddu_vx_v32i32(<32 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwaddu_vx_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = insertelement <32 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %d = zext <32 x i16> %a to <32 x i32>
+ %e = zext <32 x i16> %c to <32 x i32>
+ %f = add <32 x i32> %d, %e
+ ret <32 x i32> %f
+}
+
+define <16 x i64> @vwaddu_vx_v16i64(<16 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwaddu_vx_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vwaddu.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = insertelement <16 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %d = zext <16 x i32> %a to <16 x i64>
+ %e = zext <16 x i32> %c to <16 x i64>
+ %f = add <16 x i64> %d, %e
+ ret <16 x i64> %f
+}
+
+define <8 x i16> @vwaddu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwaddu_vx_v8i16_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: lbu a0, 0(a1)
+; CHECK-NEXT: vwaddu.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i8, i8* %y
+ %c = zext i8 %b to i16
+ %d = insertelement <8 x i16> undef, i16 %c, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = zext <8 x i8> %a to <8 x i16>
+ %g = add <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <8 x i16> @vwaddu_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
+; CHECK-LABEL: vwaddu_vx_v8i16_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vlse16.v v10, (a1), zero
+; CHECK-NEXT: vwaddu.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i16, i16* %y
+ %d = insertelement <8 x i16> undef, i16 %b, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = zext <8 x i8> %a to <8 x i16>
+ %g = add <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <4 x i32> @vwaddu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
+; CHECK-LABEL: vwaddu_vx_v4i32_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lbu a0, 0(a1)
+; CHECK-NEXT: vwaddu.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i8, i8* %y
+ %c = zext i8 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = zext <4 x i16> %a to <4 x i32>
+ %g = add <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwaddu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
+; CHECK-LABEL: vwaddu_vx_v4i32_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lhu a0, 0(a1)
+; CHECK-NEXT: vwaddu.vx v8, v9, a0
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i16, i16* %y
+ %c = zext i16 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = zext <4 x i16> %a to <4 x i32>
+ %g = add <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwaddu_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
+; CHECK-LABEL: vwaddu_vx_v4i32_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vlse32.v v10, (a1), zero
+; CHECK-NEXT: vwaddu.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i32, i32* %y
+ %d = insertelement <4 x i32> undef, i32 %b, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = zext <4 x i16> %a to <4 x i32>
+ %g = add <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <2 x i64> @vwaddu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
+; RV32-LABEL: vwaddu_vx_v2i64_i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lbu a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwaddu_vx_v2i64_i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: lbu a0, 0(a1)
+; RV64-NEXT: vwaddu.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i8, i8* %y
+ %c = zext i8 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwaddu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
+; RV32-LABEL: vwaddu_vx_v2i64_i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lhu a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwaddu_vx_v2i64_i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: lhu a0, 0(a1)
+; RV64-NEXT: vwaddu.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i16, i16* %y
+ %c = zext i16 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwaddu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
+; RV32-LABEL: vwaddu_vx_v2i64_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwaddu_vx_v2i64_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: lwu a0, 0(a1)
+; RV64-NEXT: vwaddu.vx v8, v9, a0
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i32, i32* %y
+ %c = zext i32 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwaddu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
+; RV32-LABEL: vwaddu_vx_v2i64_i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw a2, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwaddu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwaddu_vx_v2i64_i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vlse64.v v10, (a1), zero
+; RV64-NEXT: vwaddu.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i64, i64* %y
+ %d = insertelement <2 x i64> undef, i64 %b, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = add <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
new file mode 100644
index 0000000000000..2dcec1db91867
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll
@@ -0,0 +1,884 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <2 x i16> @vwsub_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwsub_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = sext <2 x i8> %a to <2 x i16>
+ %d = sext <2 x i8> %b to <2 x i16>
+ %e = sub <2 x i16> %c, %d
+ ret <2 x i16> %e
+}
+
+define <4 x i16> @vwsub_v4i16(<4 x i8>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwsub_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = sext <4 x i8> %a to <4 x i16>
+ %d = sext <4 x i8> %b to <4 x i16>
+ %e = sub <4 x i16> %c, %d
+ ret <4 x i16> %e
+}
+
+define <2 x i32> @vwsub_v2i32(<2 x i16>* %x, <2 x i16>* %y) {
+; CHECK-LABEL: vwsub_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = load <2 x i16>, <2 x i16>* %y
+ %c = sext <2 x i16> %a to <2 x i32>
+ %d = sext <2 x i16> %b to <2 x i32>
+ %e = sub <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <8 x i16> @vwsub_v8i16(<8 x i8>* %x, <8 x i8>* %y) {
+; CHECK-LABEL: vwsub_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load <8 x i8>, <8 x i8>* %y
+ %c = sext <8 x i8> %a to <8 x i16>
+ %d = sext <8 x i8> %b to <8 x i16>
+ %e = sub <8 x i16> %c, %d
+ ret <8 x i16> %e
+}
+
+define <4 x i32> @vwsub_v4i32(<4 x i16>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwsub_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = sext <4 x i16> %a to <4 x i32>
+ %d = sext <4 x i16> %b to <4 x i32>
+ %e = sub <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <2 x i64> @vwsub_v2i64(<2 x i32>* %x, <2 x i32>* %y) {
+; CHECK-LABEL: vwsub_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v10, (a1)
+; CHECK-NEXT: vwsub.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load <2 x i32>, <2 x i32>* %y
+ %c = sext <2 x i32> %a to <2 x i64>
+ %d = sext <2 x i32> %b to <2 x i64>
+ %e = sub <2 x i64> %c, %d
+ ret <2 x i64> %e
+}
+
+define <16 x i16> @vwsub_v16i16(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: vwsub_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vle8.v v11, (a1)
+; CHECK-NEXT: vwsub.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %c = sext <16 x i8> %a to <16 x i16>
+ %d = sext <16 x i8> %b to <16 x i16>
+ %e = sub <16 x i16> %c, %d
+ ret <16 x i16> %e
+}
+
+define <8 x i32> @vwsub_v8i32(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: vwsub_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v11, (a1)
+; CHECK-NEXT: vwsub.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %c = sext <8 x i16> %a to <8 x i32>
+ %d = sext <8 x i16> %b to <8 x i32>
+ %e = sub <8 x i32> %c, %d
+ ret <8 x i32> %e
+}
+
+define <4 x i64> @vwsub_v4i64(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: vwsub_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v11, (a1)
+; CHECK-NEXT: vwsub.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %c = sext <4 x i32> %a to <4 x i64>
+ %d = sext <4 x i32> %b to <4 x i64>
+ %e = sub <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <32 x i16> @vwsub_v32i16(<32 x i8>* %x, <32 x i8>* %y) {
+; CHECK-LABEL: vwsub_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vle8.v v14, (a1)
+; CHECK-NEXT: vwsub.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %c = sext <32 x i8> %a to <32 x i16>
+ %d = sext <32 x i8> %b to <32 x i16>
+ %e = sub <32 x i16> %c, %d
+ ret <32 x i16> %e
+}
+
+define <16 x i32> @vwsub_v16i32(<16 x i16>* %x, <16 x i16>* %y) {
+; CHECK-LABEL: vwsub_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
+; CHECK-NEXT: vwsub.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %c = sext <16 x i16> %a to <16 x i32>
+ %d = sext <16 x i16> %b to <16 x i32>
+ %e = sub <16 x i32> %c, %d
+ ret <16 x i32> %e
+}
+
+define <8 x i64> @vwsub_v8i64(<8 x i32>* %x, <8 x i32>* %y) {
+; CHECK-LABEL: vwsub_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vle32.v v14, (a1)
+; CHECK-NEXT: vwsub.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %c = sext <8 x i32> %a to <8 x i64>
+ %d = sext <8 x i32> %b to <8 x i64>
+ %e = sub <8 x i64> %c, %d
+ ret <8 x i64> %e
+}
+
+define <64 x i16> @vwsub_v64i16(<64 x i8>* %x, <64 x i8>* %y) {
+; CHECK-LABEL: vwsub_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v20, (a1)
+; CHECK-NEXT: vwsub.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = load <64 x i8>, <64 x i8>* %y
+ %c = sext <64 x i8> %a to <64 x i16>
+ %d = sext <64 x i8> %b to <64 x i16>
+ %e = sub <64 x i16> %c, %d
+ ret <64 x i16> %e
+}
+
+define <32 x i32> @vwsub_v32i32(<32 x i16>* %x, <32 x i16>* %y) {
+; CHECK-LABEL: vwsub_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v20, (a1)
+; CHECK-NEXT: vwsub.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = load <32 x i16>, <32 x i16>* %y
+ %c = sext <32 x i16> %a to <32 x i32>
+ %d = sext <32 x i16> %b to <32 x i32>
+ %e = sub <32 x i32> %c, %d
+ ret <32 x i32> %e
+}
+
+define <16 x i64> @vwsub_v16i64(<16 x i32>* %x, <16 x i32>* %y) {
+; CHECK-LABEL: vwsub_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v20, (a1)
+; CHECK-NEXT: vwsub.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = load <16 x i32>, <16 x i32>* %y
+ %c = sext <16 x i32> %a to <16 x i64>
+ %d = sext <16 x i32> %b to <16 x i64>
+ %e = sub <16 x i64> %c, %d
+ ret <16 x i64> %e
+}
+
+define <128 x i16> @vwsub_v128i16(<128 x i8>* %x, <128 x i8>* %y) nounwind {
+; CHECK-LABEL: vwsub_v128i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v24, (a1)
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT: vwsub.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwsub.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <128 x i8>, <128 x i8>* %x
+ %b = load <128 x i8>, <128 x i8>* %y
+ %c = sext <128 x i8> %a to <128 x i16>
+ %d = sext <128 x i8> %b to <128 x i16>
+ %e = sub <128 x i16> %c, %d
+ ret <128 x i16> %e
+}
+
+define <64 x i32> @vwsub_v64i32(<64 x i16>* %x, <64 x i16>* %y) nounwind {
+; CHECK-LABEL: vwsub_v64i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v24, (a1)
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vwsub.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwsub.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <64 x i16>, <64 x i16>* %x
+ %b = load <64 x i16>, <64 x i16>* %y
+ %c = sext <64 x i16> %a to <64 x i32>
+ %d = sext <64 x i16> %b to <64 x i32>
+ %e = sub <64 x i32> %c, %d
+ ret <64 x i32> %e
+}
+
+define <32 x i64> @vwsub_v32i64(<32 x i32>* %x, <32 x i32>* %y) nounwind {
+; CHECK-LABEL: vwsub_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v24, (a1)
+; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v16, 16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vi v0, v24, 16
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vwsub.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwsub.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <32 x i32>, <32 x i32>* %x
+ %b = load <32 x i32>, <32 x i32>* %y
+ %c = sext <32 x i32> %a to <32 x i64>
+ %d = sext <32 x i32> %b to <32 x i64>
+ %e = sub <32 x i64> %c, %d
+ ret <32 x i64> %e
+}
+
+define <2 x i32> @vwsub_v2i32_v2i8(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwsub_v2i32_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vsext.vf2 v11, v9
+; CHECK-NEXT: vwsub.vv v8, v11, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = sext <2 x i8> %a to <2 x i32>
+ %d = sext <2 x i8> %b to <2 x i32>
+ %e = sub <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <4 x i32> @vwsub_v4i32_v4i8_v4i16(<4 x i8>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwsub_v4i32_v4i8_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vsext.vf2 v10, v8
+; CHECK-NEXT: vwsub.vv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = sext <4 x i8> %a to <4 x i32>
+ %d = sext <4 x i16> %b to <4 x i32>
+ %e = sub <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <4 x i64> @vwsub_v4i64_v4i32_v4i8(<4 x i32>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwsub_v4i64_v4i32_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vsext.vf4 v11, v8
+; CHECK-NEXT: vwsub.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = sext <4 x i32> %a to <4 x i64>
+ %d = sext <4 x i8> %b to <4 x i64>
+ %e = sub <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <2 x i16> @vwsub_vx_v2i16(<2 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsub_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwsub.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = insertelement <2 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
+ %d = sext <2 x i8> %a to <2 x i16>
+ %e = sext <2 x i8> %c to <2 x i16>
+ %f = sub <2 x i16> %d, %e
+ ret <2 x i16> %f
+}
+
+define <4 x i16> @vwsub_vx_v4i16(<4 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsub_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwsub.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = insertelement <4 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
+ %d = sext <4 x i8> %a to <4 x i16>
+ %e = sext <4 x i8> %c to <4 x i16>
+ %f = sub <4 x i16> %d, %e
+ ret <4 x i16> %f
+}
+
+define <2 x i32> @vwsub_vx_v2i32(<2 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsub_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwsub.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = insertelement <2 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
+ %d = sext <2 x i16> %a to <2 x i32>
+ %e = sext <2 x i16> %c to <2 x i32>
+ %f = sub <2 x i32> %d, %e
+ ret <2 x i32> %f
+}
+
+define <8 x i16> @vwsub_vx_v8i16(<8 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsub_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwsub.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = insertelement <8 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
+ %d = sext <8 x i8> %a to <8 x i16>
+ %e = sext <8 x i8> %c to <8 x i16>
+ %f = sub <8 x i16> %d, %e
+ ret <8 x i16> %f
+}
+
+define <4 x i32> @vwsub_vx_v4i32(<4 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsub_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwsub.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = insertelement <4 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
+ %d = sext <4 x i16> %a to <4 x i32>
+ %e = sext <4 x i16> %c to <4 x i32>
+ %f = sub <4 x i32> %d, %e
+ ret <4 x i32> %f
+}
+
+define <2 x i64> @vwsub_vx_v2i64(<2 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsub_vx_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vwsub.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = insertelement <2 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
+ %d = sext <2 x i32> %a to <2 x i64>
+ %e = sext <2 x i32> %c to <2 x i64>
+ %f = sub <2 x i64> %d, %e
+ ret <2 x i64> %f
+}
+
+define <16 x i16> @vwsub_vx_v16i16(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsub_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vwsub.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = sext <16 x i8> %a to <16 x i16>
+ %e = sext <16 x i8> %c to <16 x i16>
+ %f = sub <16 x i16> %d, %e
+ ret <16 x i16> %f
+}
+
+define <8 x i32> @vwsub_vx_v8i32(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsub_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vwsub.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = sext <8 x i16> %a to <8 x i32>
+ %e = sext <8 x i16> %c to <8 x i32>
+ %f = sub <8 x i32> %d, %e
+ ret <8 x i32> %f
+}
+
+define <4 x i64> @vwsub_vx_v4i64(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsub_vx_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vwsub.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = sext <4 x i32> %a to <4 x i64>
+ %e = sext <4 x i32> %c to <4 x i64>
+ %f = sub <4 x i64> %d, %e
+ ret <4 x i64> %f
+}
+
+define <32 x i16> @vwsub_vx_v32i16(<32 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsub_vx_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vwsub.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = insertelement <32 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ %d = sext <32 x i8> %a to <32 x i16>
+ %e = sext <32 x i8> %c to <32 x i16>
+ %f = sub <32 x i16> %d, %e
+ ret <32 x i16> %f
+}
+
+define <16 x i32> @vwsub_vx_v16i32(<16 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsub_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vwsub.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = insertelement <16 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ %d = sext <16 x i16> %a to <16 x i32>
+ %e = sext <16 x i16> %c to <16 x i32>
+ %f = sub <16 x i32> %d, %e
+ ret <16 x i32> %f
+}
+
+define <8 x i64> @vwsub_vx_v8i64(<8 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsub_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vwsub.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = insertelement <8 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ %d = sext <8 x i32> %a to <8 x i64>
+ %e = sext <8 x i32> %c to <8 x i64>
+ %f = sub <8 x i64> %d, %e
+ ret <8 x i64> %f
+}
+
+define <64 x i16> @vwsub_vx_v64i16(<64 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsub_vx_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vwsub.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = insertelement <64 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %d = sext <64 x i8> %a to <64 x i16>
+ %e = sext <64 x i8> %c to <64 x i16>
+ %f = sub <64 x i16> %d, %e
+ ret <64 x i16> %f
+}
+
+define <32 x i32> @vwsub_vx_v32i32(<32 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsub_vx_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vwsub.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = insertelement <32 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %d = sext <32 x i16> %a to <32 x i32>
+ %e = sext <32 x i16> %c to <32 x i32>
+ %f = sub <32 x i32> %d, %e
+ ret <32 x i32> %f
+}
+
+define <16 x i64> @vwsub_vx_v16i64(<16 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsub_vx_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vwsub.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = insertelement <16 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %d = sext <16 x i32> %a to <16 x i64>
+ %e = sext <16 x i32> %c to <16 x i64>
+ %f = sub <16 x i64> %d, %e
+ ret <16 x i64> %f
+}
+
+define <8 x i16> @vwsub_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwsub_vx_v8i16_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: lb a1, 0(a1)
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i16
+ %d = insertelement <8 x i16> undef, i16 %c, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = sext <8 x i8> %a to <8 x i16>
+ %g = sub <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <8 x i16> @vwsub_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
+; CHECK-LABEL: vwsub_vx_v8i16_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vlse16.v v10, (a1), zero
+; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i16, i16* %y
+ %d = insertelement <8 x i16> undef, i16 %b, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = sext <8 x i8> %a to <8 x i16>
+ %g = sub <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <4 x i32> @vwsub_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
+; CHECK-LABEL: vwsub_vx_v4i32_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: lb a1, 0(a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = sub <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwsub_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
+; CHECK-LABEL: vwsub_vx_v4i32_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: lh a1, 0(a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i16, i16* %y
+ %c = sext i16 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = sub <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwsub_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
+; CHECK-LABEL: vwsub_vx_v4i32_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vlse32.v v10, (a1), zero
+; CHECK-NEXT: vwsub.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i32, i32* %y
+ %d = insertelement <4 x i32> undef, i32 %b, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = sext <4 x i16> %a to <4 x i32>
+ %g = sub <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <2 x i64> @vwsub_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
+; RV32-LABEL: vwsub_vx_v2i64_i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lb a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsub_vx_v2i64_i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: lb a1, 0(a1)
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i8, i8* %y
+ %c = sext i8 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwsub_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
+; RV32-LABEL: vwsub_vx_v2i64_i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lh a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsub_vx_v2i64_i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: lh a1, 0(a1)
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i16, i16* %y
+ %c = sext i16 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwsub_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
+; RV32-LABEL: vwsub_vx_v2i64_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: srai a0, a1, 31
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsub_vx_v2i64_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: lw a1, 0(a1)
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i32, i32* %y
+ %c = sext i32 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwsub_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
+; RV32-LABEL: vwsub_vx_v2i64_i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw a2, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsub.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsub_vx_v2i64_i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vlse64.v v10, (a1), zero
+; RV64-NEXT: vwsub.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i64, i64* %y
+ %d = insertelement <2 x i64> undef, i64 %b, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = sext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
new file mode 100644
index 0000000000000..98fc661e558aa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll
@@ -0,0 +1,881 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <2 x i16> @vwsubu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = zext <2 x i8> %a to <2 x i16>
+ %d = zext <2 x i8> %b to <2 x i16>
+ %e = sub <2 x i16> %c, %d
+ ret <2 x i16> %e
+}
+
+define <4 x i16> @vwsubu_v4i16(<4 x i8>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = zext <4 x i8> %a to <4 x i16>
+ %d = zext <4 x i8> %b to <4 x i16>
+ %e = sub <4 x i16> %c, %d
+ ret <4 x i16> %e
+}
+
+define <2 x i32> @vwsubu_v2i32(<2 x i16>* %x, <2 x i16>* %y) {
+; CHECK-LABEL: vwsubu_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = load <2 x i16>, <2 x i16>* %y
+ %c = zext <2 x i16> %a to <2 x i32>
+ %d = zext <2 x i16> %b to <2 x i32>
+ %e = sub <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <8 x i16> @vwsubu_v8i16(<8 x i8>* %x, <8 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vle8.v v10, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load <8 x i8>, <8 x i8>* %y
+ %c = zext <8 x i8> %a to <8 x i16>
+ %d = zext <8 x i8> %b to <8 x i16>
+ %e = sub <8 x i16> %c, %d
+ ret <8 x i16> %e
+}
+
+define <4 x i32> @vwsubu_v4i32(<4 x i16>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwsubu_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vle16.v v10, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = zext <4 x i16> %a to <4 x i32>
+ %d = zext <4 x i16> %b to <4 x i32>
+ %e = sub <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <2 x i64> @vwsubu_v2i64(<2 x i32>* %x, <2 x i32>* %y) {
+; CHECK-LABEL: vwsubu_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vle32.v v10, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v9, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load <2 x i32>, <2 x i32>* %y
+ %c = zext <2 x i32> %a to <2 x i64>
+ %d = zext <2 x i32> %b to <2 x i64>
+ %e = sub <2 x i64> %c, %d
+ ret <2 x i64> %e
+}
+
+define <16 x i16> @vwsubu_v16i16(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vle8.v v11, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = load <16 x i8>, <16 x i8>* %y
+ %c = zext <16 x i8> %a to <16 x i16>
+ %d = zext <16 x i8> %b to <16 x i16>
+ %e = sub <16 x i16> %c, %d
+ ret <16 x i16> %e
+}
+
+define <8 x i32> @vwsubu_v8i32(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: vwsubu_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vle16.v v11, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = load <8 x i16>, <8 x i16>* %y
+ %c = zext <8 x i16> %a to <8 x i32>
+ %d = zext <8 x i16> %b to <8 x i32>
+ %e = sub <8 x i32> %c, %d
+ ret <8 x i32> %e
+}
+
+define <4 x i64> @vwsubu_v4i64(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: vwsubu_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vle32.v v11, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i32>, <4 x i32>* %y
+ %c = zext <4 x i32> %a to <4 x i64>
+ %d = zext <4 x i32> %b to <4 x i64>
+ %e = sub <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <32 x i16> @vwsubu_v32i16(<32 x i8>* %x, <32 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vle8.v v14, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = load <32 x i8>, <32 x i8>* %y
+ %c = zext <32 x i8> %a to <32 x i16>
+ %d = zext <32 x i8> %b to <32 x i16>
+ %e = sub <32 x i16> %c, %d
+ ret <32 x i16> %e
+}
+
+define <16 x i32> @vwsubu_v16i32(<16 x i16>* %x, <16 x i16>* %y) {
+; CHECK-LABEL: vwsubu_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vle16.v v14, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = load <16 x i16>, <16 x i16>* %y
+ %c = zext <16 x i16> %a to <16 x i32>
+ %d = zext <16 x i16> %b to <16 x i32>
+ %e = sub <16 x i32> %c, %d
+ ret <16 x i32> %e
+}
+
+define <8 x i64> @vwsubu_v8i64(<8 x i32>* %x, <8 x i32>* %y) {
+; CHECK-LABEL: vwsubu_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vle32.v v14, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v12, v14
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = load <8 x i32>, <8 x i32>* %y
+ %c = zext <8 x i32> %a to <8 x i64>
+ %d = zext <8 x i32> %b to <8 x i64>
+ %e = sub <8 x i64> %c, %d
+ ret <8 x i64> %e
+}
+
+define <64 x i16> @vwsubu_v64i16(<64 x i8>* %x, <64 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v20, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = load <64 x i8>, <64 x i8>* %y
+ %c = zext <64 x i8> %a to <64 x i16>
+ %d = zext <64 x i8> %b to <64 x i16>
+ %e = sub <64 x i16> %c, %d
+ ret <64 x i16> %e
+}
+
+define <32 x i32> @vwsubu_v32i32(<32 x i16>* %x, <32 x i16>* %y) {
+; CHECK-LABEL: vwsubu_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v20, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = load <32 x i16>, <32 x i16>* %y
+ %c = zext <32 x i16> %a to <32 x i32>
+ %d = zext <32 x i16> %b to <32 x i32>
+ %e = sub <32 x i32> %c, %d
+ ret <32 x i32> %e
+}
+
+define <16 x i64> @vwsubu_v16i64(<16 x i32>* %x, <16 x i32>* %y) {
+; CHECK-LABEL: vwsubu_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v20, (a1)
+; CHECK-NEXT: vwsubu.vv v8, v16, v20
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = load <16 x i32>, <16 x i32>* %y
+ %c = zext <16 x i32> %a to <16 x i64>
+ %d = zext <16 x i32> %b to <16 x i64>
+ %e = sub <16 x i64> %c, %d
+ ret <16 x i64> %e
+}
+
+define <128 x i16> @vwsubu_v128i16(<128 x i8>* %x, <128 x i8>* %y) nounwind {
+; CHECK-LABEL: vwsubu_v128i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 128
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vle8.v v24, (a1)
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT: vwsubu.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwsubu.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <128 x i8>, <128 x i8>* %x
+ %b = load <128 x i8>, <128 x i8>* %y
+ %c = zext <128 x i8> %a to <128 x i16>
+ %d = zext <128 x i8> %b to <128 x i16>
+ %e = sub <128 x i16> %c, %d
+ ret <128 x i16> %e
+}
+
+define <64 x i32> @vwsubu_v64i32(<64 x i16>* %x, <64 x i16>* %y) nounwind {
+; CHECK-LABEL: vwsubu_v64i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vle16.v v24, (a1)
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT: vslidedown.vx v8, v16, a0
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vx v0, v24, a0
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT: vwsubu.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwsubu.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <64 x i16>, <64 x i16>* %x
+ %b = load <64 x i16>, <64 x i16>* %y
+ %c = zext <64 x i16> %a to <64 x i32>
+ %d = zext <64 x i16> %b to <64 x i32>
+ %e = sub <64 x i32> %c, %d
+ ret <64 x i32> %e
+}
+
+define <32 x i64> @vwsubu_v32i64(<32 x i32>* %x, <32 x i32>* %y) nounwind {
+; CHECK-LABEL: vwsubu_v32i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vle32.v v24, (a1)
+; CHECK-NEXT: vsetivli zero, 16, e32, m8, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v16, 16
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vslidedown.vi v0, v24, 16
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vwsubu.vv v8, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vwsubu.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %a = load <32 x i32>, <32 x i32>* %x
+ %b = load <32 x i32>, <32 x i32>* %y
+ %c = zext <32 x i32> %a to <32 x i64>
+ %d = zext <32 x i32> %b to <32 x i64>
+ %e = sub <32 x i64> %c, %d
+ ret <32 x i64> %e
+}
+
+define <2 x i32> @vwsubu_v2i32_v2i8(<2 x i8>* %x, <2 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v2i32_v2i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vzext.vf2 v11, v9
+; CHECK-NEXT: vwsubu.vv v8, v11, v10
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = load <2 x i8>, <2 x i8>* %y
+ %c = zext <2 x i8> %a to <2 x i32>
+ %d = zext <2 x i8> %b to <2 x i32>
+ %e = sub <2 x i32> %c, %d
+ ret <2 x i32> %e
+}
+
+define <4 x i32> @vwsubu_v4i32_v4i8_v4i16(<4 x i8>* %x, <4 x i16>* %y) {
+; CHECK-LABEL: vwsubu_v4i32_v4i8_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vzext.vf2 v10, v8
+; CHECK-NEXT: vwsubu.vv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = load <4 x i16>, <4 x i16>* %y
+ %c = zext <4 x i8> %a to <4 x i32>
+ %d = zext <4 x i16> %b to <4 x i32>
+ %e = sub <4 x i32> %c, %d
+ ret <4 x i32> %e
+}
+
+define <4 x i64> @vwsubu_v4i64_v4i32_v4i8(<4 x i32>* %x, <4 x i8>* %y) {
+; CHECK-LABEL: vwsubu_v4i64_v4i32_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle8.v v8, (a1)
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vzext.vf4 v11, v8
+; CHECK-NEXT: vwsubu.vv v8, v10, v11
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = load <4 x i8>, <4 x i8>* %y
+ %c = zext <4 x i32> %a to <4 x i64>
+ %d = zext <4 x i8> %b to <4 x i64>
+ %e = sub <4 x i64> %c, %d
+ ret <4 x i64> %e
+}
+
+define <2 x i16> @vwsubu_vx_v2i16(<2 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsubu_vx_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i8>, <2 x i8>* %x
+ %b = insertelement <2 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
+ %d = zext <2 x i8> %a to <2 x i16>
+ %e = zext <2 x i8> %c to <2 x i16>
+ %f = sub <2 x i16> %d, %e
+ ret <2 x i16> %f
+}
+
+define <4 x i16> @vwsubu_vx_v4i16(<4 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsubu_vx_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %x
+ %b = insertelement <4 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
+ %d = zext <4 x i8> %a to <4 x i16>
+ %e = zext <4 x i8> %c to <4 x i16>
+ %f = sub <4 x i16> %d, %e
+ ret <4 x i16> %f
+}
+
+define <2 x i32> @vwsubu_vx_v2i32(<2 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsubu_vx_v2i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, <2 x i16>* %x
+ %b = insertelement <2 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
+ %d = zext <2 x i16> %a to <2 x i32>
+ %e = zext <2 x i16> %c to <2 x i32>
+ %f = sub <2 x i32> %d, %e
+ ret <2 x i32> %f
+}
+
+define <8 x i16> @vwsubu_vx_v8i16(<8 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsubu_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = insertelement <8 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
+ %d = zext <8 x i8> %a to <8 x i16>
+ %e = zext <8 x i8> %c to <8 x i16>
+ %f = sub <8 x i16> %d, %e
+ ret <8 x i16> %f
+}
+
+define <4 x i32> @vwsubu_vx_v4i32(<4 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsubu_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = insertelement <4 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
+ %d = zext <4 x i16> %a to <4 x i32>
+ %e = zext <4 x i16> %c to <4 x i32>
+ %f = sub <4 x i32> %d, %e
+ ret <4 x i32> %f
+}
+
+define <2 x i64> @vwsubu_vx_v2i64(<2 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsubu_vx_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = insertelement <2 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
+ %d = zext <2 x i32> %a to <2 x i64>
+ %e = zext <2 x i32> %c to <2 x i64>
+ %f = sub <2 x i64> %d, %e
+ ret <2 x i64> %f
+}
+
+define <16 x i16> @vwsubu_vx_v16i16(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsubu_vx_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vle8.v v10, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = zext <16 x i8> %a to <16 x i16>
+ %e = zext <16 x i8> %c to <16 x i16>
+ %f = sub <16 x i16> %d, %e
+ ret <16 x i16> %f
+}
+
+define <8 x i32> @vwsubu_vx_v8i32(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsubu_vx_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT: vle16.v v10, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = zext <8 x i16> %a to <8 x i32>
+ %e = zext <8 x i16> %c to <8 x i32>
+ %f = sub <8 x i32> %d, %e
+ ret <8 x i32> %f
+}
+
+define <4 x i64> @vwsubu_vx_v4i64(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsubu_vx_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = zext <4 x i32> %a to <4 x i64>
+ %e = zext <4 x i32> %c to <4 x i64>
+ %f = sub <4 x i64> %d, %e
+ ret <4 x i64> %f
+}
+
+define <32 x i16> @vwsubu_vx_v32i16(<32 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsubu_vx_v32i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu
+; CHECK-NEXT: vle8.v v12, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i8>, <32 x i8>* %x
+ %b = insertelement <32 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
+ %d = zext <32 x i8> %a to <32 x i16>
+ %e = zext <32 x i8> %c to <32 x i16>
+ %f = sub <32 x i16> %d, %e
+ ret <32 x i16> %f
+}
+
+define <16 x i32> @vwsubu_vx_v16i32(<16 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsubu_vx_v16i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i16>, <16 x i16>* %x
+ %b = insertelement <16 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
+ %d = zext <16 x i16> %a to <16 x i32>
+ %e = zext <16 x i16> %c to <16 x i32>
+ %f = sub <16 x i32> %d, %e
+ ret <16 x i32> %f
+}
+
+define <8 x i64> @vwsubu_vx_v8i64(<8 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsubu_vx_v8i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v12, a1
+; CHECK-NEXT: ret
+ %a = load <8 x i32>, <8 x i32>* %x
+ %b = insertelement <8 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
+ %d = zext <8 x i32> %a to <8 x i64>
+ %e = zext <8 x i32> %c to <8 x i64>
+ %f = sub <8 x i64> %d, %e
+ ret <8 x i64> %f
+}
+
+define <64 x i16> @vwsubu_vx_v64i16(<64 x i8>* %x, i8 %y) {
+; CHECK-LABEL: vwsubu_vx_v64i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 64
+; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT: vle8.v v16, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <64 x i8>, <64 x i8>* %x
+ %b = insertelement <64 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+ %d = zext <64 x i8> %a to <64 x i16>
+ %e = zext <64 x i8> %c to <64 x i16>
+ %f = sub <64 x i16> %d, %e
+ ret <64 x i16> %f
+}
+
+define <32 x i32> @vwsubu_vx_v32i32(<32 x i16>* %x, i16 %y) {
+; CHECK-LABEL: vwsubu_vx_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
+; CHECK-NEXT: vle16.v v16, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <32 x i16>, <32 x i16>* %x
+ %b = insertelement <32 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
+ %d = zext <32 x i16> %a to <32 x i32>
+ %e = zext <32 x i16> %c to <32 x i32>
+ %f = sub <32 x i32> %d, %e
+ ret <32 x i32> %f
+}
+
+define <16 x i64> @vwsubu_vx_v16i64(<16 x i32>* %x, i32 %y) {
+; CHECK-LABEL: vwsubu_vx_v16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vle32.v v16, (a0)
+; CHECK-NEXT: vwsubu.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %a = load <16 x i32>, <16 x i32>* %x
+ %b = insertelement <16 x i32> undef, i32 %y, i64 0
+ %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
+ %d = zext <16 x i32> %a to <16 x i64>
+ %e = zext <16 x i32> %c to <16 x i64>
+ %f = sub <16 x i64> %d, %e
+ ret <16 x i64> %f
+}
+
+define <8 x i16> @vwsubu_vx_v8i16_i8(<8 x i8>* %x, i8* %y) {
+; CHECK-LABEL: vwsubu_vx_v8i16_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: lbu a1, 0(a1)
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i8, i8* %y
+ %c = zext i8 %b to i16
+ %d = insertelement <8 x i16> undef, i16 %c, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = zext <8 x i8> %a to <8 x i16>
+ %g = sub <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <8 x i16> @vwsubu_vx_v8i16_i16(<8 x i8>* %x, i16* %y) {
+; CHECK-LABEL: vwsubu_vx_v8i16_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vlse16.v v10, (a1), zero
+; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, <8 x i8>* %x
+ %b = load i16, i16* %y
+ %d = insertelement <8 x i16> undef, i16 %b, i32 0
+ %e = shufflevector <8 x i16> %d, <8 x i16> undef, <8 x i32> zeroinitializer
+ %f = zext <8 x i8> %a to <8 x i16>
+ %g = sub <8 x i16> %e, %f
+ ret <8 x i16> %g
+}
+
+define <4 x i32> @vwsubu_vx_v4i32_i8(<4 x i16>* %x, i8* %y) {
+; CHECK-LABEL: vwsubu_vx_v4i32_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: lbu a1, 0(a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i8, i8* %y
+ %c = zext i8 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = zext <4 x i16> %a to <4 x i32>
+ %g = sub <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwsubu_vx_v4i32_i16(<4 x i16>* %x, i16* %y) {
+; CHECK-LABEL: vwsubu_vx_v4i32_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: lhu a1, 0(a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; CHECK-NEXT: vmv.v.x v10, a1
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i16, i16* %y
+ %c = zext i16 %b to i32
+ %d = insertelement <4 x i32> undef, i32 %c, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = zext <4 x i16> %a to <4 x i32>
+ %g = sub <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <4 x i32> @vwsubu_vx_v4i32_i32(<4 x i16>* %x, i32* %y) {
+; CHECK-LABEL: vwsubu_vx_v4i32_i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vlse32.v v10, (a1), zero
+; CHECK-NEXT: vwsubu.wv v8, v10, v9
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, <4 x i16>* %x
+ %b = load i32, i32* %y
+ %d = insertelement <4 x i32> undef, i32 %b, i32 0
+ %e = shufflevector <4 x i32> %d, <4 x i32> undef, <4 x i32> zeroinitializer
+ %f = zext <4 x i16> %a to <4 x i32>
+ %g = sub <4 x i32> %e, %f
+ ret <4 x i32> %g
+}
+
+define <2 x i64> @vwsubu_vx_v2i64_i8(<2 x i32>* %x, i8* %y) nounwind {
+; RV32-LABEL: vwsubu_vx_v2i64_i8:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lbu a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsubu_vx_v2i64_i8:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i8, i8* %y
+ %c = zext i8 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwsubu_vx_v2i64_i16(<2 x i32>* %x, i16* %y) nounwind {
+; RV32-LABEL: vwsubu_vx_v2i64_i16:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lhu a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsubu_vx_v2i64_i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: lhu a1, 0(a1)
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i16, i16* %y
+ %c = zext i16 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwsubu_vx_v2i64_i32(<2 x i32>* %x, i32* %y) nounwind {
+; RV32-LABEL: vwsubu_vx_v2i64_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsubu_vx_v2i64_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: lwu a1, 0(a1)
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
+; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i32, i32* %y
+ %c = zext i32 %b to i64
+ %d = insertelement <2 x i64> undef, i64 %c, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}
+
+define <2 x i64> @vwsubu_vx_v2i64_i64(<2 x i32>* %x, i64* %y) nounwind {
+; RV32-LABEL: vwsubu_vx_v2i64_i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV32-NEXT: lw a2, 4(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: vle32.v v9, (a0)
+; RV32-NEXT: sw a2, 12(sp)
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vwsubu.wv v8, v10, v9
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vwsubu_vx_v2i64_i64:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vlse64.v v10, (a1), zero
+; RV64-NEXT: vwsubu.wv v8, v10, v9
+; RV64-NEXT: ret
+ %a = load <2 x i32>, <2 x i32>* %x
+ %b = load i64, i64* %y
+ %d = insertelement <2 x i64> undef, i64 %b, i64 0
+ %e = shufflevector <2 x i64> %d, <2 x i64> undef, <2 x i32> zeroinitializer
+ %f = zext <2 x i32> %a to <2 x i64>
+ %g = sub <2 x i64> %e, %f
+ ret <2 x i64> %g
+}