[llvm] 875c76d - [RISCV] Add support for matching .vx and .vi forms of binary instructions for fixed vectors.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 09:18:31 PST 2021
Author: Craig Topper
Date: 2021-02-12T09:18:10-08:00
New Revision: 875c76de2b6ad67b10c027ed74422642cf4d1aed
URL: https://github.com/llvm/llvm-project/commit/875c76de2b6ad67b10c027ed74422642cf4d1aed
DIFF: https://github.com/llvm/llvm-project/commit/875c76de2b6ad67b10c027ed74422642cf4d1aed.diff
LOG: [RISCV] Add support for matching .vx and .vi forms of binary instructions for fixed vectors.
Unlike scalable vectors, I'm only using a ComplexPattern for
the immediate itself. The vmv_v_x is matched explicitly. We ignore
the VL argument when matching a binary operator, but we do check
it when matching the splat directly.
I left out tests for vXi64 as they fail on rv32 right now.
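
To see why the immediate check has to be SEW-aware, here is a minimal
standalone sketch of the logic in selectRVVSimm5 from the diff below
(the helper names are mine, not in-tree; llvm::SignExtend64 and
isInt<5> are the actual calls used):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low 'Width' bits of 'Val', mirroring llvm::SignExtend64.
    static int64_t signExtend64(uint64_t Val, unsigned Width) {
      return int64_t(Val << (64 - Width)) >> (64 - Width);
    }

    // True if a splatted constant, interpreted at element width 'Width',
    // fits the 5-bit signed immediate of a .vi instruction (isInt<5>).
    static bool isSimm5AtSEW(uint64_t Val, unsigned Width) {
      int64_t Imm = signExtend64(Val, Width);
      return Imm >= -16 && Imm <= 15;
    }

    int main() {
      // On RV64, an i32 splat of -1 can reach the selector as the XLenVT
      // constant 0xFFFFFFFF; sign-extending from SEW=32 recovers -1,
      // which fits simm5, so the .vi form is usable.
      assert(isSimm5AtSEW(0xFFFFFFFFULL, 32));
      // The same bit pattern interpreted at SEW=64 is 2^32-1, which does not fit.
      assert(!isSimm5AtSEW(0xFFFFFFFFULL, 64));
      return 0;
    }

This is also why the VVLPatterns.td changes define one sewNsimm5/sewNuimm5
ComplexPattern per element width rather than a single simm5 check.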
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D96365
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 62fe6460a7a4..05620fc4b85a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1015,7 +1015,8 @@ bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
if (N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64)
+ N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
+ N.getOpcode() != RISCVISD::VMV_V_X_VL)
return false;
SplatVal = N.getOperand(0);
return true;
@@ -1023,7 +1024,8 @@ bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+ N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
+ N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
!isa<ConstantSDNode>(N.getOperand(0)))
return false;
@@ -1053,7 +1055,8 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64) ||
+ N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
+ N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
!isa<ConstantSDNode>(N.getOperand(0)))
return false;
@@ -1068,6 +1071,36 @@ bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
return true;
}
+bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
+ SDValue &Imm) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N)) {
+ int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
+
+ if (!isInt<5>(ImmVal))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
+ return true;
+ }
+
+ return false;
+}
+
+bool RISCVDAGToDAGISel::selectRVVUimm5(SDValue N, unsigned Width,
+ SDValue &Imm) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N)) {
+ int64_t ImmVal = C->getSExtValue();
+
+ if (!isUInt<5>(ImmVal))
+ return false;
+
+ Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
+ return true;
+ }
+
+ return false;
+}
+
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 0264de785f2a..1e9dba3cf0b0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -57,6 +57,16 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
+ bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
+ template <unsigned Width> bool selectRVVSimm5(SDValue N, SDValue &Imm) {
+ return selectRVVSimm5(N, Width, Imm);
+ }
+
+ bool selectRVVUimm5(SDValue N, unsigned Width, SDValue &Imm);
+ template <unsigned Width> bool selectRVVUimm5(SDValue N, SDValue &Imm) {
+ return selectRVVUimm5(N, Width, Imm);
+ }
+
void selectVLSEG(SDNode *Node, unsigned IntNo, bool IsStrided);
void selectVLSEGMask(SDNode *Node, unsigned IntNo, bool IsStrided);
void selectVLSEGFF(SDNode *Node);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 6f299b537d58..e3e83923284e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4056,15 +4056,8 @@ foreach vti = AllVectors in {
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
$rs1, GPR:$vl, vti.SEW)>;
-}
-foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
- (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
- $rs2, GPR:$vl, vti.SEW)>;
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
- (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
- simm5:$imm5, GPR:$vl, vti.SEW)>;
+ // vmv.v.x/vmv.v.i are handled in RISCVInstrInfoVVLPatterns.td
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index e99c8e3354ac..2976cb591993 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -126,7 +126,6 @@ class VPatBinarySDNode_XI<SDNode vop,
string suffix,
ValueType result_type,
ValueType vop_type,
- ValueType xop_type,
ValueType mask_type,
int sew,
LMULInfo vlmul,
@@ -150,7 +149,7 @@ multiclass VPatBinarySDNode_VV_VX<SDNode vop, string instruction_name>
vti.Vector, vti.Vector, vti.Mask, vti.SEW,
vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW,
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
SplatPat, GPR>;
}
@@ -164,11 +163,11 @@ multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.SEW,
vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW,
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
SplatPat, GPR>;
def : VPatBinarySDNode_XI<vop, instruction_name, "VI",
- vti.Vector, vti.Vector, XLenVT, vti.Mask, vti.SEW,
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
!cast<ComplexPattern>(SplatPat#_#ImmType),
ImmType>;
@@ -423,11 +422,11 @@ defm "" : VPatBinarySDNode_VV_VX<sub, "PseudoVSUB">;
// Handle VRSUB specially since it's the only integer binary op with reversed
// pattern operands
foreach vti = AllIntegerVectors in {
- def : Pat<(sub (vti.Vector (SplatPat XLenVT:$rs2)),
+ def : Pat<(sub (vti.Vector (SplatPat GPR:$rs2)),
(vti.Vector vti.RegClass:$rs1)),
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
vti.RegClass:$rs1, GPR:$rs2, vti.AVL, vti.SEW)>;
- def : Pat<(sub (vti.Vector (SplatPat_simm5 XLenVT:$rs2)),
+ def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)),
(vti.Vector vti.RegClass:$rs1)),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.SEW)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index f1d6952a9d8b..9517ac95aafe 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -97,11 +97,22 @@ def SDT_RISCVVMSETCLR_VL : SDTypeProfile<1, 1, [SDTCisVec<0>,
def riscv_vmclr_vl : SDNode<"RISCVISD::VMCLR_VL", SDT_RISCVVMSETCLR_VL>;
def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>;
-def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
+def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
+// Ignore the vl operand.
def SplatFPOp : PatFrag<(ops node:$op),
(riscv_vfmv_v_f_vl node:$op, srcvalue)>;
+def sew8simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<8>", []>;
+def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>;
+def sew32simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<32>", []>;
+def sew64simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<64>", []>;
+
+def sew8uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<8>", []>;
+def sew16uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<16>", []>;
+def sew32uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<32>", []>;
+def sew64uimm5 : ComplexPattern<XLenVT, 1, "selectRVVUimm5<64>", []>;
+
class VPatBinaryVL_VV<SDNode vop,
string instruction_name,
ValueType result_type,
@@ -121,12 +132,37 @@ class VPatBinaryVL_VV<SDNode vop,
op_reg_class:$rs2,
GPR:$vl, sew)>;
+class VPatBinaryVL_XI<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType vop_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg RetClass,
+ VReg vop_reg_class,
+ ComplexPattern SplatPatKind,
+ DAGOperand xop_kind> :
+ Pat<(result_type (vop
+ (vop_type vop_reg_class:$rs1),
+ (vop_type (SplatPatKind xop_kind:$rs2)),
+ (mask_type true_mask),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX)
+ vop_reg_class:$rs1,
+ xop_kind:$rs2,
+ GPR:$vl, sew)>;
+
multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
def : VPatBinaryVL_VV<vop, instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.SEW,
vti.LMul, vti.RegClass, vti.RegClass>;
- // FIXME: Support splats.
+ def : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+ vti.LMul, vti.RegClass, vti.RegClass,
+ SplatPat, GPR>;
}
}
@@ -136,7 +172,15 @@ multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
def : VPatBinaryVL_VV<vop, instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.SEW,
vti.LMul, vti.RegClass, vti.RegClass>;
- // FIXME: Support splats.
+ def : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+ vti.LMul, vti.RegClass, vti.RegClass,
+ SplatPat, GPR>;
+ def : VPatBinaryVL_XI<vop, instruction_name, "VI",
+ vti.Vector, vti.Vector, vti.Mask, vti.SEW,
+ vti.LMul, vti.RegClass, vti.RegClass,
+ !cast<ComplexPattern>(SplatPat#_#ImmType),
+ ImmType>;
}
}
@@ -214,6 +258,20 @@ foreach mti = AllMasks in {
// 12.1. Vector Single-Width Integer Add and Subtract
defm "" : VPatBinaryVL_VV_VX_VI<riscv_add_vl, "PseudoVADD">;
defm "" : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;
+// Handle VRSUB specially since it's the only integer binary op with reversed
+// pattern operands
+foreach vti = AllIntegerVectors in {
+ def : Pat<(riscv_sub_vl (vti.Vector (SplatPat GPR:$rs2)),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
+ (XLenVT (VLOp GPR:$vl))),
+ (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
+ vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+ def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
+ (XLenVT (VLOp GPR:$vl))),
+ (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
+ vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.SEW)>;
+}
// 12.5. Vector Bitwise Logical Instructions
defm "" : VPatBinaryVL_VV_VX_VI<riscv_and_vl, "PseudoVAND">;
@@ -240,6 +298,18 @@ defm "" : VPatBinaryVL_VV_VX<riscv_sdiv_vl, "PseudoVDIV">;
defm "" : VPatBinaryVL_VV_VX<riscv_urem_vl, "PseudoVREMU">;
defm "" : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">;
+// 12.17. Vector Integer Move Instructions
+foreach vti = AllIntegerVectors in {
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
+ $rs2, GPR:$vl, vti.SEW)>;
+ defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl (ImmPat XLenVT:$imm5),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
+ XLenVT:$imm5, GPR:$vl, vti.SEW)>;
+}
+
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 6ebaa475c43b..59037541f0ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -4577,3 +4577,1638 @@ define void @umax_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
store <4 x i64> %c, <4 x i64>* %x
ret void
}
+
+define void @add_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: add_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vadd.vi v25, v25, -1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 -1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = add <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @add_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: add_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vadd.vi v25, v25, -1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 -1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = add <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @add_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: add_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vadd.vi v25, v25, -1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 -1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = add <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @add_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: add_iv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vadd.vi v25, v25, 1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = add <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @add_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: add_iv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vadd.vi v25, v25, 1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = add <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @add_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: add_iv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vadd.vi v25, v25, 1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = add <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @add_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: add_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vadd.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = add <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @add_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: add_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vadd.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = add <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @add_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: add_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vadd.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = add <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @add_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: add_xv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vadd.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = add <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @add_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: add_xv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vadd.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = add <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @add_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: add_xv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vadd.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = add <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @sub_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: sub_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: vsub.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 -1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = sub <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @sub_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: sub_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: vsub.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 -1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = sub <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @sub_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: sub_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: addi a1, zero, -1
+; CHECK-NEXT: vsub.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 -1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = sub <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @sub_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: sub_iv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vrsub.vi v25, v25, 1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = sub <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @sub_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: sub_iv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vrsub.vi v25, v25, 1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = sub <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @sub_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: sub_iv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vrsub.vi v25, v25, 1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = sub <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @sub_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: sub_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsub.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = sub <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @sub_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: sub_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsub.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = sub <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @sub_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: sub_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsub.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = sub <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @sub_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: sub_xv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vrsub.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = sub <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @sub_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: sub_xv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vrsub.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = sub <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @sub_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: sub_xv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vrsub.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = sub <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @mul_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: mul_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vmul.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = mul <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @mul_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: mul_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vmul.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = mul <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @mul_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: mul_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vmul.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = mul <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @mul_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: mul_xv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vmul.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = mul <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @mul_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: mul_xv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vmul.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = mul <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @mul_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: mul_xv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vmul.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = mul <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @and_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: and_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, -2
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 -2, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = and <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @and_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: and_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, -2
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 -2, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = and <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @and_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: and_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, -2
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 -2, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = and <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @and_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: and_iv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = and <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @and_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: and_iv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = and <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @and_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: and_iv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vand.vi v25, v25, 1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = and <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @and_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: and_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = and <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @and_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: and_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vand.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = and <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @and_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: and_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vand.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = and <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @and_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: and_xv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vand.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = and <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @and_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: and_xv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vand.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = and <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @and_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: and_xv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vand.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = and <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @or_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: or_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vor.vi v25, v25, -2
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 -2, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = or <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @or_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: or_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vor.vi v25, v25, -2
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 -2, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = or <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @or_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: or_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vor.vi v25, v25, -2
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 -2, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = or <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @or_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: or_iv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vor.vi v25, v25, 1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = or <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @or_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: or_iv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vor.vi v25, v25, 1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = or <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @or_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: or_iv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vor.vi v25, v25, 1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = or <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @or_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: or_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vor.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = or <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @or_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: or_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vor.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = or <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @or_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: or_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vor.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = or <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @or_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: or_xv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vor.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = or <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @or_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: or_xv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vor.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = or <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @or_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: or_xv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vor.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = or <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @xor_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: xor_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vxor.vi v25, v25, -1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 -1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = xor <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @xor_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: xor_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vxor.vi v25, v25, -1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 -1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = xor <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @xor_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: xor_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vxor.vi v25, v25, -1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 -1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = xor <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @xor_iv_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: xor_iv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vxor.vi v25, v25, 1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 1, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = xor <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @xor_iv_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: xor_iv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vxor.vi v25, v25, 1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 1, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = xor <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @xor_iv_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: xor_iv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vxor.vi v25, v25, 1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 1, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = xor <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @xor_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: xor_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vxor.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = xor <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @xor_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: xor_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vxor.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = xor <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @xor_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: xor_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vxor.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = xor <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @xor_xv_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: xor_xv_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vxor.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = xor <16 x i8> %c, %a
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @xor_xv_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: xor_xv_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vxor.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = xor <8 x i16> %c, %a
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @xor_xv_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: xor_xv_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vxor.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = xor <4 x i32> %c, %a
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @lshr_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: lshr_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsrl.vi v25, v25, 7
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 7, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = lshr <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @lshr_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: lshr_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsrl.vi v25, v25, 15
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 15, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = lshr <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @lshr_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: lshr_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsrl.vi v25, v25, 31
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 31, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = lshr <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @lshr_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: lshr_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = lshr <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @lshr_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: lshr_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = lshr <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @lshr_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: lshr_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = lshr <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @ashr_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: ashr_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsra.vi v25, v25, 7
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 7, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = ashr <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @ashr_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: ashr_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsra.vi v25, v25, 15
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 15, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = ashr <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @ashr_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: ashr_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsra.vi v25, v25, 31
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 31, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = ashr <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @ashr_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: ashr_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsra.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = ashr <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @ashr_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: ashr_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsra.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = ashr <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @ashr_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: ashr_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsra.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = ashr <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @shl_vi_v16i8(<16 x i8>* %x) {
+; CHECK-LABEL: shl_vi_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 16
+; CHECK-NEXT: vsetvli a1, a1, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsll.vi v25, v25, 7
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 7, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = shl <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @shl_vi_v8i16(<8 x i16>* %x) {
+; CHECK-LABEL: shl_vi_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 8
+; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsll.vi v25, v25, 15
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 15, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = shl <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @shl_vi_v4i32(<4 x i32>* %x) {
+; CHECK-LABEL: shl_vi_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, zero, 4
+; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsll.vi v25, v25, 31
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 31, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = shl <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @shl_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: shl_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vsll.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = shl <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @shl_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: shl_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vsll.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = shl <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @shl_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: shl_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vsll.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = shl <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @sdiv_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: sdiv_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vdiv.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = sdiv <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @sdiv_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: sdiv_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vdiv.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = sdiv <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @sdiv_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: sdiv_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vdiv.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = sdiv <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @srem_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: srem_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vrem.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = srem <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @srem_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: srem_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vrem.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = srem <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @srem_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: srem_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vrem.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = srem <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @udiv_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: udiv_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vdivu.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = udiv <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @udiv_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: udiv_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vdivu.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = udiv <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @udiv_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: udiv_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vdivu.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = udiv <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}
+
+define void @urem_vx_v16i8(<16 x i8>* %x, i8 %y) {
+; CHECK-LABEL: urem_vx_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 16
+; CHECK-NEXT: vsetvli a2, a2, e8,m1,ta,mu
+; CHECK-NEXT: vle8.v v25, (a0)
+; CHECK-NEXT: vremu.vx v25, v25, a1
+; CHECK-NEXT: vse8.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <16 x i8>, <16 x i8>* %x
+ %b = insertelement <16 x i8> undef, i8 %y, i32 0
+ %c = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
+ %d = urem <16 x i8> %a, %c
+ store <16 x i8> %d, <16 x i8>* %x
+ ret void
+}
+
+define void @urem_vx_v8i16(<8 x i16>* %x, i16 %y) {
+; CHECK-LABEL: urem_vx_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 8
+; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu
+; CHECK-NEXT: vle16.v v25, (a0)
+; CHECK-NEXT: vremu.vx v25, v25, a1
+; CHECK-NEXT: vse16.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i16>, <8 x i16>* %x
+ %b = insertelement <8 x i16> undef, i16 %y, i32 0
+ %c = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
+ %d = urem <8 x i16> %a, %c
+ store <8 x i16> %d, <8 x i16>* %x
+ ret void
+}
+
+define void @urem_vx_v4i32(<4 x i32>* %x, i32 %y) {
+; CHECK-LABEL: urem_vx_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, zero, 4
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vle32.v v25, (a0)
+; CHECK-NEXT: vremu.vx v25, v25, a1
+; CHECK-NEXT: vse32.v v25, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i32>, <4 x i32>* %x
+ %b = insertelement <4 x i32> undef, i32 %y, i32 0
+ %c = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
+ %d = urem <4 x i32> %a, %c
+ store <4 x i32> %d, <4 x i32>* %x
+ ret void
+}