[llvm] add224c - [LoongArch] Custom lowering `ISD::BUILD_VECTOR`
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 17:16:28 PST 2023
Author: wanglei
Date: 2023-12-01T09:13:39+08:00
New Revision: add224c0a094d20389d3659f7b6e496df461a976
URL: https://github.com/llvm/llvm-project/commit/add224c0a094d20389d3659f7b6e496df461a976
DIFF: https://github.com/llvm/llvm-project/commit/add224c0a094d20389d3659f7b6e496df461a976.diff
LOG: [LoongArch] Custom lowering `ISD::BUILD_VECTOR`
Added:
llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
Removed:
################################################################################
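In outline, the patch handles ISD::BUILD_VECTOR in four tiers. The standalone C++ sketch below mirrors the decision order visible in the code that follows; the enum and function names are illustrative only and do not appear in the patch.

// Illustrative classifier, not patch code.
enum class BuildVectorKind {
  ConstSplatSi10, // selected as [x]vrepli.[bhwd] in ISelDAGToDAG
  ConstSplat,     // re-materialized via a splatted getConstant, then
                  // broadcast from a GPR with [x]vreplgr2vr.*
  DynSplat,       // returned unchanged; matched by the splat patterns
  NonConst,       // expanded to a chain of INSERT_VECTOR_ELT nodes
  ConstOther      // left to the default expansion (constant-pool load)
};

BuildVectorKind classifyBuildVector(bool IsConstantSplat, bool FitsSi10,
                                    bool IsSplat, bool AnyConstOrUndefOp) {
  if (IsConstantSplat)
    return FitsSi10 ? BuildVectorKind::ConstSplatSi10
                    : BuildVectorKind::ConstSplat;
  if (IsSplat)
    return BuildVectorKind::DynSplat;
  return AnyConstOrUndefOp ? BuildVectorKind::ConstOther
                           : BuildVectorKind::NonConst;
}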
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 0cfee6025218884..726856bda5dc59c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -77,13 +77,63 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::BITCAST: {
- if (VT.is128BitVector() || VT.is512BitVector()) {
+ if (VT.is128BitVector() || VT.is256BitVector()) {
ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
CurDAG->RemoveDeadNode(Node);
return;
}
break;
}
+ case ISD::BUILD_VECTOR: {
+ // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of
+ // 128/256-bit when LSX/LASX is enabled.
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned Op;
+ EVT ViaVecTy;
+ bool Is128Vec = BVN->getValueType(0).is128BitVector();
+ bool Is256Vec = BVN->getValueType(0).is256BitVector();
+
+ if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
+ break;
+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8))
+ break;
+
+ switch (SplatBitSize) {
+ default:
+ break;
+ case 8:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+ ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8;
+ break;
+ case 16:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+ ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16;
+ break;
+ case 32:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+ ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32;
+ break;
+ case 64:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+ ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64;
+ break;
+ }
+
+ SDNode *Res;
+ // If we have a signed 10-bit integer, we can splat it directly.
+ if (SplatValue.isSignedIntN(10)) {
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+ ViaVecTy.getVectorElementType());
+ Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm);
+ ReplaceNode(Node, Res);
+ return;
+ }
+ break;
+ }
}
// Select the default instruction.
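The si10 gate above exists because the [x]vrepli.{b,h,w,d} instructions encode their immediate in a 10-bit signed field. A minimal standalone check, assuming LLVM's APInt (the helper name here is ours, not the patch's):

#include "llvm/ADT/APInt.h"

// True iff the splat constant fits [x]vrepli's si10 field, i.e. lies in
// [-512, 511]; this is exactly the SplatValue.isSignedIntN(10) test above.
static bool fitsVrepliImmediate(const llvm::APInt &SplatValue) {
  return SplatValue.isSignedIntN(10);
}

For example, the constant 17 in the mul.ll updates below fits and becomes a single [x]vrepli; a value such as 600 would not, and falls through to the generic lowering added in LoongArchISelLowering.cpp.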
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 670620823440902..0a22f3c9930d674 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -243,11 +243,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Legal);
setOperationAction(ISD::UNDEF, VT, Legal);
- // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it
- // will be `Custom` handled in the future.
- setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -274,10 +272,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Legal);
setOperationAction(ISD::UNDEF, VT, Legal);
- // FIXME: Same as above.
- setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -382,10 +379,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerWRITE_REGISTER(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
}
return SDValue();
}
+static bool isConstantOrUndef(const SDValue Op) {
+ if (Op->isUndef())
+ return true;
+ if (isa<ConstantSDNode>(Op))
+ return true;
+ if (isa<ConstantFPSDNode>(Op))
+ return true;
+ return false;
+}
+
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+ for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+ if (isConstantOrUndef(Op->getOperand(i)))
+ return true;
+ return false;
+}
+
+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool Is128Vec = ResTy.is128BitVector();
+ bool Is256Vec = ResTy.is256BitVector();
+
+ if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
+ (!Subtarget.hasExtLASX() || !Is256Vec))
+ return SDValue();
+
+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ /*MinSplatBits=*/8) &&
+ SplatBitSize <= 64) {
+ // We can only cope with 8, 16, 32, or 64-bit elements.
+ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+ SplatBitSize != 64)
+ return SDValue();
+
+ EVT ViaVecTy;
+
+ switch (SplatBitSize) {
+ default:
+ return SDValue();
+ case 8:
+ ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
+ break;
+ case 16:
+ ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
+ break;
+ case 32:
+ ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
+ break;
+ case 64:
+ ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
+ break;
+ }
+
+ // SelectionDAG::getConstant will promote SplatValue appropriately.
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
+
+ // Bitcast to the type we originally wanted.
+ if (ViaVecTy != ResTy)
+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+ return Result;
+ }
+
+ if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
+ return Op;
+
+ if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+ // The resulting code is the same length as the expansion, but it doesn't
+ // use memory operations.
+ EVT ResTy = Node->getValueType(0);
+
+ assert(ResTy.isVector());
+
+ unsigned NumElts = ResTy.getVectorNumElements();
+ SDValue Vector = DAG.getUNDEF(ResTy);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+ Node->getOperand(i),
+ DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ }
+ return Vector;
+ }
+
+ return SDValue();
+}
+
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
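One subtlety in lowerBUILD_VECTOR above: isConstantSplat may report a SplatBitSize smaller than the result type's element size, in which case the constant is built in the matching ViaVecTy and bitcast back to ResTy. The bitcast is bit-exact, as this standalone sketch with example values (ours, not the patch's) demonstrates:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  using llvm::APInt;
  // A v8i16 splat of 0x0101 is detected as an 8-bit splat of 1, built as a
  // v16i8 constant, and bitcast to v8i16: both encode the same 128 bits.
  APInt AsBytes = APInt::getSplat(128, APInt(8, 1));
  APInt AsHalves = APInt::getSplat(128, APInt(16, 0x0101));
  assert(AsBytes == AsHalves);
  return 0;
}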
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index aa63cf0acabb189..3a81c0e827afb49 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -277,6 +277,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 475565db15c9363..4487152fb42b827 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -33,6 +33,13 @@ def lasxsplati32
def lasxsplati64
: PatFrag<(ops node:$e0),
(v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplatf32
+ : PatFrag<(ops node:$e0),
+ (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplatf64
+ : PatFrag<(ops node:$e0),
+ (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>;
//===----------------------------------------------------------------------===//
// Instruction class templates
@@ -1411,6 +1418,12 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
(XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;
+// XVREPL128VEI_{W/D}
+def : Pat<(lasxsplatf32 FPR32:$fj),
+ (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+def : Pat<(lasxsplatf64 FPR64:$fj),
+ (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+
// Loads/Stores
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
defm : LdPat<load, XVLD, vt>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d8fd132a1c59db3..deac5015882ddf2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -141,9 +141,13 @@ def lsxsplati16 : PatFrag<(ops node:$e0),
def lsxsplati32 : PatFrag<(ops node:$e0),
(v4i32 (build_vector node:$e0, node:$e0,
node:$e0, node:$e0))>;
-
def lsxsplati64 : PatFrag<(ops node:$e0),
(v2i64 (build_vector node:$e0, node:$e0))>;
+def lsxsplatf32 : PatFrag<(ops node:$e0),
+ (v4f32 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def lsxsplatf64 : PatFrag<(ops node:$e0),
+ (v2f64 (build_vector node:$e0, node:$e0))>;
def to_valid_timm : SDNodeXForm<timm, [{
auto CN = cast<ConstantSDNode>(N);
@@ -1498,6 +1502,12 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
(VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;
+// VREPLVEI_{W/D}
+def : Pat<(lsxsplatf32 FPR32:$fj),
+ (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+def : Pat<(lsxsplatf64 FPR64:$fj),
+ (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+
// Loads/Stores
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LdPat<load, VLD, vt>;
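The new FP-splat patterns rely on VREPLVEI.W/.D with immediate 0 broadcasting element 0 to every lane, with SUBREG_TO_REG placing the scalar FPR in exactly that lane. A simplified scalar C++ model of vreplvei.w (our approximation, not authoritative ISA semantics):

#include <array>

// Replicate element Idx of the source vector into all four lanes; the new
// patterns always use Idx == 0, where the scalar lives after SUBREG_TO_REG.
std::array<float, 4> vreplvei_w(const std::array<float, 4> &Vj, unsigned Idx) {
  std::array<float, 4> Vd;
  Vd.fill(Vj[Idx]);
  return Vd;
}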
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
new file mode 100644
index 000000000000000..6824ab5cda8d9d4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -0,0 +1,551 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind {
+; CHECK-LABEL: buildvector_v32i8_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <32 x i8> undef, i8 %a0, i8 0
+ %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+ store <32 x i8> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind {
+; CHECK-LABEL: buildvector_v16i16_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <16 x i16> undef, i16 %a0, i8 0
+ %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+ store <16 x i16> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind {
+; CHECK-LABEL: buildvector_v8i32_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <8 x i32> undef, i32 %a0, i8 0
+ %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+ store <8 x i32> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind {
+; CHECK-LABEL: buildvector_v4i64_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <4 x i64> undef, i64 %a0, i8 0
+ %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+ store <4 x i64> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind {
+; CHECK-LABEL: buildvector_v8f32_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <8 x float> undef, float %a0, i8 0
+ %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
+ store <8 x float> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind {
+; CHECK-LABEL: buildvector_v4f64_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <4 x double> undef, double %a0, i8 0
+ %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
+ store <4 x double> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v32i8_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v32i8_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.b $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i16_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v16i16_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.h $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i32_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v8i32_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.w $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i64_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4i64_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.d $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8f32_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v8f32_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a1, 260096
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f64_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4f64_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu52i.d $a1, $zero, 1023
+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v32i8_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v32i8_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0)
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i16_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v16i16_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0)
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i32_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v8i32_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0)
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i64_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4i64_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0)
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x i64> <i64 0, i64 1, i64 2, i64 3>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8f32_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v8f32_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0)
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <8 x float> <float 0.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f64_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4f64_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0)
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x double> <double 0.0, double 1.0, double 2.0, double 3.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind {
+; CHECK-LABEL: buildvector_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6
+; CHECK-NEXT: ld.b $a1, $sp, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7
+; CHECK-NEXT: ld.b $a1, $sp, 8
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8
+; CHECK-NEXT: ld.b $a1, $sp, 16
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9
+; CHECK-NEXT: ld.b $a1, $sp, 24
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10
+; CHECK-NEXT: ld.b $a1, $sp, 32
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11
+; CHECK-NEXT: ld.b $a1, $sp, 40
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12
+; CHECK-NEXT: ld.b $a1, $sp, 48
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13
+; CHECK-NEXT: ld.b $a1, $sp, 56
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14
+; CHECK-NEXT: ld.b $a1, $sp, 64
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15
+; CHECK-NEXT: ld.b $a1, $sp, 72
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 80
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 88
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 96
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 104
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 112
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 120
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 128
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 136
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 144
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 152
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 160
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 168
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 176
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 184
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.b $a1, $sp, 192
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0
+ %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1
+ %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2
+ %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3
+ %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4
+ %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5
+ %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6
+ %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7
+ %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8
+ %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9
+ %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10
+ %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11
+ %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12
+ %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13
+ %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14
+ %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15
+ %ins16 = insertelement <32 x i8> %ins15, i8 %a16, i32 16
+ %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17
+ %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18
+ %ins19 = insertelement <32 x i8> %ins18, i8 %a19, i32 19
+ %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20
+ %ins21 = insertelement <32 x i8> %ins20, i8 %a21, i32 21
+ %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22
+ %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23
+ %ins24 = insertelement <32 x i8> %ins23, i8 %a24, i32 24
+ %ins25 = insertelement <32 x i8> %ins24, i8 %a25, i32 25
+ %ins26 = insertelement <32 x i8> %ins25, i8 %a26, i32 26
+ %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27
+ %ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28
+ %ins29 = insertelement <32 x i8> %ins28, i8 %a29, i32 29
+ %ins30 = insertelement <32 x i8> %ins29, i8 %a30, i32 30
+ %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31
+ store <32 x i8> %ins31, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
+; CHECK-LABEL: buildvector_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6
+; CHECK-NEXT: ld.h $a1, $sp, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
+; CHECK-NEXT: ld.h $a1, $sp, 8
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.h $a1, $sp, 16
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.h $a1, $sp, 24
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.h $a1, $sp, 32
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.h $a1, $sp, 40
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.h $a1, $sp, 48
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.h $a1, $sp, 56
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ld.h $a1, $sp, 64
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0
+ %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1
+ %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2
+ %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3
+ %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4
+ %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5
+ %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6
+ %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7
+ %ins8 = insertelement <16 x i16> %ins7, i16 %a8, i32 8
+ %ins9 = insertelement <16 x i16> %ins8, i16 %a9, i32 9
+ %ins10 = insertelement <16 x i16> %ins9, i16 %a10, i32 10
+ %ins11 = insertelement <16 x i16> %ins10, i16 %a11, i32 11
+ %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12
+ %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13
+ %ins14 = insertelement <16 x i16> %ins13, i16 %a14, i32 14
+ %ins15 = insertelement <16 x i16> %ins14, i16 %a15, i32 15
+ store <16 x i16> %ins15, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
+; CHECK-LABEL: buildvector_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a5, 4
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a6, 5
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a7, 6
+; CHECK-NEXT: ld.w $a1, $sp, 0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0
+ %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1
+ %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2
+ %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3
+ %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4
+ %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5
+ %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6
+ %ins7 = insertelement <8 x i32> %ins6, i32 %a7, i32 7
+ store <8 x i32> %ins7, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind {
+; CHECK-LABEL: buildvector_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0
+ %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1
+ %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2
+ %ins3 = insertelement <4 x i64> %ins2, i64 %a3, i32 3
+ store <4 x i64> %ins3, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind {
+; CHECK-LABEL: buildvector_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movfr2gr.s $a1, $fa0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0
+; CHECK-NEXT: movfr2gr.s $a1, $fa1
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1
+; CHECK-NEXT: movfr2gr.s $a1, $fa2
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2
+; CHECK-NEXT: movfr2gr.s $a1, $fa3
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 3
+; CHECK-NEXT: movfr2gr.s $a1, $fa4
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4
+; CHECK-NEXT: movfr2gr.s $a1, $fa5
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5
+; CHECK-NEXT: movfr2gr.s $a1, $fa6
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6
+; CHECK-NEXT: movfr2gr.s $a1, $fa7
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <8 x float> undef, float %a0, i32 0
+ %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1
+ %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2
+ %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3
+ %ins4 = insertelement <8 x float> %ins3, float %a4, i32 4
+ %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5
+ %ins6 = insertelement <8 x float> %ins5, float %a6, i32 6
+ %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7
+ store <8 x float> %ins7, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind {
+; CHECK-LABEL: buildvector_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movfr2gr.d $a1, $fa0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0
+; CHECK-NEXT: movfr2gr.d $a1, $fa1
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
+; CHECK-NEXT: movfr2gr.d $a1, $fa2
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2
+; CHECK-NEXT: movfr2gr.d $a1, $fa3
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x double> undef, double %a0, i32 0
+ %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1
+ %ins2 = insertelement <4 x double> %ins1, double %a2, i32 2
+ %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3
+ store <4 x double> %ins3, ptr %dst
+ ret void
+}
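The long v32i8/v16i16 sequences above arise because LASX provides xvinsgr2vr only for .w and .d, so a byte or halfword insert into the upper 128 bits round-trips through the low half via xvpermi.q. A rough scalar model of one such high-lane byte insert (our simplification of the emitted pattern's net effect, not a cycle-accurate rendition):

#include <algorithm>
#include <array>
#include <cstdint>

using V32 = std::array<uint8_t, 32>;

// Net effect of the xvpermi.q / vinsgr2vr.b / xvpermi.q triple for a lane
// index I >= 16: swap the 128-bit halves, insert into what is now the low
// half, then swap back.
V32 insertHighByte(V32 V, unsigned I, uint8_t B) {
  std::swap_ranges(V.begin(), V.begin() + 16, V.begin() + 16);
  V[I - 16] = B; // vinsgr2vr.b on the low 128 bits
  std::swap_ranges(V.begin(), V.begin() + 16, V.begin() + 16);
  return V;
}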
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
new file mode 100644
index 000000000000000..3a74db5e1acb179
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -0,0 +1,376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind {
+; CHECK-LABEL: buildvector_v16i8_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vreplgr2vr.b $vr0, $a1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <16 x i8> undef, i8 %a0, i8 0
+ %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
+ store <16 x i8> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i16_splat(ptr %dst, i16 %a0) nounwind {
+; CHECK-LABEL: buildvector_v8i16_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <8 x i16> undef, i16 %a0, i8 0
+ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+ store <8 x i16> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i32_splat(ptr %dst, i32 %a0) nounwind {
+; CHECK-LABEL: buildvector_v4i32_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <4 x i32> undef, i32 %a0, i8 0
+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+ store <4 x i32> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind {
+; CHECK-LABEL: buildvector_v2i64_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vreplgr2vr.d $vr0, $a1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <2 x i64> undef, i64 %a0, i8 0
+ %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+ store <2 x i64> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f32_splat(ptr %dst, float %a0) nounwind {
+; CHECK-LABEL: buildvector_v4f32_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <4 x float> undef, float %a0, i8 0
+ %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2f64_splat(ptr %dst, double %a0) nounwind {
+; CHECK-LABEL: buildvector_v2f64_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <2 x double> undef, double %a0, i8 0
+ %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
+ store <2 x double> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i8_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v16i8_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.b $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i16_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v8i16_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.h $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i32_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4i32_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.w $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2i64_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v2i64_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vrepli.d $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <2 x i64> <i64 1, i64 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f32_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4f32_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu12i.w $a1, 260096
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2f64_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v2f64_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lu52i.d $a1, $zero, 1023
+; CHECK-NEXT: vreplgr2vr.d $vr0, $a1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <2 x double> <double 1.0, double 1.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i8_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v16i8_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0)
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i16_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v8i16_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0)
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i32_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4i32_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0)
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2i64_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v2i64_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0)
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <2 x i64> <i64 0, i64 1>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f32_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v4f32_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0)
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2f64_const(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v2f64_const:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0)
+; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0)
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <2 x double> <double 0.0, double 1.0>, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind {
+; CHECK-LABEL: buildvector_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6
+; CHECK-NEXT: ld.b $a1, $sp, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7
+; CHECK-NEXT: ld.b $a1, $sp, 8
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8
+; CHECK-NEXT: ld.b $a1, $sp, 16
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9
+; CHECK-NEXT: ld.b $a1, $sp, 24
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10
+; CHECK-NEXT: ld.b $a1, $sp, 32
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11
+; CHECK-NEXT: ld.b $a1, $sp, 40
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12
+; CHECK-NEXT: ld.b $a1, $sp, 48
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13
+; CHECK-NEXT: ld.b $a1, $sp, 56
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14
+; CHECK-NEXT: ld.b $a1, $sp, 64
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0
+ %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1
+ %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2
+ %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3
+ %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4
+ %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5
+ %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6
+ %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7
+ %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8
+ %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9
+ %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10
+ %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11
+ %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12
+ %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13
+ %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14
+ %ins15 = insertelement <16 x i8> %ins14, i8 %a15, i32 15
+ store <16 x i8> %ins15, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind {
+; CHECK-LABEL: buildvector_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6
+; CHECK-NEXT: ld.h $a1, $sp, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
+ %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1
+ %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2
+ %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3
+ %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4
+ %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5
+ %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6
+ %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7
+ store <8 x i16> %ins7, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
+; CHECK-LABEL: buildvector_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
+ %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1
+ %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2
+ %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3
+ store <4 x i32> %ins3, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind {
+; CHECK-LABEL: buildvector_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0
+ %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1
+ store <2 x i64> %ins1, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind {
+; CHECK-LABEL: buildvector_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movfr2gr.s $a1, $fa0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0
+; CHECK-NEXT: movfr2gr.s $a1, $fa1
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1
+; CHECK-NEXT: movfr2gr.s $a1, $fa2
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2
+; CHECK-NEXT: movfr2gr.s $a1, $fa3
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <4 x float> undef, float %a0, i32 0
+ %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1
+ %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2
+ %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3
+ store <4 x float> %ins3, ptr %dst
+ ret void
+}
+
+define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind {
+; CHECK-LABEL: buildvector_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movfr2gr.d $a1, $fa0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0
+; CHECK-NEXT: movfr2gr.d $a1, $fa1
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %ins0 = insertelement <2 x double> undef, double %a0, i32 0
+ %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1
+ store <2 x double> %ins1, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
index 5060240cd8b1320..d0be9cb7e3c8bdc 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll
@@ -180,10 +180,9 @@ entry:
define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: mul_v16i8_17:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a2, $zero, 17
-; CHECK-NEXT: vreplgr2vr.b $vr0, $a2
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.b $vr1, 17
+; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -196,10 +195,9 @@ entry:
define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: mul_v8i16_17:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a2, $zero, 17
-; CHECK-NEXT: vreplgr2vr.h $vr0, $a2
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, 17
+; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -212,10 +210,9 @@ entry:
define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: mul_v4i32_17:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a2, $zero, 17
-; CHECK-NEXT: vreplgr2vr.w $vr0, $a2
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.w $vr1, 17
+; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -228,10 +225,9 @@ entry:
define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind {
; CHECK-LABEL: mul_v2i64_17:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ori $a2, $zero, 17
-; CHECK-NEXT: vreplgr2vr.d $vr0, $a2
-; CHECK-NEXT: vld $vr1, $a1, 0
-; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.d $vr1, 17
+; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry: