[llvm] [LoongArch] Add codegen support for insertelement and custom lowering for BUILD_VECTOR (PR #73917)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 02:00:27 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: wanglei (wangleiat)
<details>
<summary>Changes</summary>
---
Patch is 69.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73917.diff
9 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp (+51-1)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+177-7)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+31)
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+16-1)
- (added) llvm/test/CodeGen/LoongArch/lasx/build-vector.ll (+551)
- (added) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll (+270)
- (added) llvm/test/CodeGen/LoongArch/lsx/build-vector.ll (+376)
- (added) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll (+196)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 0cfee6025218884..726856bda5dc59c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -77,13 +77,63 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::BITCAST: {
- if (VT.is128BitVector() || VT.is512BitVector()) {
+ if (VT.is128BitVector() || VT.is256BitVector()) {
ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
CurDAG->RemoveDeadNode(Node);
return;
}
break;
}
+ case ISD::BUILD_VECTOR: {
+ // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of
+ // 128/256-bit when LSX/LASX is enabled.
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned Op;
+ EVT ViaVecTy;
+ bool Is128Vec = BVN->getValueType(0).is128BitVector();
+ bool Is256Vec = BVN->getValueType(0).is256BitVector();
+
+ if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
+ break;
+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, 8))
+ break;
+
+ switch (SplatBitSize) {
+ default:
+ break;
+ case 8:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+ ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8;
+ break;
+ case 16:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+ ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16;
+ break;
+ case 32:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+ ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32;
+ break;
+ case 64:
+ Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+ ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64;
+ break;
+ }
+
+ SDNode *Res;
+ // If we have a signed 10 bit integer, we can splat it directly.
+ if (SplatValue.isSignedIntN(10)) {
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+ ViaVecTy.getVectorElementType());
+ Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm);
+ ReplaceNode(Node, Res);
+ return;
+ }
+ break;
+ }
}
// Select the default instruction.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f59beca523cbbc6..0a22f3c9930d674 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -243,11 +243,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Legal);
setOperationAction(ISD::UNDEF, VT, Legal);
- // FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it
- // will be `Custom` handled in the future.
- setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -274,10 +272,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Legal);
setOperationAction(ISD::UNDEF, VT, Legal);
- // FIXME: Same as above.
- setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -380,10 +377,115 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerRETURNADDR(Op, DAG);
case ISD::WRITE_REGISTER:
return lowerWRITE_REGISTER(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return lowerBUILD_VECTOR(Op, DAG);
}
return SDValue();
}
+static bool isConstantOrUndef(const SDValue Op) {
+ if (Op->isUndef())
+ return true;
+ if (isa<ConstantSDNode>(Op))
+ return true;
+ if (isa<ConstantFPSDNode>(Op))
+ return true;
+ return false;
+}
+
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+ for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+ if (isConstantOrUndef(Op->getOperand(i)))
+ return true;
+ return false;
+}
+
+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+ EVT ResTy = Op->getValueType(0);
+ SDLoc DL(Op);
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool Is128Vec = ResTy.is128BitVector();
+ bool Is256Vec = ResTy.is256BitVector();
+
+ if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
+ (!Subtarget.hasExtLASX() || !Is256Vec))
+ return SDValue();
+
+ if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+ /*MinSplatBits=*/8) &&
+ SplatBitSize <= 64) {
+ // We can only cope with 8, 16, 32, or 64-bit elements.
+ if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+ SplatBitSize != 64)
+ return SDValue();
+
+ EVT ViaVecTy;
+
+ switch (SplatBitSize) {
+ default:
+ return SDValue();
+ case 8:
+ ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
+ break;
+ case 16:
+ ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
+ break;
+ case 32:
+ ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
+ break;
+ case 64:
+ ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
+ break;
+ }
+
+ // SelectionDAG::getConstant will promote SplatValue appropriately.
+ SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
+
+ // Bitcast to the type we originally wanted.
+ if (ViaVecTy != ResTy)
+ Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
+
+ return Result;
+ }
+
+ if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
+ return Op;
+
+ if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+ // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+ // The resulting code is the same length as the expansion, but it doesn't
+ // use memory operations.
+ EVT ResTy = Node->getValueType(0);
+
+ assert(ResTy.isVector());
+
+ unsigned NumElts = ResTy.getVectorNumElements();
+ SDValue Vector = DAG.getUNDEF(ResTy);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
+ Node->getOperand(i),
+ DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ }
+ return Vector;
+ }
+
+ return SDValue();
+}
+
+SDValue
+LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (isa<ConstantSDNode>(Op->getOperand(2)))
+ return Op;
+ return SDValue();
+}
+
SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -3067,6 +3169,71 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
return SinkBB;
}
+static MachineBasicBlock *
+emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
+ const LoongArchSubtarget &Subtarget) {
+ unsigned InsOp;
+ unsigned HalfSize;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case LoongArch::PseudoXVINSGR2VR_B:
+ HalfSize = 16;
+ InsOp = LoongArch::VINSGR2VR_B;
+ break;
+ case LoongArch::PseudoXVINSGR2VR_H:
+ HalfSize = 8;
+ InsOp = LoongArch::VINSGR2VR_H;
+ break;
+ }
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
+ const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ // XDst = vector_insert XSrc, Elt, Idx
+ Register XDst = MI.getOperand(0).getReg();
+ Register XSrc = MI.getOperand(1).getReg();
+ Register Elt = MI.getOperand(2).getReg();
+ unsigned Idx = MI.getOperand(3).getImm();
+
+ Register ScratchReg1 = XSrc;
+ if (Idx >= HalfSize) {
+ ScratchReg1 = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
+ .addReg(XSrc)
+ .addReg(XSrc)
+ .addImm(1);
+ }
+
+ Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
+ Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
+ .addReg(ScratchReg1, 0, LoongArch::sub_128);
+ BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
+ .addReg(ScratchSubReg1)
+ .addReg(Elt)
+ .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
+
+ Register ScratchReg2 = XDst;
+ if (Idx >= HalfSize)
+ ScratchReg2 = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
+ .addImm(0)
+ .addReg(ScratchSubReg2)
+ .addImm(LoongArch::sub_128);
+
+ if (Idx >= HalfSize)
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
+ .addReg(XSrc)
+ .addReg(ScratchReg2)
+ .addImm(2);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -3122,6 +3289,9 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
case LoongArch::PseudoXVBNZ_W:
case LoongArch::PseudoXVBNZ_D:
return emitVecCondBranchPseudo(MI, BB, Subtarget);
+ case LoongArch::PseudoXVINSGR2VR_B:
+ case LoongArch::PseudoXVINSGR2VR_H:
+ return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 3141286671055dd..3a81c0e827afb49 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -276,6 +276,8 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 380206ddcf1066a..4487152fb42b827 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -33,6 +33,13 @@ def lasxsplati32
def lasxsplati64
: PatFrag<(ops node:$e0),
(v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplatf32
+ : PatFrag<(ops node:$e0),
+ (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0,
+ node:$e0, node:$e0, node:$e0, node:$e0))>;
+def lasxsplatf64
+ : PatFrag<(ops node:$e0),
+ (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>;
//===----------------------------------------------------------------------===//
// Instruction class templates
@@ -1065,6 +1072,13 @@ def PseudoXVBZ_W : VecCond<loongarch_vall_zero, v8i32, LASX256>;
def PseudoXVBZ_D : VecCond<loongarch_vall_zero, v4i64, LASX256>;
def PseudoXVBZ : VecCond<loongarch_vany_zero, v32i8, LASX256>;
+let usesCustomInserter = 1, Constraints = "$xd = $dst" in {
+def PseudoXVINSGR2VR_B
+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>;
+def PseudoXVINSGR2VR_H
+ : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>;
+} // usesCustomInserter = 1, Constraints = "$xd = $dst"
+
} // Predicates = [HasExtLASX]
multiclass PatXr<SDPatternOperator OpNode, string Inst> {
@@ -1365,12 +1379,23 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
(XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+// PseudoXVINSGR2VR_{B/H}
+def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
+ (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
+def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm),
+ (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>;
+
// XVINSGR2VR_{W/D}
def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
(XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
(XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
+def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
+ (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
+def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
+ (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
+
// XVPICKVE2GR_W[U]
def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32),
(XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>;
@@ -1393,6 +1418,12 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
(XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;
+// XVREPL128VEI_{W/D}
+def : Pat<(lasxsplatf32 FPR32:$fj),
+ (XVREPL128VEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+def : Pat<(lasxsplatf64 FPR64:$fj),
+ (XVREPL128VEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+
// Loads/Stores
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
defm : LdPat<load, XVLD, vt>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 980870e34503767..deac5015882ddf2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -141,9 +141,13 @@ def lsxsplati16 : PatFrag<(ops node:$e0),
def lsxsplati32 : PatFrag<(ops node:$e0),
(v4i32 (build_vector node:$e0, node:$e0,
node:$e0, node:$e0))>;
-
def lsxsplati64 : PatFrag<(ops node:$e0),
(v2i64 (build_vector node:$e0, node:$e0))>;
+def lsxsplatf32 : PatFrag<(ops node:$e0),
+ (v4f32 (build_vector node:$e0, node:$e0,
+ node:$e0, node:$e0))>;
+def lsxsplatf64 : PatFrag<(ops node:$e0),
+ (v2f64 (build_vector node:$e0, node:$e0))>;
def to_valid_timm : SDNodeXForm<timm, [{
auto CN = cast<ConstantSDNode>(N);
@@ -1462,6 +1466,11 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
(VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
+def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
+ (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
+def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
+ (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>;
+
// VPICKVE2GR_{B/H/W}[U]
def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8),
(VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>;
@@ -1493,6 +1502,12 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
(VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;
+// VREPLVEI_{W/D}
+def : Pat<(lsxsplatf32 FPR32:$fj),
+ (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
+def : Pat<(lsxsplatf64 FPR64:$fj),
+ (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+
// Loads/Stores
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LdPat<load, VLD, vt>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
new file mode 100644
index 000000000000000..6824ab5cda8d9d4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -0,0 +1,551 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind {
+; CHECK-LABEL: buildvector_v32i8_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <32 x i8> undef, i8 %a0, i8 0
+ %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
+ store <32 x i8> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind {
+; CHECK-LABEL: buildvector_v16i16_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <16 x i16> undef, i16 %a0, i8 0
+ %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
+ store <16 x i16> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind {
+; CHECK-LABEL: buildvector_v8i32_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <8 x i32> undef, i32 %a0, i8 0
+ %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
+ store <8 x i32> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind {
+; CHECK-LABEL: buildvector_v4i64_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <4 x i64> undef, i64 %a0, i8 0
+ %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
+ store <4 x i64> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind {
+; CHECK-LABEL: buildvector_v8f32_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
+; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <8 x float> undef, float %a0, i8 0
+ %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
+ store <8 x float> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind {
+; CHECK-LABEL: buildvector_v4f64_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %insert = insertelement <4 x double> undef, double %a0, i8 0
+ %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
+ store <4 x double> %splat, ptr %dst
+ ret void
+}
+
+define void @buildvector_v32i8_const_splat(ptr %dst) nounwind {
+; CHECK-LABEL: buildvector_v32i8_const_splat:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrepli.b $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ store <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8...
[truncated]
``````````
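For readers skimming the truncated diff: the non-constant path in `lowerBUILD_VECTOR` replaces the generic store-based expansion with a chain of `INSERT_VECTOR_ELT` nodes. A minimal sketch of the kind of input that exercises it is below; the function name and the described output are illustrative and not taken from the truncated test files.

```llvm
; Hypothetical v4i32 case with all-variable elements (LSX enabled).
define void @buildvector_v4i32_nonconst(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
entry:
  %v0 = insertelement <4 x i32> undef, i32 %a0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %a1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %a2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %a3, i32 3
  store <4 x i32> %v3, ptr %dst
  ret void
}
; With this patch, the BUILD_VECTOR formed from the chain above should be
; re-expanded into INSERT_VECTOR_ELT nodes, which the existing vector_insert
; patterns select as vinsgr2vr.w instructions followed by a vst, instead of
; spilling each element through the stack (exact register assignment not shown).
```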
</details>
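One part of the patch that is easy to miss in the raw diff is `emitPseudoXVINSGR2VR`: LASX provides `xvinsgr2vr.{w,d}` but no byte/halfword variants, so inserts of i8/i16 elements go through `PseudoXVINSGR2VR_{B,H}` and are expanded after instruction selection. A hedged sketch of my reading of that expansion (example name and exact instruction forms are illustrative):

```llvm
; Hypothetical insert into the high 128-bit half of a 256-bit vector.
define <32 x i8> @insert_v32i8_high(<32 x i8> %v, i8 %e) nounwind {
  %r = insertelement <32 x i8> %v, i8 %e, i32 20
  ret <32 x i8> %r
}
; Per emitPseudoXVINSGR2VR above, index 20 >= HalfSize (16), so the pseudo
; should expand to roughly:
;   xvpermi.q   - move the high half of the source down into a scratch register
;   vinsgr2vr.b - insert the element into the 128-bit subregister at index 20-16
;   xvpermi.q   - merge the updated half back into the destination
; Indices below 16 skip both xvpermi.q steps and use vinsgr2vr.b directly.
```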
https://github.com/llvm/llvm-project/pull/73917