[llvm] fde15cb - [LoongArch] Enable more vector tests for 32-bit target (#160656)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 06:08:45 PDT 2025
Author: hev
Date: 2025-09-25T21:08:39+08:00
New Revision: fde15cb3eeb68461c66f6b5b928c7093393496a0
URL: https://github.com/llvm/llvm-project/commit/fde15cb3eeb68461c66f6b5b928c7093393496a0
DIFF: https://github.com/llvm/llvm-project/commit/fde15cb3eeb68461c66f6b5b928c7093393496a0.diff
LOG: [LoongArch] Enable more vector tests for 32-bit target (#160656)
Added:
llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr-d.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr-d.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr-d.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr-d.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll
llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-any-ext.ll
llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32baa2d111270..801e557a22520 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -666,6 +666,7 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
+ unsigned ResBits = OpVT.getScalarSizeInBits();
unsigned LegalVecSize = 128;
bool isLASX256Vector =
@@ -691,10 +692,11 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
if (isLASX256Vector) {
SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
- DAG.getConstant(2, DL, MVT::i64));
+ DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
}
+ Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
@@ -727,15 +729,16 @@ SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
MVT VecTy = Val.getSimpleValueType();
+ MVT GRLenVT = Subtarget.getGRLenVT();
for (int i = NumEles; i > 1; i /= 2) {
- SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
+ SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
}
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
- DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
+ DAG.getConstant(0, DL, GRLenVT));
}
SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
@@ -1119,6 +1122,10 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
SDValue Src = Op->getOperand(0);
SDLoc DL(Op);
+ // LoongArchISD::BITREV_8B is not supported on LA32.
+ if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
+ return SDValue();
+
EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
unsigned int OrigEltNum = ResTy.getVectorNumElements();
unsigned int NewEltNum = NewVT.getVectorNumElements();
@@ -1128,7 +1135,7 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
SmallVector<SDValue, 8> Ops;
for (unsigned int i = 0; i < NewEltNum; i++) {
SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
- DAG.getConstant(i, DL, MVT::i64));
+ DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
? (unsigned)LoongArchISD::BITREV_8B
: (unsigned)ISD::BITREVERSE;
@@ -1611,9 +1618,8 @@ lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
- APInt Imm(64, SplatIndex);
return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
- DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
+ DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
}
return SDValue();
@@ -1671,7 +1677,7 @@ lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
}
// Calculate the immediate. Replace any remaining undefs with zero
- APInt Imm(64, 0);
+ int Imm = 0;
for (int i = SubVecSize - 1; i >= 0; --i) {
int M = SubMask[i];
@@ -1946,11 +1952,12 @@ static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
SmallVector<SDValue, 16> Ops;
for (auto M : Mask)
- Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
+ Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
@@ -2030,7 +2037,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
return NewShuffle;
- if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
+ if ((Result =
+ lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
return Result;
return SDValue();
}
@@ -2088,7 +2096,8 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
// LoongArch LASX only have XVPERM_W.
if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
return SDValue();
@@ -2119,9 +2128,10 @@ static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
return SDValue();
SmallVector<SDValue, 8> Masks;
+ MVT GRLenVT = Subtarget.getGRLenVT();
for (unsigned i = 0; i < NumElts; ++i)
- Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
- : DAG.getConstant(Mask[i], DL, MVT::i64));
+ Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
+ : DAG.getConstant(Mask[i], DL, GRLenVT));
SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
@@ -2533,7 +2543,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
Subtarget)))
return Result;
- if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
+ if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG,
+ Subtarget)))
return Result;
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
V1, V2, DAG)))
@@ -3102,12 +3113,33 @@ LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
return SDValue();
SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
- SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
-
SmallVector<SDValue, 32> RawIndices;
- for (unsigned i = 0; i < NumElts; ++i)
- RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
- SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
+ SDValue SplatIdx;
+ SDValue Indices;
+
+ if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
+ MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ RawIndices.push_back(Op2);
+ RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
+ }
+ SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
+ SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
+
+ RawIndices.clear();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
+ RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
+ }
+ Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
+ Indices = DAG.getBitcast(IdxVTy, Indices);
+ } else {
+ SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
+
+ for (unsigned i = 0; i < NumElts; ++i)
+ RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
+ Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
+ }
// insert vec, elt, idx
// =>
@@ -5129,7 +5161,7 @@ performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
if (Opc == ISD::DELETED_NODE)
return SDValue();
- SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
+ SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
V = DAG.getZExtOrTrunc(V, DL, T);
return DAG.getBitcast(VT, V);
@@ -5142,6 +5174,7 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
+ MVT GRLenVT = Subtarget.getGRLenVT();
if (!DCI.isBeforeLegalizeOps())
return SDValue();
@@ -5209,11 +5242,11 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
if (Src.getSimpleValueType() == MVT::v32i8) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
- Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
- Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
- Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
+ Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
+ Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
DAG.getConstant(16, DL, MVT::i8));
- V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
+ V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
} else if (UseLASX) {
return SDValue();
}
@@ -5221,7 +5254,7 @@ static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
if (!V) {
Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
- V = DAG.getNode(Opc, DL, MVT::i64, Src);
+ V = DAG.getNode(Opc, DL, GRLenVT, Src);
}
EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
@@ -5878,6 +5911,22 @@ static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
}
+template <unsigned W>
+static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
+ unsigned ResOp) {
+ unsigned Imm = N->getConstantOperandVal(2);
+ if (!isUInt<W>(Imm)) {
+ const StringRef ErrorMsg = "argument out of range";
+ DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+ SDLoc DL(N);
+ SDValue Vec = N->getOperand(1);
+ SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
+ SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
+ return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
+}
+
static SDValue
performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -6367,6 +6416,68 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
N->getOperand(1),
DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
N->getOperand(2)));
+ case Intrinsic::loongarch_lsx_vpickve2gr_b:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_h:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_w:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_w:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_bu:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_hu:
+ case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_vpickve2gr_wu:
+ if (!Subtarget.is64Bit())
+ return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
+ break;
+ case Intrinsic::loongarch_lsx_bz_b:
+ case Intrinsic::loongarch_lsx_bz_h:
+ case Intrinsic::loongarch_lsx_bz_w:
+ case Intrinsic::loongarch_lsx_bz_d:
+ case Intrinsic::loongarch_lasx_xbz_b:
+ case Intrinsic::loongarch_lasx_xbz_h:
+ case Intrinsic::loongarch_lasx_xbz_w:
+ case Intrinsic::loongarch_lasx_xbz_d:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
+ case Intrinsic::loongarch_lsx_bz_v:
+ case Intrinsic::loongarch_lasx_xbz_v:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
+ case Intrinsic::loongarch_lsx_bnz_b:
+ case Intrinsic::loongarch_lsx_bnz_h:
+ case Intrinsic::loongarch_lsx_bnz_w:
+ case Intrinsic::loongarch_lsx_bnz_d:
+ case Intrinsic::loongarch_lasx_xbnz_b:
+ case Intrinsic::loongarch_lasx_xbnz_h:
+ case Intrinsic::loongarch_lasx_xbnz_w:
+ case Intrinsic::loongarch_lasx_xbnz_d:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
+ case Intrinsic::loongarch_lsx_bnz_v:
+ case Intrinsic::loongarch_lasx_xbnz_v:
+ if (!Subtarget.is64Bit())
+ return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
+ N->getOperand(1));
+ break;
}
return SDValue();
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d99a57e562528..b0eb51a92c6c6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -26,7 +26,7 @@ def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
def SDT_LoongArchV2RUimm
: SDTypeProfile<1, 3,
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
- SDTCisVT<3, i64>]>;
+ SDTCisVT<3, GRLenVT>]>;
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -1482,7 +1482,7 @@ multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> {
}
multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
- Operand ImmOpnd, Operand IdxOpnd, ValueType elt = i64> {
+ Operand ImmOpnd, Operand IdxOpnd, ValueType elt = GRLenVT> {
def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)), BaseAddr:$rj),
(Inst vt:$vd, BaseAddr:$rj, 0, IdxOpnd:$idx)>;
@@ -2110,8 +2110,8 @@ def : Pat<(GRLenVT (vector_extract v4i32:$vj, GRLenVT:$rk)),
(COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, GRLenVT:$rk),
sub_32)),
GPR)>;
-def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)),
- (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk),
+def : Pat<(GRLenVT (vector_extract v2i64:$vj, GRLenVT:$rk)),
+ (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, GRLenVT:$rk),
sub_64)),
GPR)>;
def : Pat<(f32 (vector_extract v4f32:$vj, GRLenVT:$rk)),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
index 87ee4ad025395..8b12216d0f856 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitreverse.ll
@@ -1,27 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+lasx --verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefix=LA64
declare <32 x i8> @llvm.bitreverse.v32i8(<32 x i8>)
define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v32i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v32i8:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslli.b $xr1, $xr0, 4
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvandi.b $xr1, $xr0, 51
+; LA32-NEXT: xvslli.b $xr1, $xr1, 2
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 2
+; LA32-NEXT: xvandi.b $xr0, $xr0, 51
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvandi.b $xr1, $xr0, 85
+; LA32-NEXT: xvslli.b $xr1, $xr1, 1
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 1
+; LA32-NEXT: xvandi.b $xr0, $xr0, 85
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v32i8:
+; LA64: # %bb.0:
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 2
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
+; LA64-NEXT: xvori.b $xr0, $xr1, 0
+; LA64-NEXT: ret
%b = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %a)
ret <32 x i8> %b
}
@@ -29,23 +48,53 @@ define <32 x i8> @test_bitreverse_v32i8(<32 x i8> %a) nounwind {
declare <16 x i16> @llvm.bitreverse.v16i16(<16 x i16>)
define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v16i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvshuf4i.h $xr0, $xr2, 27
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v16i16:
+; LA32: # %bb.0:
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 5
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 4
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 7
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 3
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT: xvshuf4i.h $xr0, $xr2, 27
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v16i16:
+; LA64: # %bb.0:
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 2
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvshuf4i.h $xr0, $xr2, 27
+; LA64-NEXT: ret
%b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
ret <16 x i16> %b
}
@@ -53,23 +102,53 @@ define <16 x i16> @test_bitreverse_v16i16(<16 x i16> %a) nounwind {
declare <8 x i32> @llvm.bitreverse.v8i32(<8 x i32>)
define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
-; CHECK-NEXT: xvshuf4i.w $xr0, $xr2, 177
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 4
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 5
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 7
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 1
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 2
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 3
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
+; LA32-NEXT: xvori.b $xr0, $xr1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 2
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvshuf4i.w $xr0, $xr2, 177
+; LA64-NEXT: ret
%b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
ret <8 x i32> %b
}
@@ -77,23 +156,43 @@ define <8 x i32> @test_bitreverse_v8i32(<8 x i32> %a) nounwind {
declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>)
define <4 x i64> @test_bitreverse_v4i64(<4 x i64> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr2, $a0, 1
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: xvpermi.q $xr1, $xr2, 2
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; LA32-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI3_0)
+; LA32-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
+; LA32-NEXT: xvslli.b $xr1, $xr0, 4
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 4
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvandi.b $xr1, $xr0, 51
+; LA32-NEXT: xvslli.b $xr1, $xr1, 2
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 2
+; LA32-NEXT: xvandi.b $xr0, $xr0, 51
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvandi.b $xr1, $xr0, 85
+; LA32-NEXT: xvslli.b $xr1, $xr1, 1
+; LA32-NEXT: xvsrli.b $xr0, $xr0, 1
+; LA32-NEXT: xvandi.b $xr0, $xr0, 85
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 2
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: xvpermi.q $xr1, $xr2, 2
+; LA64-NEXT: xvori.b $xr0, $xr1, 0
+; LA64-NEXT: ret
%b = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
ret <4 x i64> %b
}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
index 156c829c2dfb6..45b25013c9173 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/fpowi.ll
@@ -1,97 +1,178 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64
declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32)
define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
-; CHECK-LABEL: powi_v8f32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -128
-; CHECK-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
-; CHECK-NEXT: addi.w $fp, $a0, 0
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 5
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 4
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
-; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
-; CHECK-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 6
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; CHECK-NEXT: vextrins.w $vr1, $vr0, 32
-; CHECK-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 7
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
-; CHECK-NEXT: vextrins.w $vr1, $vr0, 48
-; CHECK-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; CHECK-NEXT: vextrins.w $vr0, $vr1, 16
-; CHECK-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 2
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; CHECK-NEXT: vextrins.w $vr1, $vr0, 32
-; CHECK-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3
-; CHECK-NEXT: # kill: def $f0 killed $f0 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powisf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
-; CHECK-NEXT: vextrins.w $vr1, $vr0, 48
-; CHECK-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 2
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
-; CHECK-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 128
-; CHECK-NEXT: ret
+; LA32-LABEL: powi_v8f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -128
+; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 5
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 4
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr0, $vr1, 16
+; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 6
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 32
+; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 7
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 48
+; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 1
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 0
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr0, $vr1, 16
+; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 2
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 32
+; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.w $xr0, $xr0, 3
+; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powisf2
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA32-NEXT: vextrins.w $vr1, $vr0, 48
+; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 2
+; LA32-NEXT: xvori.b $xr0, $xr1, 0
+; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 128
+; LA32-NEXT: ret
+;
+; LA64-LABEL: powi_v8f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -128
+; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill
+; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill
+; LA64-NEXT: addi.w $fp, $a0, 0
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 5
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 4
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr0, $vr1, 16
+; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 6
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 32
+; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 7
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 48
+; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 1
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr0, $vr1, 16
+; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 2
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 32
+; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.w $xr0, $xr0, 3
+; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powisf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
+; LA64-NEXT: vextrins.w $vr1, $vr0, 48
+; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 2
+; LA64-NEXT: xvori.b $xr0, $xr1, 0
+; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 128
+; LA64-NEXT: ret
entry:
%res = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %va, i32 %b)
ret <8 x float> %res
@@ -100,53 +181,96 @@ entry:
declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32)
define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
-; CHECK-LABEL: powi_v4f64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -112
-; CHECK-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill
-; CHECK-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
-; CHECK-NEXT: addi.w $fp, $a0, 0
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 3
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; CHECK-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 2
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
-; CHECK-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
-; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
-; CHECK-NEXT: move $a0, $fp
-; CHECK-NEXT: pcaddu18i $ra, %call36(__powidf2)
-; CHECK-NEXT: jirl $ra, $ra, 0
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
-; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 112
-; CHECK-NEXT: ret
+; LA32-LABEL: powi_v4f64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -112
+; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 3
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: bl __powidf2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powidf2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.d $vr0, $vr1, 16
+; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 1
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powidf2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA32-NEXT: xvpickve.d $xr0, $xr0, 0
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: bl __powidf2
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA32-NEXT: vextrins.d $vr0, $vr1, 16
+; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
+; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 112
+; LA32-NEXT: ret
+;
+; LA64-LABEL: powi_v4f64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -112
+; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill
+; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill
+; LA64-NEXT: addi.w $fp, $a0, 0
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 3
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 2
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.d $vr0, $vr1, 16
+; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 1
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
+; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload
+; LA64-NEXT: xvpickve.d $xr0, $xr0, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0
+; LA64-NEXT: move $a0, $fp
+; LA64-NEXT: pcaddu18i $ra, %call36(__powidf2)
+; LA64-NEXT: jirl $ra, $ra, 0
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload
+; LA64-NEXT: vextrins.d $vr0, $vr1, 16
+; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload
+; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
+; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 112
+; LA64-NEXT: ret
entry:
%res = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %va, i32 %b)
ret <4 x double> %res
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll
index 215436823af83..623a6de1bc402 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecipe.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,+frecipe < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s
declare <8 x float> @llvm.loongarch.lasx.xvfrecipe.s(<8 x float>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll
index ad36c3aa5c29d..743ab10cc9b00 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrte.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx,+frecipe < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx,+frecipe < %s | FileCheck %s
declare <8 x float> @llvm.loongarch.lasx.xvfrsqrte.s(<8 x float>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll
index a671e9979b2fe..e6688bacd3bf9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll
@@ -1,3 +1,4 @@
+; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s
; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll
index 5ed4104c295fa..cfe9ec575222a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll
@@ -1,3 +1,4 @@
+; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s
; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d-invalid-imm.ll
new file mode 100644
index 0000000000000..5a5af4356f714
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d-invalid-imm.ll
@@ -0,0 +1,33 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32)
+
+define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1)
+ ret i64 %res
+}
+
+define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4)
+ ret i64 %res
+}
+
+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32)
+
+define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1)
+ ret i64 %res
+}
+
+define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4)
+ ret i64 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d.ll
new file mode 100644
index 0000000000000..178dd92cbdb80
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-d.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32)
+
+define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvpickve2gr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1)
+ ret i64 %res
+}
+
+declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32)
+
+define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvpickve2gr_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1)
+ ret i64 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll
index 93056b272dfc5..0c91b56387f79 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll
@@ -1,3 +1,4 @@
+; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s 2>&1 | FileCheck %s
; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32)
@@ -16,22 +17,6 @@ entry:
ret i32 %res
}
-declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32)
-
-define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1)
- ret i64 %res
-}
-
-define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4)
- ret i64 %res
-}
-
declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32)
define i32 @lasx_xvpickve2gr_wu_lo(<8 x i32> %va) nounwind {
@@ -47,19 +32,3 @@ entry:
%res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 8)
ret i32 %res
}
-
-declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32)
-
-define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1)
- ret i64 %res
-}
-
-define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4)
- ret i64 %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll
index 0617e7424321b..a6f19ce0c0140 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll
@@ -1,9 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
-
-
-
declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32)
define i32 @lasx_xvpickve2gr_w(<8 x i32> %va) nounwind {
@@ -16,18 +14,6 @@ entry:
ret i32 %res
}
-declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32)
-
-define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind {
-; CHECK-LABEL: lasx_xvpickve2gr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: ret
-entry:
- %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1)
- ret i64 %res
-}
-
declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32)
define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va) nounwind {
@@ -39,15 +25,3 @@ entry:
%res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 1)
ret i32 %res
}
-
-declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32)
-
-define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind {
-; CHECK-LABEL: lasx_xvpickve2gr_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1
-; CHECK-NEXT: ret
-entry:
- %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1)
- ret i64 %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr-d.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr-d.ll
new file mode 100644
index 0000000000000..79ec7b51f6278
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr-d.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
+; CHECK-LABEL: xvrepl_ins_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
+; CHECK-NEXT: ret
+entry:
+ %0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
+ %1 = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %0, i64 %b, i32 1)
+ ret <4 x i64> %1
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32 immarg)
+declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
index 2e538ed66b250..31b809e016564 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
@@ -13,19 +14,5 @@ entry:
ret <8 x i32> %1
}
-define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
-; CHECK-LABEL: xvrepl_ins_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
-; CHECK-NEXT: ret
-entry:
- %0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
- %1 = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %0, i64 %b, i32 1)
- ret <4 x i64> %1
-}
-
declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32 immarg)
declare <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32)
-declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32 immarg)
-declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr-d.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr-d.ll
new file mode 100644
index 0000000000000..61bc89249d97e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr-d.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64)
+
+define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind {
+; CHECK-LABEL: lasx_xvreplgr2vr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
+; CHECK-NEXT: ret
+entry:
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
+ ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll
index c71abd2205c67..a3c0e261e7122 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
declare <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32)
@@ -36,15 +37,3 @@ entry:
%res = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
ret <8 x i32> %res
}
-
-declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64)
-
-define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind {
-; CHECK-LABEL: lasx_xvreplgr2vr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
-; CHECK-NEXT: ret
-entry:
- %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
- ret <4 x i64> %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
index 6e3e2e0330f52..5e234e4bd8210 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
declare i32 @llvm.loongarch.lasx.xbz.v(<32 x i8>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
index a466b78bf8d2d..38e3289ef4cba 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
declare i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
index 36e65fc5b3281..f6917cffb36b5 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
declare i32 @llvm.loongarch.lasx.xbz.b(<32 x i8>)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
index cf0496fb8fb89..60b51755681a4 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
@@ -3,18 +3,11 @@
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: extract_32xi8:
-; LA32: # %bb.0:
-; LA32-NEXT: xvld $xr0, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA32-NEXT: st.b $a0, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: extract_32xi8:
-; LA64: # %bb.0:
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvstelm.b $xr0, $a1, 0, 1
-; LA64-NEXT: ret
+; CHECK-LABEL: extract_32xi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
+; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 1
store i8 %e, ptr %dst
@@ -22,18 +15,11 @@ define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
}
define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: extract_16xi16:
-; LA32: # %bb.0:
-; LA32-NEXT: xvld $xr0, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: st.h $a0, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: extract_16xi16:
-; LA64: # %bb.0:
-; LA64-NEXT: xvld $xr0, $a0, 0
-; LA64-NEXT: xvstelm.h $xr0, $a1, 0, 1
-; LA64-NEXT: ret
+; CHECK-LABEL: extract_16xi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
+; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 1
store i16 %e, ptr %dst
@@ -111,8 +97,7 @@ define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-NEXT: movgr2fr.w $fa1, $a2
; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
; LA32-NEXT: xvshuf.b $xr0, $xr2, $xr0, $xr1
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 0
-; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: xvstelm.b $xr0, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_32xi8_idx:
@@ -136,8 +121,7 @@ define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind {
; LA32-NEXT: movgr2fr.w $fa1, $a2
; LA32-NEXT: xvpermi.q $xr2, $xr0, 1
; LA32-NEXT: xvshuf.h $xr1, $xr2, $xr0
-; LA32-NEXT: vpickve2gr.h $a0, $vr1, 0
-; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: xvstelm.h $xr1, $a1, 0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: extract_16xi16_idx:
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
index ca405314686e6..af1598f69569e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: insert_extract_v32i8:
@@ -68,11 +69,19 @@ entry:
}
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
-; CHECK-LABEL: insert_extract_v4i64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
-; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_extract_v4i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvpickve.w $xr1, $xr0, 6
+; LA32-NEXT: xvpickve.w $xr2, $xr0, 7
+; LA32-NEXT: xvinsve0.w $xr0, $xr1, 2
+; LA32-NEXT: xvinsve0.w $xr0, $xr2, 3
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_extract_v4i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvpickve.d $xr1, $xr0, 3
+; LA64-NEXT: xvinsve0.d $xr0, $xr1, 1
+; LA64-NEXT: ret
entry:
%b = extractelement <4 x i64> %a, i32 3
%c = insertelement <4 x i64> %a, i64 %b, i32 1
@@ -80,10 +89,17 @@ entry:
}
define <4 x i64> @insert_extract0_v4i64(<4 x i64> %a) nounwind {
-; CHECK-LABEL: insert_extract0_v4i64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvinsve0.d $xr0, $xr0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_extract0_v4i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvpickve.w $xr1, $xr0, 1
+; LA32-NEXT: xvinsve0.w $xr0, $xr0, 2
+; LA32-NEXT: xvinsve0.w $xr0, $xr1, 3
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_extract0_v4i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvinsve0.d $xr0, $xr0, 1
+; LA64-NEXT: ret
entry:
%b = extractelement <4 x i64> %a, i32 0
%c = insertelement <4 x i64> %a, i64 %b, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
index 4e173c4feadba..c5d20003742e5 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
; CHECK-LABEL: insert_extract_v32i8:
@@ -54,10 +55,22 @@ entry:
}
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
-; CHECK-LABEL: insert_extract_v4i64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvextrins.d $xr0, $xr0, 1
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_extract_v4i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvpickve.w $xr1, $xr0, 2
+; LA32-NEXT: xvpickve.w $xr2, $xr0, 3
+; LA32-NEXT: xvpickve.w $xr3, $xr0, 6
+; LA32-NEXT: xvpickve.w $xr4, $xr0, 7
+; LA32-NEXT: xvinsve0.w $xr0, $xr1, 0
+; LA32-NEXT: xvinsve0.w $xr0, $xr2, 1
+; LA32-NEXT: xvinsve0.w $xr0, $xr3, 4
+; LA32-NEXT: xvinsve0.w $xr0, $xr4, 5
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_extract_v4i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvextrins.d $xr0, $xr0, 1
+; LA64-NEXT: ret
entry:
%b_lo = extractelement <4 x i64> %a, i32 1
%b_hi = extractelement <4 x i64> %a, i32 3
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
index aa29264924df9..2f1db43e68fef 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8:
@@ -121,12 +122,20 @@ define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind {
}
define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind {
-; CHECK-LABEL: insert_4xi64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1
-; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_4xi64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvinsgr2vr.w $xr0, $a2, 2
+; LA32-NEXT: xvinsgr2vr.w $xr0, $a3, 3
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_4xi64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvinsgr2vr.d $xr0, $a2, 1
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%v_new = insertelement <4 x i64> %v, i64 %in, i32 1
store <4 x i64> %v_new, ptr %dst
@@ -162,18 +171,30 @@ define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
}
define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
-; CHECK-LABEL: insert_32xi8_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: xvld $xr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a0
-; CHECK-NEXT: xvseq.b $xr0, $xr2, $xr0
-; CHECK-NEXT: xvreplgr2vr.b $xr2, $a2
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_32xi8_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a4, %pc_hi20(.LCPI12_0)
+; LA32-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI12_0)
+; LA32-NEXT: xvld $xr1, $a0, 0
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a3
+; LA32-NEXT: xvseq.b $xr0, $xr2, $xr0
+; LA32-NEXT: xvreplgr2vr.b $xr2, $a2
+; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_32xi8_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI12_0)
+; LA64-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI12_0)
+; LA64-NEXT: xvld $xr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT: xvseq.b $xr0, $xr2, $xr0
+; LA64-NEXT: xvreplgr2vr.b $xr2, $a2
+; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
store <32 x i8> %v_new, ptr %dst
@@ -181,18 +202,30 @@ define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
}
define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
-; CHECK-LABEL: insert_16xi16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI13_0)
-; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI13_0)
-; CHECK-NEXT: xvld $xr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: xvreplgr2vr.h $xr2, $a0
-; CHECK-NEXT: xvseq.h $xr0, $xr2, $xr0
-; CHECK-NEXT: xvreplgr2vr.h $xr2, $a2
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_16xi16_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a4, %pc_hi20(.LCPI13_0)
+; LA32-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI13_0)
+; LA32-NEXT: xvld $xr1, $a0, 0
+; LA32-NEXT: xvreplgr2vr.h $xr2, $a3
+; LA32-NEXT: xvseq.h $xr0, $xr2, $xr0
+; LA32-NEXT: xvreplgr2vr.h $xr2, $a2
+; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_16xi16_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI13_0)
+; LA64-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI13_0)
+; LA64-NEXT: xvld $xr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: xvreplgr2vr.h $xr2, $a0
+; LA64-NEXT: xvseq.h $xr0, $xr2, $xr0
+; LA64-NEXT: xvreplgr2vr.h $xr2, $a2
+; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
store <16 x i16> %v_new, ptr %dst
@@ -200,18 +233,30 @@ define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
}
define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
-; CHECK-LABEL: insert_8xi32_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI14_0)
-; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI14_0)
-; CHECK-NEXT: xvld $xr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: xvreplgr2vr.w $xr2, $a0
-; CHECK-NEXT: xvseq.w $xr0, $xr2, $xr0
-; CHECK-NEXT: xvreplgr2vr.w $xr2, $a2
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_8xi32_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a4, %pc_hi20(.LCPI14_0)
+; LA32-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI14_0)
+; LA32-NEXT: xvld $xr1, $a0, 0
+; LA32-NEXT: xvreplgr2vr.w $xr2, $a3
+; LA32-NEXT: xvseq.w $xr0, $xr2, $xr0
+; LA32-NEXT: xvreplgr2vr.w $xr2, $a2
+; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_8xi32_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI14_0)
+; LA64-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI14_0)
+; LA64-NEXT: xvld $xr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: xvreplgr2vr.w $xr2, $a0
+; LA64-NEXT: xvseq.w $xr0, $xr2, $xr0
+; LA64-NEXT: xvreplgr2vr.w $xr2, $a2
+; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
store <8 x i32> %v_new, ptr %dst
@@ -219,18 +264,36 @@ define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
}
define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
-; CHECK-LABEL: insert_4xi64_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI15_0)
-; CHECK-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI15_0)
-; CHECK-NEXT: xvld $xr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: xvreplgr2vr.d $xr2, $a0
-; CHECK-NEXT: xvseq.d $xr0, $xr2, $xr0
-; CHECK-NEXT: xvreplgr2vr.d $xr2, $a2
-; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
-; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_4xi64_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a5, %pc_hi20(.LCPI15_0)
+; LA32-NEXT: xvld $xr0, $a5, %pc_lo12(.LCPI15_0)
+; LA32-NEXT: add.w $a4, $a4, $a4
+; LA32-NEXT: xvld $xr1, $a0, 0
+; LA32-NEXT: xvreplgr2vr.w $xr2, $a4
+; LA32-NEXT: xvseq.w $xr2, $xr2, $xr0
+; LA32-NEXT: xvreplgr2vr.w $xr3, $a2
+; LA32-NEXT: xvbitsel.v $xr1, $xr1, $xr3, $xr2
+; LA32-NEXT: addi.w $a0, $a4, 1
+; LA32-NEXT: xvreplgr2vr.w $xr2, $a0
+; LA32-NEXT: xvseq.w $xr0, $xr2, $xr0
+; LA32-NEXT: xvreplgr2vr.w $xr2, $a3
+; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_4xi64_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI15_0)
+; LA64-NEXT: xvld $xr0, $a4, %pc_lo12(.LCPI15_0)
+; LA64-NEXT: xvld $xr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: xvreplgr2vr.d $xr2, $a0
+; LA64-NEXT: xvseq.d $xr0, $xr2, $xr0
+; LA64-NEXT: xvreplgr2vr.d $xr2, $a2
+; LA64-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
store <4 x i64> %v_new, ptr %dst
@@ -238,19 +301,32 @@ define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
}
define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
-; CHECK-LABEL: insert_8xfloat_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: xvld $xr2, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: xvreplgr2vr.w $xr3, $a0
-; CHECK-NEXT: xvseq.w $xr1, $xr3, $xr1
-; CHECK-NEXT: xvreplve0.w $xr0, $xr0
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
-; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_8xfloat_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI16_0)
+; LA32-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI16_0)
+; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA32-NEXT: xvld $xr2, $a0, 0
+; LA32-NEXT: xvreplgr2vr.w $xr3, $a2
+; LA32-NEXT: xvseq.w $xr1, $xr3, $xr1
+; LA32-NEXT: xvreplve0.w $xr0, $xr0
+; LA32-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_8xfloat_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0
+; LA64-NEXT: pcalau12i $a3, %pc_hi20(.LCPI16_0)
+; LA64-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI16_0)
+; LA64-NEXT: xvld $xr2, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
+; LA64-NEXT: xvreplgr2vr.w $xr3, $a0
+; LA64-NEXT: xvseq.w $xr1, $xr3, $xr1
+; LA64-NEXT: xvreplve0.w $xr0, $xr0
+; LA64-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%v_new = insertelement <8 x float> %v, float %in, i32 %idx
store <8 x float> %v_new, ptr %dst
@@ -258,19 +334,36 @@ define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwin
}
define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
-; CHECK-LABEL: insert_4xdouble_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: xvld $xr2, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: xvreplgr2vr.d $xr3, $a0
-; CHECK-NEXT: xvseq.d $xr1, $xr3, $xr1
-; CHECK-NEXT: xvreplve0.d $xr0, $xr0
-; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
-; CHECK-NEXT: xvst $xr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_4xdouble_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA32-NEXT: xvld $xr1, $a0, 0
+; LA32-NEXT: xvrepli.b $xr2, 0
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a2, 0
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI17_0)
+; LA32-NEXT: xvld $xr3, $a0, %pc_lo12(.LCPI17_0)
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a2, 2
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a2, 4
+; LA32-NEXT: xvinsgr2vr.w $xr2, $a2, 6
+; LA32-NEXT: xvseq.d $xr2, $xr2, $xr3
+; LA32-NEXT: xvreplve0.d $xr0, $xr0
+; LA32-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
+; LA32-NEXT: xvst $xr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_4xdouble_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; LA64-NEXT: pcalau12i $a3, %pc_hi20(.LCPI17_0)
+; LA64-NEXT: xvld $xr1, $a3, %pc_lo12(.LCPI17_0)
+; LA64-NEXT: xvld $xr2, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
+; LA64-NEXT: xvreplgr2vr.d $xr3, $a0
+; LA64-NEXT: xvseq.d $xr1, $xr3, $xr1
+; LA64-NEXT: xvreplve0.d $xr0, $xr0
+; LA64-NEXT: xvbitsel.v $xr0, $xr2, $xr0, $xr1
+; LA64-NEXT: xvst $xr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%v_new = insertelement <4 x double> %v, double %in, i32 %idx
store <4 x double> %v_new, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
index 935a30a3e54ed..e498358cf4d19 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
;; xvrepl128vei.b
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
index 6a88805148715..4900146b69a25 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
;; xvshuf.b
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index 02186d23e31e5..37b62ca989edb 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
;; xxvshuf4i.b
@@ -40,4 +41,4 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
; CHECK-NEXT: ret
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %c
-}
\ No newline at end of file
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index 5f76d9951df9c..24f1b31702b71 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll
index 7268eb24ee51c..3e815a174d232 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-add.ll
@@ -1,19 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefix=LA64
define void @vec_reduce_add_v32i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v32i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr0
-; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
-; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
-; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
-; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: st.b $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v32i8:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvhaddw.h.b $xr0, $xr0, $xr0
+; LA32-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
+; LA32-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
+; LA32-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
+; LA32-NEXT: xvpermi.d $xr1, $xr0, 2
+; LA32-NEXT: xvadd.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v32i8:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvhaddw.h.b $xr0, $xr0, $xr0
+; LA64-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
+; LA64-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
+; LA64-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
+; LA64-NEXT: xvpermi.d $xr1, $xr0, 2
+; LA64-NEXT: xvadd.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: st.b $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <32 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %v)
store i8 %res, ptr %dst
@@ -21,17 +35,29 @@ define void @vec_reduce_add_v32i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v16i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v16i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
-; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
-; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
-; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v16i16:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
+; LA32-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
+; LA32-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
+; LA32-NEXT: xvpermi.d $xr1, $xr0, 2
+; LA32-NEXT: xvadd.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v16i16:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvhaddw.w.h $xr0, $xr0, $xr0
+; LA64-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
+; LA64-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
+; LA64-NEXT: xvpermi.d $xr1, $xr0, 2
+; LA64-NEXT: xvadd.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: st.h $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <16 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %v)
store i16 %res, ptr %dst
@@ -39,16 +65,27 @@ define void @vec_reduce_add_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
-; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
-; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: st.w $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
+; LA32-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
+; LA32-NEXT: xvpermi.d $xr1, $xr0, 2
+; LA32-NEXT: xvadd.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvhaddw.d.w $xr0, $xr0, $xr0
+; LA64-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
+; LA64-NEXT: xvpermi.d $xr1, $xr0, 2
+; LA64-NEXT: xvadd.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 0
+; LA64-NEXT: st.w $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -56,14 +93,31 @@ define void @vec_reduce_add_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
-; CHECK-NEXT: xvpermi.d $xr1, $xr0, 2
-; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vadd.d $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: add.w $a0, $a2, $a0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 0
+; LA32-NEXT: add.w $a2, $a3, $a2
+; LA32-NEXT: sltu $a3, $a2, $a3
+; LA32-NEXT: add.w $a0, $a0, $a3
+; LA32-NEXT: st.w $a2, $a1, 0
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvhaddw.q.d $xr0, $xr0, $xr0
+; LA64-NEXT: xvpermi.d $xr1, $xr0, 2
+; LA64-NEXT: xvadd.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvstelm.d $xr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
index fd64beab57bf0..23cc230f04503 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-and.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_and_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v32i8:
@@ -44,17 +45,30 @@ define void @vec_reduce_and_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_and_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_and_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vand.v $vr0, $vr0, $vr1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_and_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vand.v $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -62,15 +76,30 @@ define void @vec_reduce_and_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_and_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_and_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_and_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vand.v $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
+; LA32-NEXT: and $a0, $a2, $a0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 1
+; LA32-NEXT: and $a2, $a3, $a2
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_and_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vand.v $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
index cdb08d9de3821..d7d3afc6dd1da 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-or.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_or_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v32i8:
@@ -44,17 +45,30 @@ define void @vec_reduce_or_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_or_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_or_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_or_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_or_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vor.v $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -62,15 +76,30 @@ define void @vec_reduce_or_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_or_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_or_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_or_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
+; LA32-NEXT: or $a0, $a2, $a0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 1
+; LA32-NEXT: or $a2, $a3, $a2
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_or_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vor.v $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
index 1d182731c93be..8cbbb52884865 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smax.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_smax_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v32i8:
@@ -44,17 +45,30 @@ define void @vec_reduce_smax_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smax_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smax_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smax_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmax.w $vr0, $vr0, $vr1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smax_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmax.w $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -62,15 +76,41 @@ define void @vec_reduce_smax_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smax_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smax_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.d $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smax_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmax.d $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3
+; LA32-NEXT: slt $a3, $a2, $a0
+; LA32-NEXT: xor $a4, $a0, $a2
+; LA32-NEXT: sltui $a4, $a4, 1
+; LA32-NEXT: masknez $a3, $a3, $a4
+; LA32-NEXT: vpickve2gr.w $a5, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a6, $vr0, 2
+; LA32-NEXT: sltu $a7, $a6, $a5
+; LA32-NEXT: maskeqz $a4, $a7, $a4
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: masknez $a4, $a6, $a3
+; LA32-NEXT: maskeqz $a5, $a5, $a3
+; LA32-NEXT: or $a4, $a5, $a4
+; LA32-NEXT: masknez $a2, $a2, $a3
+; LA32-NEXT: maskeqz $a0, $a0, $a3
+; LA32-NEXT: or $a0, $a0, $a2
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: st.w $a4, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smax_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmax.d $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.d $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
index 369afdd1fc7bc..c34852aa8a28f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-smin.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_smin_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v32i8:
@@ -44,17 +45,30 @@ define void @vec_reduce_smin_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smin_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smin_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smin_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmin.w $vr0, $vr0, $vr1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smin_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmin.w $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -62,15 +76,41 @@ define void @vec_reduce_smin_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smin_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smin_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.d $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smin_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmin.d $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: slt $a3, $a2, $a0
+; LA32-NEXT: xor $a4, $a2, $a0
+; LA32-NEXT: sltui $a4, $a4, 1
+; LA32-NEXT: masknez $a3, $a3, $a4
+; LA32-NEXT: vpickve2gr.w $a5, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a6, $vr0, 0
+; LA32-NEXT: sltu $a7, $a6, $a5
+; LA32-NEXT: maskeqz $a4, $a7, $a4
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: masknez $a4, $a5, $a3
+; LA32-NEXT: maskeqz $a5, $a6, $a3
+; LA32-NEXT: or $a4, $a5, $a4
+; LA32-NEXT: masknez $a0, $a0, $a3
+; LA32-NEXT: maskeqz $a2, $a2, $a3
+; LA32-NEXT: or $a0, $a2, $a0
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: st.w $a4, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smin_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmin.d $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.d $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
index 5256a72ad7d97..c44f83a909a68 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umax.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_umax_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v32i8:
@@ -44,17 +45,30 @@ define void @vec_reduce_umax_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umax_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umax_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umax_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmax.wu $vr0, $vr0, $vr1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umax_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmax.wu $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -62,15 +76,41 @@ define void @vec_reduce_umax_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umax_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umax_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.du $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umax_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmax.du $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3
+; LA32-NEXT: sltu $a3, $a2, $a0
+; LA32-NEXT: xor $a4, $a0, $a2
+; LA32-NEXT: sltui $a4, $a4, 1
+; LA32-NEXT: masknez $a3, $a3, $a4
+; LA32-NEXT: vpickve2gr.w $a5, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a6, $vr0, 2
+; LA32-NEXT: sltu $a7, $a6, $a5
+; LA32-NEXT: maskeqz $a4, $a7, $a4
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: masknez $a4, $a6, $a3
+; LA32-NEXT: maskeqz $a5, $a5, $a3
+; LA32-NEXT: or $a4, $a5, $a4
+; LA32-NEXT: masknez $a2, $a2, $a3
+; LA32-NEXT: maskeqz $a0, $a0, $a3
+; LA32-NEXT: or $a0, $a0, $a2
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: st.w $a4, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umax_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmax.du $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.du $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
index a82c886d8eed1..f91a1b34dffe9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-umin.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_umin_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umin_v32i8:
@@ -44,17 +45,30 @@ define void @vec_reduce_umin_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umin_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umin_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umin_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmin.wu $vr0, $vr0, $vr1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umin_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmin.wu $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -62,15 +76,41 @@ define void @vec_reduce_umin_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umin_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umin_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.du $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umin_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vmin.du $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: sltu $a3, $a2, $a0
+; LA32-NEXT: xor $a4, $a2, $a0
+; LA32-NEXT: sltui $a4, $a4, 1
+; LA32-NEXT: masknez $a3, $a3, $a4
+; LA32-NEXT: vpickve2gr.w $a5, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a6, $vr0, 0
+; LA32-NEXT: sltu $a7, $a6, $a5
+; LA32-NEXT: maskeqz $a4, $a7, $a4
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: masknez $a4, $a5, $a3
+; LA32-NEXT: maskeqz $a5, $a6, $a3
+; LA32-NEXT: or $a4, $a5, $a4
+; LA32-NEXT: masknez $a0, $a0, $a3
+; LA32-NEXT: maskeqz $a2, $a2, $a3
+; LA32-NEXT: or $a0, $a2, $a0
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: st.w $a4, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umin_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vmin.du $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.du $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
index 429fadcdd156e..af1a66b574c03 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-reduce-xor.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_xor_v32i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_xor_v32i8:
@@ -44,17 +45,30 @@ define void @vec_reduce_xor_v16i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_xor_v8i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_xor_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_xor_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vxor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_xor_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vxor.v $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %v)
store i32 %res, ptr %dst
@@ -62,15 +76,30 @@ define void @vec_reduce_xor_v8i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_xor_v4i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_xor_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_xor_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT: vxor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
+; LA32-NEXT: xor $a0, $a2, $a0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 1
+; LA32-NEXT: xor $a2, $a3, $a2
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_xor_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT: vxor.v $vr0, $vr0, $vr1
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index b697a2fd07435..c0fa734034114 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
;; TODO For these special shuffle masks, we can lower them to xvbsll + xvbsrl + xvor.
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
index 06d4a5d03f276..09908f619fa1f 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
@@ -1,15 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx --verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LA64
define i32 @xmsk_eq_allzeros_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_eq_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmsknz.b $xr0, $xr0
-; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_eq_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmsknz.b $xr0, $xr0
+; LA32-NEXT: xvnor.v $xr0, $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_eq_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmsknz.b $xr0, $xr0
+; LA64-NEXT: xvnor.v $xr0, $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp eq <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -17,15 +27,25 @@ entry:
}
define i32 @xmsk_sgt_allzeros_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_sgt_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 0
-; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
-; CHECK-NEXT: xvmskltz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sgt_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvrepli.b $xr1, 0
+; LA32-NEXT: xvslt.b $xr0, $xr1, $xr0
+; LA32-NEXT: xvmskltz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sgt_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvrepli.b $xr1, 0
+; LA64-NEXT: xvslt.b $xr0, $xr1, $xr0
+; LA64-NEXT: xvmskltz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp sgt <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -33,13 +53,21 @@ entry:
}
define i32 @xmsk_sgt_allones_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_sgt_allones_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskgez.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sgt_allones_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskgez.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sgt_allones_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskgez.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp sgt <32 x i8> %a, splat (i8 -1)
%2 = bitcast <32 x i1> %1 to i32
@@ -47,13 +75,21 @@ entry:
}
define i32 @xmsk_sge_allzeros_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_sge_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskgez.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sge_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskgez.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sge_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskgez.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp sge <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -61,13 +97,21 @@ entry:
}
define i32 @xmsk_slt_allzeros_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_slt_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_slt_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_slt_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp slt <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -75,13 +119,21 @@ entry:
}
define i16 @xmsk_slt_allzeros_i16(<16 x i16 > %a) {
-; CHECK-LABEL: xmsk_slt_allzeros_i16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.h $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_slt_allzeros_i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.h $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 15, 8
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_slt_allzeros_i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.h $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 15, 8
+; LA64-NEXT: ret
entry:
%1 = icmp slt <16 x i16> %a, splat (i16 0)
%2 = bitcast <16 x i1> %1 to i16
@@ -89,13 +141,21 @@ entry:
}
define i8 @xmsk_slt_allzeros_i32(<8 x i32 > %a) {
-; CHECK-LABEL: xmsk_slt_allzeros_i32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_slt_allzeros_i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_slt_allzeros_i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
entry:
%1 = icmp slt <8 x i32> %a, splat (i32 0)
%2 = bitcast <8 x i1> %1 to i8
@@ -103,13 +163,21 @@ entry:
}
define i4 @xmsk_slt_allzeros_i64(<4 x i64 > %a) {
-; CHECK-LABEL: xmsk_slt_allzeros_i64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.d $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_slt_allzeros_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.d $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 3, 2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_slt_allzeros_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.d $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 3, 2
+; LA64-NEXT: ret
entry:
%1 = icmp slt <4 x i64> %a, splat (i64 0)
%2 = bitcast <4 x i1> %1 to i4
@@ -117,14 +185,23 @@ entry:
}
define i32 @xmsk_sle_allzeros_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_sle_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvslei.b $xr0, $xr0, 0
-; CHECK-NEXT: xvmskltz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sle_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvslei.b $xr0, $xr0, 0
+; LA32-NEXT: xvmskltz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sle_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvslei.b $xr0, $xr0, 0
+; LA64-NEXT: xvmskltz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp sle <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -132,13 +209,21 @@ entry:
}
define i32 @xmsk_sle_allones_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_sle_allones_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sle_allones_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sle_allones_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp sle <32 x i8> %a, splat (i8 -1)
%2 = bitcast <32 x i1> %1 to i32
@@ -146,13 +231,21 @@ entry:
}
define i16 @xmsk_sle_allones_i32(<16 x i16 > %a) {
-; CHECK-LABEL: xmsk_sle_allones_i32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.h $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sle_allones_i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.h $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 15, 8
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sle_allones_i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.h $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 15, 8
+; LA64-NEXT: ret
entry:
%1 = icmp sle <16 x i16> %a, splat (i16 -1)
%2 = bitcast <16 x i1> %1 to i16
@@ -160,13 +253,21 @@ entry:
}
define i8 @xmsk_sle_allones_i16(<8 x i32 > %a) {
-; CHECK-LABEL: xmsk_sle_allones_i16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sle_allones_i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sle_allones_i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
entry:
%1 = icmp sle <8 x i32> %a, splat (i32 -1)
%2 = bitcast <8 x i1> %1 to i8
@@ -174,13 +275,21 @@ entry:
}
define i4 @xmsk_sle_allones_i64(<4 x i64 > %a) {
-; CHECK-LABEL: xmsk_sle_allones_i64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmskltz.d $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_sle_allones_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmskltz.d $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 3, 2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_sle_allones_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmskltz.d $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 3, 2
+; LA64-NEXT: ret
entry:
%1 = icmp sle <4 x i64> %a, splat (i64 -1)
%2 = bitcast <4 x i1> %1 to i4
@@ -188,13 +297,21 @@ entry:
}
define i32 @xmsk_ne_allzeros_i8(<32 x i8 > %a) {
-; CHECK-LABEL: xmsk_ne_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvmsknz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xmsk_ne_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: xvmsknz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xmsk_ne_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: xvmsknz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
entry:
%1 = icmp ne <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -202,100 +319,165 @@ entry:
}
define i4 @xvmsk_sgt_v4i64(<4 x i64> %a, <4 x i64> %b) {
-; CHECK-LABEL: xvmsk_sgt_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvmskltz.d $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvmskltz.d $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 3, 2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvmskltz.d $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 3, 2
+; LA64-NEXT: ret
%x = icmp sgt <4 x i64> %a, %b
%res = bitcast <4 x i1> %x to i4
ret i4 %res
}
define i4 @xvmsk_ogt_v4f64(<4 x double> %a, <4 x double> %b) {
-; CHECK-LABEL: xvmsk_ogt_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvmskltz.d $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_ogt_v4f64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvmskltz.d $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 3, 2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_ogt_v4f64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvmskltz.d $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 3, 2
+; LA64-NEXT: ret
%x = fcmp ogt <4 x double> %a, %b
%res = bitcast <4 x i1> %x to i4
ret i4 %res
}
define i8 @xvmsk_sgt_v8i32(<8 x i32> %a, <8 x i32> %b) {
-; CHECK-LABEL: xvmsk_sgt_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x = icmp sgt <8 x i32> %a, %b
%res = bitcast <8 x i1> %x to i8
ret i8 %res
}
define i8 @xvmsk_ogt_v8f32(<8 x float> %a, <8 x float> %b) {
-; CHECK-LABEL: xvmsk_ogt_v8f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_ogt_v8f32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_ogt_v8f32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x = fcmp ogt <8 x float> %a, %b
%res = bitcast <8 x i1> %x to i8
ret i8 %res
}
define i16 @xvmsk_sgt_v16i16(<16 x i16> %a, <16 x i16> %b) {
-; CHECK-LABEL: xvmsk_sgt_v16i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
-; CHECK-NEXT: xvmskltz.h $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_v16i16:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.h $xr0, $xr1, $xr0
+; LA32-NEXT: xvmskltz.h $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 15, 8
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_v16i16:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.h $xr0, $xr1, $xr0
+; LA64-NEXT: xvmskltz.h $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 15, 8
+; LA64-NEXT: ret
%x = icmp sgt <16 x i16> %a, %b
%res = bitcast <16 x i1> %x to i16
ret i16 %res
}
define i32 @xvmsk_sgt_v32i8(<32 x i8> %a, <32 x i8> %b) {
-; CHECK-LABEL: xvmsk_sgt_v32i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
-; CHECK-NEXT: xvmskltz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_v32i8:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.b $xr0, $xr1, $xr0
+; LA32-NEXT: xvmskltz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_v32i8:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.b $xr0, $xr1, $xr0
+; LA64-NEXT: xvmskltz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
%x = icmp sgt <32 x i8> %a, %b
%res = bitcast <32 x i1> %x to i32
ret i32 %res
}
define i4 @xvmsk_sgt_and_sgt_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
-; CHECK-LABEL: xvmsk_sgt_and_sgt_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.d $xr2, $xr3, $xr2
-; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.d $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_and_sgt_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.d $xr2, $xr3, $xr2
+; LA32-NEXT: xvslt.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.d $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 3, 2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_and_sgt_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.d $xr2, $xr3, $xr2
+; LA64-NEXT: xvslt.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.d $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 3, 2
+; LA64-NEXT: ret
%x0 = icmp sgt <4 x i64> %a, %b
%x1 = icmp sgt <4 x i64> %c, %d
%y = and <4 x i1> %x0, %x1
@@ -304,16 +486,27 @@ define i4 @xvmsk_sgt_and_sgt_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4
}
define i4 @xvmsk_ogt_and_ogt_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
-; CHECK-LABEL: xvmsk_ogt_and_ogt_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvfcmp.clt.d $xr2, $xr3, $xr2
-; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.d $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_ogt_and_ogt_v4f64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvfcmp.clt.d $xr2, $xr3, $xr2
+; LA32-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.d $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 3, 2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_ogt_and_ogt_v4f64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvfcmp.clt.d $xr2, $xr3, $xr2
+; LA64-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.d $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 3, 2
+; LA64-NEXT: ret
%x0 = fcmp ogt <4 x double> %a, %b
%x1 = fcmp ogt <4 x double> %c, %d
%y = and <4 x i1> %x0, %x1
@@ -322,16 +515,27 @@ define i4 @xvmsk_ogt_and_ogt_v4f64(<4 x double> %a, <4 x double> %b, <4 x double
}
define i8 @xvmsk_sgt_and_sgt_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
-; CHECK-LABEL: xvmsk_sgt_and_sgt_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.w $xr2, $xr3, $xr2
-; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_and_sgt_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.w $xr2, $xr3, $xr2
+; LA32-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_and_sgt_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.w $xr2, $xr3, $xr2
+; LA64-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x0 = icmp sgt <8 x i32> %a, %b
%x1 = icmp sgt <8 x i32> %c, %d
%y = and <8 x i1> %x0, %x1
@@ -340,16 +544,27 @@ define i8 @xvmsk_sgt_and_sgt_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8
}
define i8 @xvmsk_sgt_or_sgt_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
-; CHECK-LABEL: xvmsk_sgt_or_sgt_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.w $xr2, $xr3, $xr2
-; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_or_sgt_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.w $xr2, $xr3, $xr2
+; LA32-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_or_sgt_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.w $xr2, $xr3, $xr2
+; LA64-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA64-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x0 = icmp sgt <8 x i32> %a, %b
%x1 = icmp sgt <8 x i32> %c, %d
%y = or <8 x i1> %x0, %x1
@@ -358,18 +573,31 @@ define i8 @xvmsk_sgt_or_sgt_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x
}
define i8 @xvmsk_sgt_or_slt_and_eq_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d, <8 x i32> %e, <8 x i32> %f) {
-; CHECK-LABEL: xvmsk_sgt_or_slt_and_eq_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.w $xr2, $xr2, $xr3
-; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvseq.w $xr1, $xr4, $xr5
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_or_slt_and_eq_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.w $xr2, $xr2, $xr3
+; LA32-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvseq.w $xr1, $xr4, $xr5
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_or_slt_and_eq_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.w $xr2, $xr2, $xr3
+; LA64-NEXT: xvslt.w $xr0, $xr1, $xr0
+; LA64-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvseq.w $xr1, $xr4, $xr5
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x0 = icmp sgt <8 x i32> %a, %b
%x1 = icmp slt <8 x i32> %c, %d
%x2 = icmp eq <8 x i32> %e, %f
@@ -380,15 +608,25 @@ define i8 @xvmsk_sgt_or_slt_and_eq_v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %
}
define i8 @xvmsk_eq_vsel_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2) {
-; CHECK-LABEL: xvmsk_eq_vsel_slt_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_eq_vsel_slt_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvseq.w $xr0, $xr0, $xr1
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_eq_vsel_slt_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvseq.w $xr0, $xr0, $xr1
+; LA64-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%cmp = icmp eq <8 x i32> %a0, %a1
%slt = icmp slt <8 x i32> %a2, zeroinitializer
%sel = select <8 x i1> %cmp, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %slt
@@ -397,22 +635,39 @@ define i8 @xvmsk_eq_vsel_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
}
define i8 @xvmsk_sel_eq_or_eq_or_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> %a3, i1 %a4) {
-; CHECK-LABEL: xvmsk_sel_eq_or_eq_or_slt_v8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: andi $a0, $a0, 1
-; CHECK-NEXT: xvseq.w $xr2, $xr0, $xr2
-; CHECK-NEXT: addi.d $a1, $zero, -1
-; CHECK-NEXT: maskeqz $a0, $a1, $a0
-; CHECK-NEXT: xvreplgr2vr.w $xr4, $a0
-; CHECK-NEXT: xvand.v $xr2, $xr2, $xr4
-; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvor.v $xr0, $xr3, $xr0
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sel_eq_or_eq_or_slt_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: andi $a0, $a0, 1
+; LA32-NEXT: xvseq.w $xr2, $xr0, $xr2
+; LA32-NEXT: addi.w $a1, $zero, -1
+; LA32-NEXT: maskeqz $a0, $a1, $a0
+; LA32-NEXT: xvreplgr2vr.w $xr4, $a0
+; LA32-NEXT: xvand.v $xr2, $xr2, $xr4
+; LA32-NEXT: xvseq.w $xr0, $xr0, $xr1
+; LA32-NEXT: xvor.v $xr0, $xr3, $xr0
+; LA32-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sel_eq_or_eq_or_slt_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: andi $a0, $a0, 1
+; LA64-NEXT: xvseq.w $xr2, $xr0, $xr2
+; LA64-NEXT: addi.d $a1, $zero, -1
+; LA64-NEXT: maskeqz $a0, $a1, $a0
+; LA64-NEXT: xvreplgr2vr.w $xr4, $a0
+; LA64-NEXT: xvand.v $xr2, $xr2, $xr4
+; LA64-NEXT: xvseq.w $xr0, $xr0, $xr1
+; LA64-NEXT: xvor.v $xr0, $xr3, $xr0
+; LA64-NEXT: xvor.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%cmp0 = icmp eq <8 x i32> %a0, %a1
%cmp1 = icmp eq <8 x i32> %a0, %a2
%cmp2 = icmp slt <8 x i32> %a3, zeroinitializer
@@ -424,16 +679,27 @@ define i8 @xvmsk_sel_eq_or_eq_or_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i3
}
define i8 @xvmsk_ogt_and_ogt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
-; CHECK-LABEL: xvmsk_ogt_and_ogt_v8f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
-; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_ogt_and_ogt_v8f32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
+; LA32-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_ogt_and_ogt_v8f32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
+; LA64-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x0 = fcmp ogt <8 x float> %a, %b
%x1 = fcmp ogt <8 x float> %c, %d
%y = and <8 x i1> %x0, %x1
@@ -442,16 +708,27 @@ define i8 @xvmsk_ogt_and_ogt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %
}
define i8 @xvmsk_sgt_xor_sgt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
-; CHECK-LABEL: xvmsk_sgt_xor_sgt_v8f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
-; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_xor_sgt_v8f32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
+; LA32-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; LA32-NEXT: xvxor.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_xor_sgt_v8f32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvfcmp.clt.s $xr2, $xr3, $xr2
+; LA64-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0
+; LA64-NEXT: xvxor.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x0 = fcmp ogt <8 x float> %a, %b
%x1 = fcmp ogt <8 x float> %c, %d
%y = xor <8 x i1> %x0, %x1
@@ -460,18 +737,31 @@ define i8 @xvmsk_sgt_xor_sgt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %
}
define i8 @xvmsk_ugt_xor_ueq_and_ogt_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, <8 x float> %e, <8 x float> %f) {
-; CHECK-LABEL: xvmsk_ugt_xor_ueq_and_ogt_v8f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvfcmp.cueq.s $xr2, $xr2, $xr3
-; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvfcmp.clt.s $xr1, $xr5, $xr4
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_ugt_xor_ueq_and_ogt_v8f32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvfcmp.cueq.s $xr2, $xr2, $xr3
+; LA32-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0
+; LA32-NEXT: xvxor.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvfcmp.clt.s $xr1, $xr5, $xr4
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_ugt_xor_ueq_and_ogt_v8f32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvfcmp.cueq.s $xr2, $xr2, $xr3
+; LA64-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0
+; LA64-NEXT: xvxor.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvfcmp.clt.s $xr1, $xr5, $xr4
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%x0 = fcmp ugt <8 x float> %a, %b
%x1 = fcmp ueq <8 x float> %c, %d
%x2 = fcmp ogt <8 x float> %e, %f
@@ -482,16 +772,27 @@ define i8 @xvmsk_ugt_xor_ueq_and_ogt_v8f32(<8 x float> %a, <8 x float> %b, <8 x
}
define i16 @xvmsk_sgt_and_sgt_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i16> %d) {
-; CHECK-LABEL: xvmsk_sgt_and_sgt_v16i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.h $xr2, $xr3, $xr2
-; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr2
-; CHECK-NEXT: xvmskltz.h $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_and_sgt_v16i16:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.h $xr2, $xr3, $xr2
+; LA32-NEXT: xvslt.h $xr0, $xr1, $xr0
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA32-NEXT: xvmskltz.h $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 15, 8
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_and_sgt_v16i16:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.h $xr2, $xr3, $xr2
+; LA64-NEXT: xvslt.h $xr0, $xr1, $xr0
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr2
+; LA64-NEXT: xvmskltz.h $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 15, 8
+; LA64-NEXT: ret
%x0 = icmp sgt <16 x i16> %a, %b
%x1 = icmp sgt <16 x i16> %c, %d
%y = and <16 x i1> %x0, %x1
@@ -500,16 +801,27 @@ define i16 @xvmsk_sgt_and_sgt_v16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c
}
define i32 @xvmsk_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
-; CHECK-LABEL: xvmsk_sgt_and_sgt_v32i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
-; CHECK-NEXT: xvslt.b $xr1, $xr3, $xr2
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvmskltz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_sgt_and_sgt_v32i8:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslt.b $xr0, $xr1, $xr0
+; LA32-NEXT: xvslt.b $xr1, $xr3, $xr2
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvmskltz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_sgt_and_sgt_v32i8:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslt.b $xr0, $xr1, $xr0
+; LA64-NEXT: xvslt.b $xr1, $xr3, $xr2
+; LA64-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA64-NEXT: xvmskltz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
%x0 = icmp sgt <32 x i8> %a, %b
%x1 = icmp sgt <32 x i8> %c, %d
%y = and <32 x i1> %x0, %x1
@@ -518,17 +830,29 @@ define i32 @xvmsk_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <3
}
define i8 @xvmsk_eq_v2i64_concat_poison(<2 x i64> %vec) {
-; CHECK-LABEL: xvmsk_eq_v2i64_concat_poison:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vseqi.d $vr0, $vr0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
-; CHECK-NEXT: vslli.h $vr0, $vr1, 15
-; CHECK-NEXT: vmskltz.h $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_eq_v2i64_concat_poison:
+; LA32: # %bb.0:
+; LA32-NEXT: vseqi.d $vr0, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: vinsgr2vr.h $vr1, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: vinsgr2vr.h $vr1, $a0, 1
+; LA32-NEXT: vslli.h $vr0, $vr1, 15
+; LA32-NEXT: vmskltz.h $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_eq_v2i64_concat_poison:
+; LA64: # %bb.0:
+; LA64-NEXT: vseqi.d $vr0, $vr0, 0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: vinsgr2vr.h $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1
+; LA64-NEXT: vinsgr2vr.h $vr1, $a0, 1
+; LA64-NEXT: vslli.h $vr0, $vr1, 15
+; LA64-NEXT: vmskltz.h $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: ret
%tobool = icmp eq <2 x i64> %vec, zeroinitializer
%insertvec = shufflevector <2 x i1> %tobool, <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%res = bitcast <8 x i1> %insertvec to i8
@@ -560,22 +884,39 @@ define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) {
}
define i8 @xvmsk_ogt_v4f64_concat_poison(<4 x double> %vec) {
-; CHECK-LABEL: xvmsk_ogt_v4f64_concat_poison:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvrepli.b $xr1, 0
-; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: xvpickve2gr.d $a1, $xr0, 2
-; CHECK-NEXT: xvpickve2gr.d $a2, $xr0, 1
-; CHECK-NEXT: xvpickve2gr.d $a3, $xr0, 0
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 0
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3
-; CHECK-NEXT: vslli.h $vr0, $vr0, 15
-; CHECK-NEXT: vmskltz.h $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_ogt_v4f64_concat_poison:
+; LA32: # %bb.0:
+; LA32-NEXT: xvrepli.b $xr1, 0
+; LA32-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
+; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 2
+; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 0
+; LA32-NEXT: vinsgr2vr.h $vr0, $a3, 0
+; LA32-NEXT: vinsgr2vr.h $vr0, $a2, 1
+; LA32-NEXT: vinsgr2vr.h $vr0, $a1, 2
+; LA32-NEXT: vinsgr2vr.h $vr0, $a0, 3
+; LA32-NEXT: vslli.h $vr0, $vr0, 15
+; LA32-NEXT: vmskltz.h $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_ogt_v4f64_concat_poison:
+; LA64: # %bb.0:
+; LA64-NEXT: xvrepli.b $xr1, 0
+; LA64-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0
+; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
+; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 1
+; LA64-NEXT: xvpickve2gr.d $a3, $xr0, 0
+; LA64-NEXT: vinsgr2vr.h $vr0, $a3, 0
+; LA64-NEXT: vinsgr2vr.h $vr0, $a2, 1
+; LA64-NEXT: vinsgr2vr.h $vr0, $a1, 2
+; LA64-NEXT: vinsgr2vr.h $vr0, $a0, 3
+; LA64-NEXT: vslli.h $vr0, $vr0, 15
+; LA64-NEXT: vmskltz.h $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: ret
%tobool = fcmp ogt <4 x double> %vec, zeroinitializer
%insertvec = shufflevector <4 x i1> %tobool, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
%res = bitcast <8 x i1> %insertvec to i8
@@ -583,56 +924,92 @@ define i8 @xvmsk_ogt_v4f64_concat_poison(<4 x double> %vec) {
}
define i32 @xvmsk_trunc_i8(<32 x i8> %a) {
-; CHECK-LABEL: xvmsk_trunc_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslli.b $xr0, $xr0, 7
-; CHECK-NEXT: xvmskltz.b $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 31, 16
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_trunc_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslli.b $xr0, $xr0, 7
+; LA32-NEXT: xvmskltz.b $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 31, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_trunc_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslli.b $xr0, $xr0, 7
+; LA64-NEXT: xvmskltz.b $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 31, 16
+; LA64-NEXT: ret
%y = trunc <32 x i8> %a to <32 x i1>
%res = bitcast <32 x i1> %y to i32
ret i32 %res
}
define i16 @xvmsk_trunc_i16(<16 x i16> %a) {
-; CHECK-LABEL: xvmsk_trunc_i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslli.h $xr0, $xr0, 15
-; CHECK-NEXT: xvmskltz.h $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 15, 8
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_trunc_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslli.h $xr0, $xr0, 15
+; LA32-NEXT: xvmskltz.h $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 15, 8
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_trunc_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslli.h $xr0, $xr0, 15
+; LA64-NEXT: xvmskltz.h $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 15, 8
+; LA64-NEXT: ret
%y = trunc <16 x i16> %a to <16 x i1>
%res = bitcast <16 x i1> %y to i16
ret i16 %res
}
define i8 @xvmsk_trunc_i32(<8 x i32> %a) {
-; CHECK-LABEL: xvmsk_trunc_i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslli.w $xr0, $xr0, 31
-; CHECK-NEXT: xvmskltz.w $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 7, 4
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_trunc_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslli.w $xr0, $xr0, 31
+; LA32-NEXT: xvmskltz.w $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 7, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_trunc_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslli.w $xr0, $xr0, 31
+; LA64-NEXT: xvmskltz.w $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 7, 4
+; LA64-NEXT: ret
%y = trunc <8 x i32> %a to <8 x i1>
%res = bitcast <8 x i1> %y to i8
ret i8 %res
}
define i4 @xvmsk_trunc_i64(<4 x i64> %a) {
-; CHECK-LABEL: xvmsk_trunc_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvslli.d $xr0, $xr0, 63
-; CHECK-NEXT: xvmskltz.d $xr0, $xr0
-; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
-; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
-; CHECK-NEXT: bstrins.d $a0, $a1, 3, 2
-; CHECK-NEXT: ret
+; LA32-LABEL: xvmsk_trunc_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvslli.d $xr0, $xr0, 63
+; LA32-NEXT: xvmskltz.d $xr0, $xr0
+; LA32-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA32-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA32-NEXT: bstrins.w $a0, $a1, 3, 2
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xvmsk_trunc_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvslli.d $xr0, $xr0, 63
+; LA64-NEXT: xvmskltz.d $xr0, $xr0
+; LA64-NEXT: xvpickve2gr.wu $a0, $xr0, 0
+; LA64-NEXT: xvpickve2gr.wu $a1, $xr0, 4
+; LA64-NEXT: bstrins.d $a0, $a1, 3, 2
+; LA64-NEXT: ret
%y = trunc <4 x i64> %a to <4 x i1>
%res = bitcast <4 x i1> %y to i4
ret i4 %res
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
index 4c17d3fd8d7b2..b0d36a8143fa1 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/bitreverse.ll
@@ -1,20 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 -mattr=+32s,+lsx --verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+lsx --verify-machineinstrs < %s \
-; RUN: | FileCheck %s
+; RUN: | FileCheck %s --check-prefix=LA64
declare <16 x i8> @llvm.bitreverse.v16i8(<16 x i8>)
define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v16i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: bitrev.8b $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v16i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vslli.b $vr1, $vr0, 4
+; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vandi.b $vr1, $vr0, 51
+; LA32-NEXT: vslli.b $vr1, $vr1, 2
+; LA32-NEXT: vsrli.b $vr0, $vr0, 2
+; LA32-NEXT: vandi.b $vr0, $vr0, 51
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vandi.b $vr1, $vr0, 85
+; LA32-NEXT: vslli.b $vr1, $vr1, 1
+; LA32-NEXT: vsrli.b $vr0, $vr0, 1
+; LA32-NEXT: vandi.b $vr0, $vr0, 85
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v16i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1
+; LA64-NEXT: bitrev.8b $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vori.b $vr0, $vr1, 0
+; LA64-NEXT: ret
%b = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %a)
ret <16 x i8> %b
}
@@ -22,16 +41,33 @@ define <16 x i8> @test_bitreverse_v16i8(<16 x i8> %a) nounwind {
declare <8 x i16> @llvm.bitreverse.v8i16(<8 x i16>)
define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v8i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: vshuf4i.h $vr0, $vr1, 27
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v8i16:
+; LA32: # %bb.0:
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: vshuf4i.h $vr0, $vr1, 27
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v8i16:
+; LA64: # %bb.0:
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vshuf4i.h $vr0, $vr1, 27
+; LA64-NEXT: ret
%b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
ret <8 x i16> %b
}
@@ -39,16 +75,33 @@ define <8 x i16> @test_bitreverse_v8i16(<8 x i16> %a) nounwind {
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: vshuf4i.w $vr0, $vr1, 177
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: bitrev.w $a0, $a0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT: vori.b $vr0, $vr1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vshuf4i.w $vr0, $vr1, 177
+; LA64-NEXT: ret
%b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
ret <4 x i32> %b
}
@@ -56,16 +109,36 @@ define <4 x i32> @test_bitreverse_v4i32(<4 x i32> %a) nounwind {
declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>)
define <2 x i64> @test_bitreverse_v2i64(<2 x i64> %a) nounwind {
-; CHECK-LABEL: test_bitreverse_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: bitrev.d $a0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 1
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: test_bitreverse_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0)
+; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA32-NEXT: vslli.b $vr1, $vr0, 4
+; LA32-NEXT: vsrli.b $vr0, $vr0, 4
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vandi.b $vr1, $vr0, 51
+; LA32-NEXT: vslli.b $vr1, $vr1, 2
+; LA32-NEXT: vsrli.b $vr0, $vr0, 2
+; LA32-NEXT: vandi.b $vr0, $vr0, 51
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: vandi.b $vr1, $vr0, 85
+; LA32-NEXT: vslli.b $vr1, $vr1, 1
+; LA32-NEXT: vsrli.b $vr0, $vr0, 1
+; LA32-NEXT: vandi.b $vr0, $vr0, 85
+; LA32-NEXT: vor.v $vr0, $vr0, $vr1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_bitreverse_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 1
+; LA64-NEXT: bitrev.d $a0, $a0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT: vori.b $vr0, $vr1, 0
+; LA64-NEXT: ret
%b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
ret <2 x i64> %b
}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll
index 669c53b73b16f..92981211adeb8 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
declare <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float>, <4 x float>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll
index 1b7a97d9f9720..324098b918890 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecipe.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s
declare <4 x float> @llvm.loongarch.lsx.vfrecipe.s(<4 x float>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll
index 3cd6c78e87d78..ad46b47c82c86 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrte.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx,+frecipe < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx,+frecipe < %s | FileCheck %s
declare <4 x float> @llvm.loongarch.lsx.vfrsqrte.s(<4 x float>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll
index 667ba32723fc4..2ecbe685ff20b 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll
@@ -1,3 +1,4 @@
+; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s
; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll
index b73bada4f06fb..f4348f57442e6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll
@@ -1,3 +1,4 @@
+; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s
; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d-invalid-imm.ll
new file mode 100644
index 0000000000000..4dc5163e721ce
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d-invalid-imm.ll
@@ -0,0 +1,33 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32)
+
+define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 -1)
+ ret i64 %res
+}
+
+define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2)
+ ret i64 %res
+}
+
+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32)
+
+define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1)
+ ret i64 %res
+}
+
+define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range
+entry:
+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2)
+ ret i64 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d.ll
new file mode 100644
index 0000000000000..78f4e3c1bc18b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-d.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32)
+
+define i64 @lsx_vpickve2gr_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vpickve2gr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1)
+ ret i64 %res
+}
+
+declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32)
+
+define i64 @lsx_vpickve2gr_du(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vpickve2gr_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1
+; CHECK-NEXT: ret
+entry:
+ %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1)
+ ret i64 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll
index 3430c54d21941..492b97c8316c1 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll
@@ -1,3 +1,4 @@
+; RUN: not llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s 2>&1 | FileCheck %s
; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32)
@@ -48,22 +49,6 @@ entry:
ret i32 %res
}
-declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32)
-
-define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 -1)
- ret i64 %res
-}
-
-define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2)
- ret i64 %res
-}
-
declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32)
define i32 @lsx_vpickve2gr_bu_lo(<16 x i8> %va) nounwind {
@@ -111,19 +96,3 @@ entry:
%res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 4)
ret i32 %res
}
-
-declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32)
-
-define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1)
- ret i64 %res
-}
-
-define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind {
-; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range
-entry:
- %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2)
- ret i64 %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll
index ed56d30ce3c46..4e77f6b72fed9 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32)
@@ -37,18 +38,6 @@ entry:
ret i32 %res
}
-declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32)
-
-define i64 @lsx_vpickve2gr_d(<2 x i64> %va) nounwind {
-; CHECK-LABEL: lsx_vpickve2gr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: ret
-entry:
- %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1)
- ret i64 %res
-}
-
declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32)
define i32 @lsx_vpickve2gr_bu(<16 x i8> %va) nounwind {
@@ -84,15 +73,3 @@ entry:
%res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 3)
ret i32 %res
}
-
-declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32)
-
-define i64 @lsx_vpickve2gr_du(<2 x i64> %va) nounwind {
-; CHECK-LABEL: lsx_vpickve2gr_du:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1
-; CHECK-NEXT: ret
-entry:
- %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1)
- ret i64 %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr-d.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr-d.ll
new file mode 100644
index 0000000000000..51533e4b2474c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr-d.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
+; CHECK-LABEL: vrepl_ins_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
+; CHECK-NEXT: ret
+entry:
+ %0 = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a)
+ %1 = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %0, i64 %b, i32 1)
+ ret <2 x i64> %1
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32 immarg)
+declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
index aee7492946829..9d7ab6e1ab5ef 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
@@ -37,23 +38,9 @@ entry:
ret <4 x i32> %1
}
-define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
-; CHECK-LABEL: vrepl_ins_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
-; CHECK-NEXT: ret
-entry:
- %0 = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a)
- %1 = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %0, i64 %b, i32 1)
- ret <2 x i64> %1
-}
-
declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32 immarg)
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32)
declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32 immarg)
declare <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32)
declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32 immarg)
declare <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32)
-declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32 immarg)
-declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr-d.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr-d.ll
new file mode 100644
index 0000000000000..c8d0fce6ed5a2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr-d.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64)
+
+define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind {
+; CHECK-LABEL: lsx_vreplgr2vr_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
+; CHECK-NEXT: ret
+entry:
+ %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a)
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll
index 091f1c98c2289..edaa20792012d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32)
@@ -36,15 +37,3 @@ entry:
%res = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
ret <4 x i32> %res
}
-
-declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64)
-
-define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind {
-; CHECK-LABEL: lsx_vreplgr2vr_d:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
-; CHECK-NEXT: ret
-entry:
- %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a)
- ret <2 x i64> %res
-}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll
index 3188fb4e2c2ef..004bcde90907a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
declare i32 @llvm.loongarch.lsx.bz.v(<16 x i8>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll
index 22e01922e87bb..6544f91f045a7 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
declare i32 @llvm.loongarch.lsx.bnz.b(<16 x i8>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll
index 96c79c10e4688..5ba3eb788c1d7 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
declare i32 @llvm.loongarch.lsx.bz.b(<16 x i8>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index 3fb55d4806160..b17a90e71e85a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -3,18 +3,11 @@
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: extract_16xi8:
-; LA32: # %bb.0:
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.b $a0, $vr0, 1
-; LA32-NEXT: st.b $a0, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: extract_16xi8:
-; LA64: # %bb.0:
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vstelm.b $vr0, $a1, 0, 1
-; LA64-NEXT: ret
+; CHECK-LABEL: extract_16xi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 1
+; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%e = extractelement <16 x i8> %v, i32 1
store i8 %e, ptr %dst
@@ -22,18 +15,11 @@ define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
}
define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
-; LA32-LABEL: extract_8xi16:
-; LA32: # %bb.0:
-; LA32-NEXT: vld $vr0, $a0, 0
-; LA32-NEXT: vpickve2gr.h $a0, $vr0, 1
-; LA32-NEXT: st.h $a0, $a1, 0
-; LA32-NEXT: ret
-;
-; LA64-LABEL: extract_8xi16:
-; LA64: # %bb.0:
-; LA64-NEXT: vld $vr0, $a0, 0
-; LA64-NEXT: vstelm.h $vr0, $a1, 0, 1
-; LA64-NEXT: ret
+; CHECK-LABEL: extract_8xi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 1
+; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%e = extractelement <8 x i16> %v, i32 1
store i16 %e, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
index 4bb1941724dc6..496a1aed39fb5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @insert_16xi8(ptr %src, ptr %dst, i8 %ins) nounwind {
; CHECK-LABEL: insert_16xi8:
@@ -41,12 +42,20 @@ define void @insert_4xi32(ptr %src, ptr %dst, i32 %ins) nounwind {
}
define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind {
-; CHECK-LABEL: insert_2xi64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_2xi64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 2
+; LA32-NEXT: vinsgr2vr.w $vr0, $a3, 3
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_2xi64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a2, 1
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%v_new = insertelement <2 x i64> %v, i64 %ins, i32 1
store <2 x i64> %v_new, ptr %dst
@@ -82,18 +91,30 @@ define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind {
}
define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
-; CHECK-LABEL: insert_16xi8_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vld $vr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: vreplgr2vr.b $vr2, $a0
-; CHECK-NEXT: vseq.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vreplgr2vr.b $vr2, $a2
-; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_16xi8_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a4, %pc_hi20(.LCPI6_0)
+; LA32-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI6_0)
+; LA32-NEXT: vld $vr1, $a0, 0
+; LA32-NEXT: vreplgr2vr.b $vr2, $a3
+; LA32-NEXT: vseq.b $vr0, $vr2, $vr0
+; LA32-NEXT: vreplgr2vr.b $vr2, $a2
+; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_16xi8_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI6_0)
+; LA64-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI6_0)
+; LA64-NEXT: vld $vr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: vreplgr2vr.b $vr2, $a0
+; LA64-NEXT: vseq.b $vr0, $vr2, $vr0
+; LA64-NEXT: vreplgr2vr.b $vr2, $a2
+; LA64-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx
store <16 x i8> %v_new, ptr %dst
@@ -101,18 +122,30 @@ define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind {
}
define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
-; CHECK-LABEL: insert_8xi16_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vld $vr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: vreplgr2vr.h $vr2, $a0
-; CHECK-NEXT: vseq.h $vr0, $vr2, $vr0
-; CHECK-NEXT: vreplgr2vr.h $vr2, $a2
-; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_8xi16_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a4, %pc_hi20(.LCPI7_0)
+; LA32-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI7_0)
+; LA32-NEXT: vld $vr1, $a0, 0
+; LA32-NEXT: vreplgr2vr.h $vr2, $a3
+; LA32-NEXT: vseq.h $vr0, $vr2, $vr0
+; LA32-NEXT: vreplgr2vr.h $vr2, $a2
+; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_8xi16_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI7_0)
+; LA64-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI7_0)
+; LA64-NEXT: vld $vr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: vreplgr2vr.h $vr2, $a0
+; LA64-NEXT: vseq.h $vr0, $vr2, $vr0
+; LA64-NEXT: vreplgr2vr.h $vr2, $a2
+; LA64-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx
store <8 x i16> %v_new, ptr %dst
@@ -120,18 +153,30 @@ define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind {
}
define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
-; CHECK-LABEL: insert_4xi32_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: vreplgr2vr.w $vr2, $a0
-; CHECK-NEXT: vseq.w $vr0, $vr2, $vr0
-; CHECK-NEXT: vreplgr2vr.w $vr2, $a2
-; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_4xi32_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
+; LA32-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI8_0)
+; LA32-NEXT: vld $vr1, $a0, 0
+; LA32-NEXT: vreplgr2vr.w $vr2, $a3
+; LA32-NEXT: vseq.w $vr0, $vr2, $vr0
+; LA32-NEXT: vreplgr2vr.w $vr2, $a2
+; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_4xi32_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI8_0)
+; LA64-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI8_0)
+; LA64-NEXT: vld $vr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: vreplgr2vr.w $vr2, $a0
+; LA64-NEXT: vseq.w $vr0, $vr2, $vr0
+; LA64-NEXT: vreplgr2vr.w $vr2, $a2
+; LA64-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx
store <4 x i32> %v_new, ptr %dst
@@ -139,18 +184,36 @@ define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind {
}
define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
-; CHECK-LABEL: insert_2xi64_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a3, 31, 0
-; CHECK-NEXT: vreplgr2vr.d $vr2, $a0
-; CHECK-NEXT: vseq.d $vr0, $vr2, $vr0
-; CHECK-NEXT: vreplgr2vr.d $vr2, $a2
-; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_2xi64_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a5, %pc_hi20(.LCPI9_0)
+; LA32-NEXT: vld $vr0, $a5, %pc_lo12(.LCPI9_0)
+; LA32-NEXT: add.w $a4, $a4, $a4
+; LA32-NEXT: vld $vr1, $a0, 0
+; LA32-NEXT: vreplgr2vr.w $vr2, $a4
+; LA32-NEXT: vseq.w $vr2, $vr2, $vr0
+; LA32-NEXT: vreplgr2vr.w $vr3, $a2
+; LA32-NEXT: vbitsel.v $vr1, $vr1, $vr3, $vr2
+; LA32-NEXT: addi.w $a0, $a4, 1
+; LA32-NEXT: vreplgr2vr.w $vr2, $a0
+; LA32-NEXT: vseq.w $vr0, $vr2, $vr0
+; LA32-NEXT: vreplgr2vr.w $vr2, $a3
+; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_2xi64_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: pcalau12i $a4, %pc_hi20(.LCPI9_0)
+; LA64-NEXT: vld $vr0, $a4, %pc_lo12(.LCPI9_0)
+; LA64-NEXT: vld $vr1, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a3, 31, 0
+; LA64-NEXT: vreplgr2vr.d $vr2, $a0
+; LA64-NEXT: vseq.d $vr0, $vr2, $vr0
+; LA64-NEXT: vreplgr2vr.d $vr2, $a2
+; LA64-NEXT: vbitsel.v $vr0, $vr1, $vr2, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx
store <2 x i64> %v_new, ptr %dst
@@ -158,19 +221,32 @@ define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind {
}
define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind {
-; CHECK-LABEL: insert_4xfloat_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplgr2vr.w $vr3, $a0
-; CHECK-NEXT: vseq.w $vr1, $vr3, $vr1
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_4xfloat_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
+; LA32-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI10_0)
+; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA32-NEXT: vld $vr2, $a0, 0
+; LA32-NEXT: vreplgr2vr.w $vr3, $a2
+; LA32-NEXT: vseq.w $vr1, $vr3, $vr1
+; LA32-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA32-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_4xfloat_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0
+; LA64-NEXT: pcalau12i $a3, %pc_hi20(.LCPI10_0)
+; LA64-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI10_0)
+; LA64-NEXT: vld $vr2, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
+; LA64-NEXT: vreplgr2vr.w $vr3, $a0
+; LA64-NEXT: vseq.w $vr1, $vr3, $vr1
+; LA64-NEXT: vreplvei.w $vr0, $vr0, 0
+; LA64-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <4 x float>, ptr %src
%v_new = insertelement <4 x float> %v, float %ins, i32 %idx
store <4 x float> %v_new, ptr %dst
@@ -178,19 +254,34 @@ define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwi
}
define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind {
-; CHECK-LABEL: insert_2xdouble_idx:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
-; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: vld $vr2, $a0, 0
-; CHECK-NEXT: bstrpick.d $a0, $a2, 31, 0
-; CHECK-NEXT: vreplgr2vr.d $vr3, $a0
-; CHECK-NEXT: vseq.d $vr1, $vr3, $vr1
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
-; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: insert_2xdouble_idx:
+; LA32: # %bb.0:
+; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA32-NEXT: vld $vr1, $a0, 0
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
+; LA32-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI11_0)
+; LA32-NEXT: vrepli.b $vr3, 0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT: vseq.d $vr2, $vr3, $vr2
+; LA32-NEXT: vreplvei.d $vr0, $vr0, 0
+; LA32-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: insert_2xdouble_idx:
+; LA64: # %bb.0:
+; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0
+; LA64-NEXT: pcalau12i $a3, %pc_hi20(.LCPI11_0)
+; LA64-NEXT: vld $vr1, $a3, %pc_lo12(.LCPI11_0)
+; LA64-NEXT: vld $vr2, $a0, 0
+; LA64-NEXT: bstrpick.d $a0, $a2, 31, 0
+; LA64-NEXT: vreplgr2vr.d $vr3, $a0
+; LA64-NEXT: vseq.d $vr1, $vr3, $vr1
+; LA64-NEXT: vreplvei.d $vr0, $vr0, 0
+; LA64-NEXT: vbitsel.v $vr0, $vr2, $vr0, $vr1
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%v = load volatile <2 x double>, ptr %src
%v_new = insertelement <2 x double> %v, double %ins, i32 %idx
store <2 x double> %v_new, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
index 10510786f3216..40961bc9a08b9 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
;; vreplvei.b
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
index d1c071b45ddff..b13433ee5d159 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
index cd80dcb44e433..bee4ba6a84334 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
;; vshuf4i.b
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll
index 57fd09ed2e09b..9c3a6f7be0542 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-add.ll
@@ -1,17 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefix=LA64
define void @vec_reduce_add_v16i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v16i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v16i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v16i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.b $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <16 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %v)
store i8 %res, ptr %dst
@@ -19,16 +31,29 @@ define void @vec_reduce_add_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.b $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -36,15 +61,25 @@ define void @vec_reduce_add_v8i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v4i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v4i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v4i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v4i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA64-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.b $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <4 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %v)
store i8 %res, ptr %dst
@@ -52,13 +87,23 @@ define void @vec_reduce_add_v4i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v2i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v2i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.h $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v2i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; LA32-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.b $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v2i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; LA64-NEXT: vhaddw.h.b $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.b $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <2 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %v)
store i8 %res, ptr %dst
@@ -66,15 +111,25 @@ define void @vec_reduce_add_v2i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v8i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v8i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v8i16:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v8i16:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.h $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <8 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %v)
store i16 %res, ptr %dst
@@ -82,15 +137,27 @@ define void @vec_reduce_add_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.h $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -98,13 +165,23 @@ define void @vec_reduce_add_v4i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v2i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v2i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v2i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.h $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v2i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA64-NEXT: vhaddw.w.h $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.h $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <2 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %v)
store i16 %res, ptr %dst
@@ -112,14 +189,23 @@ define void @vec_reduce_add_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.w $a0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA32-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA64-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.w $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -127,13 +213,25 @@ define void @vec_reduce_add_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vhaddw.d.w $vr0, $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
+; LA64-NEXT: st.w $a0, $a1, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -141,12 +239,27 @@ define void @vec_reduce_add_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_add_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_add_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_add_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 1
+; LA32-NEXT: add.w $a3, $a4, $a3
+; LA32-NEXT: add.w $a0, $a2, $a0
+; LA32-NEXT: sltu $a2, $a0, $a2
+; LA32-NEXT: add.w $a2, $a3, $a2
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_add_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vhaddw.q.d $vr0, $vr0, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
index cca4ce30758f1..734ecba843a4e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-and.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_and_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_and_v16i8:
@@ -22,18 +23,33 @@ define void @vec_reduce_and_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_and_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_and_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_and_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_and_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -91,16 +107,29 @@ define void @vec_reduce_and_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_and_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_and_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_and_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_and_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -123,15 +152,26 @@ define void @vec_reduce_and_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_and_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_and_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_and_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_and_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -139,14 +179,27 @@ define void @vec_reduce_and_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_and_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_and_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_and_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vori.b $vr1, $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr1, 4
+; LA32-NEXT: vand.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_and_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -154,13 +207,26 @@ define void @vec_reduce_and_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_and_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_and_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_and_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 0
+; LA32-NEXT: and $a3, $a4, $a3
+; LA32-NEXT: and $a0, $a2, $a0
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: st.w $a3, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_and_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vand.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
index ce431f0cf6a74..e833930830c3f 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-or.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_or_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_or_v16i8:
@@ -22,18 +23,33 @@ define void @vec_reduce_or_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_or_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_or_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_or_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_or_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -91,16 +107,29 @@ define void @vec_reduce_or_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_or_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_or_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_or_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_or_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -123,15 +152,26 @@ define void @vec_reduce_or_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_or_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_or_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_or_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_or_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -139,14 +179,27 @@ define void @vec_reduce_or_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_or_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_or_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_or_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vori.b $vr1, $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr1, 4
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_or_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -154,13 +207,26 @@ define void @vec_reduce_or_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_or_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_or_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_or_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 0
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: or $a0, $a2, $a0
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: st.w $a3, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_or_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
index bdf153ad7794f..2220df68cddfd 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smax.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_smax_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smax_v16i8:
@@ -22,18 +23,33 @@ define void @vec_reduce_smax_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smax_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smax_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
-; CHECK-NEXT: vmax.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smax_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.b $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmax.b $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA32-NEXT: vmax.b $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smax_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.b $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmax.b $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA64-NEXT: vmax.b $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -91,16 +107,29 @@ define void @vec_reduce_smax_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smax_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smax_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmax.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smax_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.h $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmax.h $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smax_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.h $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmax.h $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -123,15 +152,26 @@ define void @vec_reduce_smax_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smax_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smax_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smax_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smax_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -139,14 +179,26 @@ define void @vec_reduce_smax_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smax_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smax_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smax_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smax_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.w $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -154,13 +206,37 @@ define void @vec_reduce_smax_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smax_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smax_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.d $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smax_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: slt $a5, $a4, $a3
+; LA32-NEXT: xor $a6, $a3, $a4
+; LA32-NEXT: sltui $a6, $a6, 1
+; LA32-NEXT: masknez $a5, $a5, $a6
+; LA32-NEXT: sltu $a7, $a2, $a0
+; LA32-NEXT: maskeqz $a6, $a7, $a6
+; LA32-NEXT: or $a5, $a6, $a5
+; LA32-NEXT: masknez $a2, $a2, $a5
+; LA32-NEXT: maskeqz $a0, $a0, $a5
+; LA32-NEXT: or $a0, $a0, $a2
+; LA32-NEXT: masknez $a2, $a4, $a5
+; LA32-NEXT: maskeqz $a3, $a3, $a5
+; LA32-NEXT: or $a2, $a3, $a2
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smax_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.d $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
index e3b3c5e6f2410..50d76a3872e1e 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-smin.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_smin_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_smin_v16i8:
@@ -22,18 +23,33 @@ define void @vec_reduce_smin_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smin_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smin_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
-; CHECK-NEXT: vmin.b $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smin_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.b $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmin.b $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA32-NEXT: vmin.b $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smin_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.b $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmin.b $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA64-NEXT: vmin.b $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -91,16 +107,29 @@ define void @vec_reduce_smin_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smin_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smin_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmin.h $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smin_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.h $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmin.h $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smin_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.h $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmin.h $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -123,15 +152,26 @@ define void @vec_reduce_smin_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smin_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smin_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smin_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smin_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -139,14 +179,26 @@ define void @vec_reduce_smin_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smin_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smin_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.w $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smin_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smin_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.w $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -154,13 +206,37 @@ define void @vec_reduce_smin_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_smin_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_smin_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.d $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_smin_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 1
+; LA32-NEXT: slt $a5, $a4, $a3
+; LA32-NEXT: xor $a6, $a4, $a3
+; LA32-NEXT: sltui $a6, $a6, 1
+; LA32-NEXT: masknez $a5, $a5, $a6
+; LA32-NEXT: sltu $a7, $a2, $a0
+; LA32-NEXT: maskeqz $a6, $a7, $a6
+; LA32-NEXT: or $a5, $a6, $a5
+; LA32-NEXT: masknez $a0, $a0, $a5
+; LA32-NEXT: maskeqz $a2, $a2, $a5
+; LA32-NEXT: or $a0, $a2, $a0
+; LA32-NEXT: masknez $a2, $a3, $a5
+; LA32-NEXT: maskeqz $a3, $a4, $a5
+; LA32-NEXT: or $a2, $a3, $a2
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_smin_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.d $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
index fff2304befd68..88146c78a969d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umax.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_umax_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umax_v16i8:
@@ -22,18 +23,33 @@ define void @vec_reduce_umax_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umax_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umax_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
-; CHECK-NEXT: vmax.bu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umax_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.bu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmax.bu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA32-NEXT: vmax.bu $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umax_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.bu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmax.bu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA64-NEXT: vmax.bu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -91,16 +107,29 @@ define void @vec_reduce_umax_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umax_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umax_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmax.hu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umax_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.hu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmax.hu $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umax_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.hu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmax.hu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -123,15 +152,26 @@ define void @vec_reduce_umax_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umax_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umax_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umax_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umax_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -139,14 +179,26 @@ define void @vec_reduce_umax_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umax_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umax_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmax.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umax_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umax_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmax.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -154,13 +206,37 @@ define void @vec_reduce_umax_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umax_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umax_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmax.du $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umax_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 3
+; LA32-NEXT: sltu $a5, $a4, $a3
+; LA32-NEXT: xor $a6, $a3, $a4
+; LA32-NEXT: sltui $a6, $a6, 1
+; LA32-NEXT: masknez $a5, $a5, $a6
+; LA32-NEXT: sltu $a7, $a2, $a0
+; LA32-NEXT: maskeqz $a6, $a7, $a6
+; LA32-NEXT: or $a5, $a6, $a5
+; LA32-NEXT: masknez $a2, $a2, $a5
+; LA32-NEXT: maskeqz $a0, $a0, $a5
+; LA32-NEXT: or $a0, $a0, $a2
+; LA32-NEXT: masknez $a2, $a4, $a5
+; LA32-NEXT: maskeqz $a3, $a3, $a5
+; LA32-NEXT: or $a2, $a3, $a2
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umax_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmax.du $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
index e14a294cbcfb6..e9d4b4aab6f91 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-umin.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_umin_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_umin_v16i8:
@@ -22,18 +23,33 @@ define void @vec_reduce_umin_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umin_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umin_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
-; CHECK-NEXT: vmin.bu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umin_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.bu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmin.bu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA32-NEXT: vmin.bu $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umin_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.bu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmin.bu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA64-NEXT: vmin.bu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -91,16 +107,29 @@ define void @vec_reduce_umin_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umin_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umin_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vmin.hu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umin_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.hu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vmin.hu $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umin_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.hu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vmin.hu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -123,15 +152,26 @@ define void @vec_reduce_umin_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umin_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umin_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umin_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umin_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -139,14 +179,26 @@ define void @vec_reduce_umin_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umin_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umin_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vmin.wu $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umin_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umin_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vmin.wu $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -154,13 +206,37 @@ define void @vec_reduce_umin_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_umin_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_umin_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vmin.du $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_umin_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 0
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 1
+; LA32-NEXT: sltu $a5, $a4, $a3
+; LA32-NEXT: xor $a6, $a4, $a3
+; LA32-NEXT: sltui $a6, $a6, 1
+; LA32-NEXT: masknez $a5, $a5, $a6
+; LA32-NEXT: sltu $a7, $a2, $a0
+; LA32-NEXT: maskeqz $a6, $a7, $a6
+; LA32-NEXT: or $a5, $a6, $a5
+; LA32-NEXT: masknez $a0, $a0, $a5
+; LA32-NEXT: maskeqz $a2, $a2, $a5
+; LA32-NEXT: or $a0, $a2, $a0
+; LA32-NEXT: masknez $a2, $a3, $a5
+; LA32-NEXT: maskeqz $a3, $a4, $a5
+; LA32-NEXT: or $a2, $a3, $a2
+; LA32-NEXT: st.w $a2, $a1, 4
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_umin_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vmin.du $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
index ae2bb8f91de05..ed965e9e10ee7 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-reduce-xor.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @vec_reduce_xor_v16i8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: vec_reduce_xor_v16i8:
@@ -22,18 +23,33 @@ define void @vec_reduce_xor_v16i8(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_xor_v8i8(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_xor_v8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 1
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_xor_v8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_xor_v8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 1
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.b $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <8 x i8>, ptr %src
%res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %v)
store i8 %res, ptr %dst
@@ -91,16 +107,29 @@ define void @vec_reduce_xor_v8i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_xor_v4i16(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_xor_v4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 2
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_xor_v4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_xor_v4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 2
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i16>, ptr %src
%res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %v)
store i16 %res, ptr %dst
@@ -123,15 +152,26 @@ define void @vec_reduce_xor_v2i16(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_xor_v4i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_xor_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_xor_v4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_xor_v4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <4 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %v)
store i32 %res, ptr %dst
@@ -139,14 +179,27 @@ define void @vec_reduce_xor_v4i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_xor_v2i32(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_xor_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 4
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_xor_v2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vori.b $vr1, $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vbsrl.v $vr1, $vr1, 4
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_xor_v2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 4
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i32>, ptr %src
%res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %v)
store i32 %res, ptr %dst
@@ -154,13 +207,26 @@ define void @vec_reduce_xor_v2i32(ptr %src, ptr %dst) nounwind {
}
define void @vec_reduce_xor_v2i64(ptr %src, ptr %dst) nounwind {
-; CHECK-LABEL: vec_reduce_xor_v2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: vec_reduce_xor_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT: vpickve2gr.w $a2, $vr0, 1
+; LA32-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA32-NEXT: vpickve2gr.w $a4, $vr0, 0
+; LA32-NEXT: xor $a3, $a4, $a3
+; LA32-NEXT: xor $a0, $a2, $a0
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: st.w $a3, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vec_reduce_xor_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vbsrl.v $vr1, $vr0, 8
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%v = load <2 x i64>, ptr %src
%res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %v)
store i64 %res, ptr %dst
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
index 9485df746ff1c..dce6dc9f2aa37 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
-
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
; CHECK-LABEL: load_sext_2i8_to_2i64:
@@ -40,15 +40,27 @@ entry:
}
define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_8i8_to_8i16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.h $vr0, $vr0, 8
-; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_sext_8i8_to_8i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
+; LA32-NEXT: vslli.h $vr0, $vr0, 8
+; LA32-NEXT: vsrai.h $vr0, $vr0, 8
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
+; LA64-NEXT: vslli.h $vr0, $vr0, 8
+; LA64-NEXT: vsrai.h $vr0, $vr0, 8
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
entry:
%A = load <8 x i8>, ptr %ptr
%B = sext <8 x i8> %A to <8 x i16>
@@ -75,15 +87,27 @@ entry:
}
define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_4i16_to_4i32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vslli.w $vr0, $vr0, 16
-; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_sext_4i16_to_4i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
+; LA32-NEXT: vslli.w $vr0, $vr0, 16
+; LA32-NEXT: vsrai.w $vr0, $vr0, 16
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_4i16_to_4i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
+; LA64-NEXT: vslli.w $vr0, $vr0, 16
+; LA64-NEXT: vsrai.w $vr0, $vr0, 16
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
entry:
%A = load <4 x i16>, ptr %ptr
%B = sext <4 x i16> %A to <4 x i32>
@@ -92,15 +116,26 @@ entry:
}
define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_2i32_to_2i64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
-; CHECK-NEXT: vslli.d $vr0, $vr0, 32
-; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_sext_2i32_to_2i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vslli.d $vr0, $vr0, 32
+; LA32-NEXT: vsrai.d $vr0, $vr0, 32
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_sext_2i32_to_2i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
+; LA64-NEXT: vslli.d $vr0, $vr0, 32
+; LA64-NEXT: vsrai.d $vr0, $vr0, 32
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
entry:
%A = load <2 x i32>, ptr %ptr
%B = sext <2 x i32> %A to <2 x i64>
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-any-ext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-any-ext.ll
index 9b1b584bd9c76..bb008ee5eb903 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-any-ext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-any-ext.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
define void @shuffle_any_ext_2i8_to_2i64(ptr %ptr, ptr %dst) nounwind {
; CHECK-LABEL: shuffle_any_ext_2i8_to_2i64:
@@ -35,13 +36,22 @@ define void @shuffle_any_ext_2i16_to_2i64(ptr %ptr, ptr %dst) nounwind {
}
define void @shuffle_any_ext_2i32_to_2i64(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: shuffle_any_ext_2i32_to_2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: shuffle_any_ext_2i32_to_2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: shuffle_any_ext_2i32_to_2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vshuf4i.w $vr0, $vr0, 16
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%x = load <2 x i32>, ptr %ptr
%y = shufflevector <2 x i32> %x, <2 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
%r = bitcast <4 x i32> %y to <2 x i64>
@@ -66,13 +76,23 @@ define void @shuffle_any_ext_4i8_to_4i32(ptr %ptr, ptr %dst) nounwind {
}
define void @shuffle_any_ext_4i16_to_4i32(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: shuffle_any_ext_4i16_to_4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: shuffle_any_ext_4i16_to_4i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vilvl.h $vr0, $vr0, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: shuffle_any_ext_4i16_to_4i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vilvl.h $vr0, $vr0, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%x = load <4 x i16>, ptr %ptr
%y = shufflevector <4 x i16> %x, <4 x i16> poison, <8 x i32> <i32 0, i32 7, i32 1, i32 6, i32 2, i32 5, i32 3, i32 4>
%r = bitcast <8 x i16> %y to <4 x i32>
@@ -81,13 +101,23 @@ define void @shuffle_any_ext_4i16_to_4i32(ptr %ptr, ptr %dst) nounwind {
}
define void @shuffle_any_ext_8i8_to_8i16(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: shuffle_any_ext_8i8_to_8i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vst $vr0, $a1, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: shuffle_any_ext_8i8_to_8i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vilvl.b $vr0, $vr0, $vr0
+; LA32-NEXT: vst $vr0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: shuffle_any_ext_8i8_to_8i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vilvl.b $vr0, $vr0, $vr0
+; LA64-NEXT: vst $vr0, $a1, 0
+; LA64-NEXT: ret
%x = load <8 x i8>, ptr %ptr
%y = shufflevector <8 x i8> %x, <8 x i8> poison, <16 x i32> <i32 0, i32 15, i32 1, i32 14, i32 2, i32 13, i32 3, i32 12, i32 4, i32 11, i32 5, i32 10, i32 6, i32 9, i32 7, i32 8>
%r = bitcast <16 x i8> %y to <8 x i16>
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index b1e3f74cd1739..be241925a2788 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
;; TODO For these special shuffle mask, we can lower it to vbsll + vbsrl + vor.
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index ff0f252ba2bdf..5275d5326f73a 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
define <16 x i8> @shuffle_16i8_vbsll_v_1(<16 x i8> %a) nounwind {
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
index e056e7c38ddcd..314350acd23d6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -1,13 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefix=LA64
define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_2i64_to_2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 8
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_2i64_to_2i32:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_2i64_to_2i32:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vshuf4i.w $vr0, $vr0, 8
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i32>
store <2 x i32> %trunc, ptr %dst
@@ -15,14 +25,24 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_2i64_to_2i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_2i64_to_2i16:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
+; LA32-NEXT: vshuf.h $vr1, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr1, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_2i64_to_2i16:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; LA64-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
+; LA64-NEXT: vshuf.h $vr1, $vr0, $vr0
+; LA64-NEXT: vstelm.w $vr1, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i16>
store <2 x i16> %trunc, ptr %dst
@@ -30,14 +50,23 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_2i64_to_2i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_2i64_to_2i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
+; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_2i64_to_2i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; LA64-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
+; LA64-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i8>
store <2 x i8> %trunc, ptr %dst
@@ -45,12 +74,22 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_4i32_to_4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_4i32_to_4i16:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickev.h $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_4i32_to_4i16:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickev.h $vr0, $vr0, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i16>
store <4 x i16> %trunc, ptr %dst
@@ -58,14 +97,24 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_4i32_to_4i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_4i32_to_4i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA32-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
+; LA32-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_4i32_to_4i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; LA64-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
+; LA64-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i8>
store <4 x i8> %trunc, ptr %dst
@@ -73,12 +122,22 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_8i16_to_8i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_8i16_to_8i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vpickev.b $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT: st.w $a0, $a1, 4
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_8i16_to_8i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vpickev.b $vr0, $vr0, $vr0
+; LA64-NEXT: vstelm.d $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <8 x i16>, ptr %ptr
%trunc = trunc <8 x i16> %a to <8 x i8>
store <8 x i8> %trunc, ptr %dst
@@ -86,13 +145,24 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_2i32_to_2i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 8
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_2i32_to_2i16:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vshuf4i.h $vr0, $vr0, 8
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_2i32_to_2i16:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vshuf4i.h $vr0, $vr0, 8
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i16>
store <2 x i16> %trunc, ptr %dst
@@ -100,15 +170,27 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_2i32_to_2i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_2i32_to_2i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: pcalau12i $a3, %pc_hi20(.LCPI7_0)
+; LA32-NEXT: vld $vr0, $a3, %pc_lo12(.LCPI7_0)
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_2i32_to_2i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: pcalau12i $a2, %pc_hi20(.LCPI7_0)
+; LA64-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
+; LA64-NEXT: vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i8>
store <2 x i8> %trunc, ptr %dst
@@ -116,13 +198,24 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_4i16_to_4i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.d $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
-; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_4i16_to_4i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a2, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, 4
+; LA32-NEXT: vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT: vpickev.b $vr0, $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT: st.w $a0, $a1, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_4i16_to_4i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vpickev.b $vr0, $vr0, $vr0
+; LA64-NEXT: vstelm.w $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <4 x i16>, ptr %ptr
%trunc = trunc <4 x i16> %a to <4 x i8>
store <4 x i8> %trunc, ptr %dst
@@ -130,17 +223,23 @@ define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
}
define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) nounwind {
-; CHECK-LABEL: load_trunc_2i16_to_2i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: ld.w $a0, $a0, 0
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 8
-; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: load_trunc_2i16_to_2i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT: vshuf4i.b $vr0, $vr0, 8
+; LA32-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: load_trunc_2i16_to_2i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA64-NEXT: vshuf4i.b $vr0, $vr0, 8
+; LA64-NEXT: vstelm.h $vr0, $a1, 0, 0
+; LA64-NEXT: ret
%a = load <2 x i16>, ptr %ptr
%trunc = trunc <2 x i16> %a to <2 x i8>
store <2 x i8> %trunc, ptr %dst
ret void
}
-
-
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
index 7fa591db5d1fa..8bdeebef13dd2 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch64 --mattr=+lsx --verify-machineinstrs < %s | FileCheck %s
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx --verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx --verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LA64
define i16 @vmsk_eq_allzeros_i8(<16 x i8 > %a) {
; CHECK-LABEL: vmsk_eq_allzeros_i8:
@@ -605,17 +606,29 @@ define i4 @vmsk_eq_allzeros_v4i8(<4 x i8> %a) {
}
define i32 @vmsk2_eq_allzeros_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_eq_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vseqi.b $vr0, $vr0, 0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vseqi.b $vr0, $vr1, 0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_eq_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vseqi.b $vr0, $vr0, 0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vseqi.b $vr0, $vr1, 0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_eq_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vseqi.b $vr0, $vr0, 0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vseqi.b $vr0, $vr1, 0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp eq <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -623,18 +636,31 @@ entry:
}
define i32 @vmsk2_sgt_allzeros_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_sgt_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr1
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_sgt_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vrepli.b $vr2, 0
+; LA32-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vslt.b $vr0, $vr2, $vr1
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_sgt_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vrepli.b $vr2, 0
+; LA64-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vslt.b $vr0, $vr2, $vr1
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp sgt <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -642,18 +668,31 @@ entry:
}
define i32 @vmsk2_sgt_allones_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_sgt_allones_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr2, -1
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr1
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_sgt_allones_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vrepli.b $vr2, -1
+; LA32-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vslt.b $vr0, $vr2, $vr1
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_sgt_allones_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vrepli.b $vr2, -1
+; LA64-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vslt.b $vr0, $vr2, $vr1
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp sgt <32 x i8> %a, splat (i8 -1)
%2 = bitcast <32 x i1> %1 to i32
@@ -661,18 +700,31 @@ entry:
}
define i32 @vmsk2_sge_allzeros_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_sge_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vsle.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vsle.b $vr0, $vr2, $vr1
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_sge_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vrepli.b $vr2, 0
+; LA32-NEXT: vsle.b $vr0, $vr2, $vr0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vsle.b $vr0, $vr2, $vr1
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_sge_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vrepli.b $vr2, 0
+; LA64-NEXT: vsle.b $vr0, $vr2, $vr0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vsle.b $vr0, $vr2, $vr1
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp sge <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -680,15 +732,25 @@ entry:
}
define i32 @vmsk2_slt_allzeros_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_slt_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vmskltz.b $vr0, $vr1
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_slt_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vmskltz.b $vr0, $vr1
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_slt_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vmskltz.b $vr0, $vr1
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp slt <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -696,17 +758,29 @@ entry:
}
define i32 @vmsk2_sle_allzeros_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_sle_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vslei.b $vr0, $vr0, 0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vslei.b $vr0, $vr1, 0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_sle_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vslei.b $vr0, $vr0, 0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vslei.b $vr0, $vr1, 0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_sle_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vslei.b $vr0, $vr0, 0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vslei.b $vr0, $vr1, 0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp sle <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -714,17 +788,29 @@ entry:
}
define i32 @vmsk2_sle_allones_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_sle_allones_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vslei.b $vr0, $vr0, -1
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vslei.b $vr0, $vr1, -1
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_sle_allones_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vslei.b $vr0, $vr0, -1
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vslei.b $vr0, $vr1, -1
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_sle_allones_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vslei.b $vr0, $vr0, -1
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vslei.b $vr0, $vr1, -1
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp sle <32 x i8> %a, splat (i8 -1)
%2 = bitcast <32 x i1> %1 to i32
@@ -732,19 +818,33 @@ entry:
}
define i32 @vmsk2_ne_allzeros_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_ne_allzeros_i8:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vseqi.b $vr0, $vr0, 0
-; CHECK-NEXT: vxori.b $vr0, $vr0, 255
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vseqi.b $vr0, $vr1, 0
-; CHECK-NEXT: vxori.b $vr0, $vr0, 255
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_ne_allzeros_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: vseqi.b $vr0, $vr0, 0
+; LA32-NEXT: vxori.b $vr0, $vr0, 255
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vseqi.b $vr0, $vr1, 0
+; LA32-NEXT: vxori.b $vr0, $vr0, 255
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_ne_allzeros_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: vseqi.b $vr0, $vr0, 0
+; LA64-NEXT: vxori.b $vr0, $vr0, 255
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vseqi.b $vr0, $vr1, 0
+; LA64-NEXT: vxori.b $vr0, $vr0, 255
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
entry:
%1 = icmp ne <32 x i8> %a, splat (i8 0)
%2 = bitcast <32 x i1> %1 to i32
@@ -752,38 +852,66 @@ entry:
}
define i32 @vmsk2_sgt_v32i8(<32 x i8> %a, <32 x i8> %b) {
-; CHECK-LABEL: vmsk2_sgt_v32i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vslt.b $vr0, $vr3, $vr1
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_sgt_v32i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vslt.b $vr0, $vr3, $vr1
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_sgt_v32i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vslt.b $vr0, $vr3, $vr1
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
%x = icmp sgt <32 x i8> %a, %b
%res = bitcast <32 x i1> %x to i32
ret i32 %res
}
define i32 @vmsk2_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) {
-; CHECK-LABEL: vmsk2_sgt_and_sgt_v32i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vslt.b $vr0, $vr2, $vr0
-; CHECK-NEXT: vslt.b $vr1, $vr3, $vr1
-; CHECK-NEXT: vslt.b $vr2, $vr6, $vr4
-; CHECK-NEXT: vslt.b $vr3, $vr7, $vr5
-; CHECK-NEXT: vand.v $vr1, $vr1, $vr3
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr2
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vmskltz.b $vr0, $vr1
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_sgt_and_sgt_v32i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA32-NEXT: vslt.b $vr1, $vr3, $vr1
+; LA32-NEXT: vslt.b $vr2, $vr6, $vr4
+; LA32-NEXT: vslt.b $vr3, $vr7, $vr5
+; LA32-NEXT: vand.v $vr1, $vr1, $vr3
+; LA32-NEXT: vand.v $vr0, $vr0, $vr2
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vmskltz.b $vr0, $vr1
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_sgt_and_sgt_v32i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vslt.b $vr0, $vr2, $vr0
+; LA64-NEXT: vslt.b $vr1, $vr3, $vr1
+; LA64-NEXT: vslt.b $vr2, $vr6, $vr4
+; LA64-NEXT: vslt.b $vr3, $vr7, $vr5
+; LA64-NEXT: vand.v $vr1, $vr1, $vr3
+; LA64-NEXT: vand.v $vr0, $vr0, $vr2
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vmskltz.b $vr0, $vr1
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
%x0 = icmp sgt <32 x i8> %a, %b
%x1 = icmp sgt <32 x i8> %c, %d
%y = and <32 x i1> %x0, %x1
@@ -792,17 +920,29 @@ define i32 @vmsk2_sgt_and_sgt_v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <3
}
define i32 @vmsk2_trunc_i8(<32 x i8> %a) {
-; CHECK-LABEL: vmsk2_trunc_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vslli.b $vr0, $vr0, 7
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
-; CHECK-NEXT: vslli.b $vr0, $vr1, 7
-; CHECK-NEXT: vmskltz.b $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.hu $a1, $vr0, 0
-; CHECK-NEXT: slli.d $a1, $a1, 16
-; CHECK-NEXT: or $a0, $a0, $a1
-; CHECK-NEXT: ret
+; LA32-LABEL: vmsk2_trunc_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: vslli.b $vr0, $vr0, 7
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA32-NEXT: vslli.b $vr0, $vr1, 7
+; LA32-NEXT: vmskltz.b $vr0, $vr0
+; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA32-NEXT: slli.w $a1, $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: vmsk2_trunc_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: vslli.b $vr0, $vr0, 7
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0
+; LA64-NEXT: vslli.b $vr0, $vr1, 7
+; LA64-NEXT: vmskltz.b $vr0, $vr0
+; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0
+; LA64-NEXT: slli.d $a1, $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ret
%y = trunc <32 x i8> %a to <32 x i1>
%res = bitcast <32 x i1> %y to i32
ret i32 %res
diff --git a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
index 54328260d9d14..42ef9133bf04d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/widen-shuffle-mask.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
define <16 x i8> @widen_shuffle_mask_v16i8_to_v8i16(<16 x i8> %a, <16 x i8> %b) {