[llvm] [LoongArch] Optimize 128-to-256-bit vector insertion and 256-to-128-bit subvector extraction (PR #146300)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 29 19:35:50 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: hev (heiher)
This patch replaces stack-based accesses with register moves when converting between 128-bit (LSX) and 256-bit (LASX) vectors. A 128-bit subvector extract from, or insert into, the lower half of a 256-bit vector is now treated as a subregister copy that needs no instruction.
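To illustrate the low-half extract case (a minimal sketch of my own, not taken from the patch's test files; the function name is hypothetical): because the LSX registers ($vr) alias the low 128 bits of the LASX registers ($xr), an extract at index 0 can now compile to no instruction at all:

```llvm
; Sketch: extracting the low 128 bits of a 256-bit vector. With this patch
; the extract_subvector is selected as a plain subregister copy, so no
; xvpermi.q and no stack round-trip is emitted.
define <4 x i32> @extract_lo128(<8 x i32> %v) {
  %lo = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %lo
}
```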
---
Patch is 41.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146300.diff
6 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+71-1)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+29-6)
- (added) llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll (+218)
- (added) llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll (+668)
- (modified) llvm/test/CodeGen/LoongArch/lasx/issue107355.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7dae4d30d31be..addb0e056ff9c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -289,6 +289,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
@@ -350,7 +351,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::SETCC, VT, Legal);
setOperationAction(ISD::VSELECT, VT, Legal);
@@ -497,6 +499,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
+ case ISD::CONCAT_VECTORS:
+ return lowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::BITREVERSE:
@@ -2520,6 +2524,72 @@ SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
+SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT ResVT = Op.getSimpleValueType();
+ assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
+
+ unsigned NumOperands = Op.getNumOperands();
+ unsigned NumFreezeUndef = 0;
+ unsigned NumZero = 0;
+ unsigned NumNonZero = 0;
+ unsigned NonZeros = 0;
+ SmallSet<SDValue, 4> Undefs;
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ SDValue SubVec = Op.getOperand(i);
+ if (SubVec.isUndef())
+ continue;
+ if (ISD::isFreezeUndef(SubVec.getNode())) {
+ // If the freeze(undef) has multiple uses then we must fold to zero.
+ if (SubVec.hasOneUse()) {
+ ++NumFreezeUndef;
+ } else {
+ ++NumZero;
+ Undefs.insert(SubVec);
+ }
+ } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
+ ++NumZero;
+ else {
+ assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
+ NonZeros |= 1 << i;
+ ++NumNonZero;
+ }
+ }
+
+ // If we have more than 2 non-zeros, build each half separately.
+ if (NumNonZero > 2) {
+ MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
+ ArrayRef<SDUse> Ops = Op->ops();
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
+ Ops.slice(0, NumOperands / 2));
+ SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
+ Ops.slice(NumOperands / 2));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+ }
+
+ // Otherwise, build it up through insert_subvectors.
+ SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
+ : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
+ : DAG.getUNDEF(ResVT));
+
+  // Replace multi-use freeze(undef) operands with zero vectors.
+ for (SDValue U : Undefs)
+ DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
+
+ MVT SubVT = Op.getOperand(0).getSimpleValueType();
+ unsigned NumSubElems = SubVT.getVectorNumElements();
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ if ((NonZeros & (1 << i)) == 0)
+ continue;
+
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
+ DAG.getVectorIdxConstant(i * NumSubElems, DL));
+ }
+
+ return Vec;
+}
+
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 60dc2b385a75c..6b49a98f3ae46 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -376,6 +376,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITREVERSE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index ff7b0f2ae3f25..95e9fd49d1c0d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1860,12 +1860,6 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)),
(XVFTINTRZ_LU_D v4f64:$vj)),
sub_128)>;
-// XVPERMI_Q
-foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
-def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)),
- (XVPERMI_Q (SUBREG_TO_REG (i64 0), LSX128:$vd, sub_128),
- (SUBREG_TO_REG (i64 0), LSX128:$vj, sub_128), 2)>;
-
// XVABSD_{B/H/W/D}[U]
defm : PatXrXr<abds, "XVABSD">;
defm : PatXrXrU<abdu, "XVABSD">;
@@ -1879,6 +1873,35 @@ def : Pat<(loongarch_xvmskgez (v32i8 LASX256:$vj)), (PseudoXVMSKGEZ_B LASX256:$v
def : Pat<(loongarch_xvmskeqz (v32i8 LASX256:$vj)), (PseudoXVMSKEQZ_B LASX256:$vj)>;
def : Pat<(loongarch_xvmsknez (v32i8 LASX256:$vj)), (PseudoXVMSKNEZ_B LASX256:$vj)>;
+// Subvector tricks
+// Patterns for insert_subvector/extract_subvector
+multiclass subvector_subreg_lowering<RegisterClass subRC, ValueType subVT,
+ RegisterClass RC, ValueType VT,
+ int hiIdx, SubRegIndex subIdx> {
+  // A 128-bit subvector extract from, or insert into, the low half of a
+  // 256-bit vector is just a subregister copy that needs no instruction.
+ def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
+ (subVT (EXTRACT_SUBREG RC:$src, subIdx))>;
+ def : Pat<(VT (insert_subvector undef_or_freeze_undef, subRC:$src, (iPTR 0))),
+ (VT (INSERT_SUBREG (IMPLICIT_DEF), subRC:$src, subIdx))>;
+
+ def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR hiIdx))),
+ (subVT (EXTRACT_SUBREG (XVPERMI_Q (IMPLICIT_DEF), RC:$src, 1), subIdx))>;
+ def : Pat<(VT (insert_subvector RC:$vd, subRC:$vj, (iPTR 0))),
+ (VT (XVPERMI_Q RC:$vd, (INSERT_SUBREG (IMPLICIT_DEF), subRC:$vj, subIdx), 48))>;
+ def : Pat<(VT (insert_subvector RC:$vd, subRC:$vj, (iPTR hiIdx))),
+ (VT (XVPERMI_Q RC:$vd, (INSERT_SUBREG (IMPLICIT_DEF), subRC:$vj, subIdx), 2))>;
+}
+
+defm : subvector_subreg_lowering<LSX128, v4i32, LASX256, v8i32, 4, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v4f32, LASX256, v8f32, 4, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v2i64, LASX256, v4i64, 2, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v2f64, LASX256, v4f64, 2, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v8i16, LASX256, v16i16, 8, sub_128>;
+defm : subvector_subreg_lowering<LSX128, v16i8, LASX256, v32i8, 16, sub_128>;
+
} // Predicates = [HasExtLASX]
/// Intrinsic pattern
diff --git a/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll
new file mode 100644
index 0000000000000..231e82a6d53ac
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/concat-vectors.ll
@@ -0,0 +1,218 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define <32 x i8> @concat_poison_v32i8_1(<16 x i8> %a) {
+; CHECK-LABEL: concat_poison_v32i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <16 x i8> %a, <16 x i8> poison,
+ <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+ i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+ i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @concat_poison_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: concat_poison_v32i8_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <16 x i8> %b, <16 x i8> poison,
+ <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+ i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+ i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @concat_vectors_v32i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: concat_vectors_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <16 x i8> %a, <16 x i8> %b,
+ <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
+ i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+ i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %1
+}
+
+define <16 x i16> @concat_poison_v16i16_1(<8 x i16> %a) {
+; CHECK-LABEL: concat_poison_v16i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x i16> %a, <8 x i16> poison,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @concat_poison_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: concat_poison_v16i16_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x i16> %b, <8 x i16> poison,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @concat_vectors_v16i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: concat_vectors_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x i16> %a, <8 x i16> %b,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i16> %1
+}
+
+define <8 x i32> @concat_poison_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: concat_poison_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x i32> %a, <4 x i32> poison,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @concat_poison_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: concat_poison_v8i32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x i32> %b, <4 x i32> poison,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @concat_vectors_v8i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: concat_vectors_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x i32> %a, <4 x i32> %b,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %1
+}
+
+define <8 x float> @concat_poison_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: concat_poison_v8f32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x float> %a, <4 x float> poison,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %1
+}
+
+define <8 x float> @concat_poison_v8f32_2(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: concat_poison_v8f32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x float> %b, <4 x float> poison,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %1
+}
+
+define <8 x float> @concat_vectors_v8f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: concat_vectors_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x float> %a, <4 x float> %b,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x float> %1
+}
+
+define <4 x i64> @concat_poison_v4i64_1(<2 x i64> %a) {
+; CHECK-LABEL: concat_poison_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @concat_poison_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: concat_poison_v4i64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <2 x i64> %b, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @concat_vectors_v4i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: concat_vectors_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <2 x i64> %a, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i64> %1
+}
+
+define <4 x double> @concat_poison_v4f64_1(<2 x double> %a) {
+; CHECK-LABEL: concat_poison_v4f64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <2 x double> %a, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %1
+}
+
+define <4 x double> @concat_poison_v4f64_2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: concat_poison_v4f64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %1
+}
+
+define <4 x double> @concat_vectors_v4f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: concat_vectors_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x double> %1
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll
new file mode 100644
index 0000000000000..7a90afca376db
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/insert-extract-subvector.ll
@@ -0,0 +1,668 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
+
+define <8 x i32> @insert_lo128_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: insert_lo128_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: insert_hi128_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 4)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_lo128_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_lo128_v8i32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_hi128_v8i32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 4)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_lo128_v8i32_3(<8 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_lo128_v8i32_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_3(<8 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_hi128_v8i32_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 4)
+ ret <8 x i32> %1
+}
+
+declare <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float>, <4 x float>, i64)
+
+define <8 x float> @insert_lo128_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: insert_lo128_v8f32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 0)
+ ret <8 x float> %1
+}
+
+define <8 x float> @insert_hi128_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: insert_hi128_v8f32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: # kill: def $vr0 killed $vr0 def $xr0
+; CHECK-NEXT:...
[truncated]
``````````
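The high-half extract tests fall in the truncated portion of insert-extract-subvector.ll, but per the `(iPTR hiIdx)` extract_subvector pattern added above, a case like the following sketch (my own example; the function name is hypothetical) should lower to a single xvpermi.q plus a cost-free subregister copy:

```llvm
; Sketch: extracting the high 128 bits of a 256-bit vector. Per the new
; hiIdx pattern, this is expected to select to an xvpermi.q with
; immediate 1 (moving the high 128-bit lane down) followed by a
; subregister copy, instead of going through the stack.
define <4 x i32> @extract_hi128(<8 x i32> %v) {
  %hi = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %hi
}
```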
https://github.com/llvm/llvm-project/pull/146300