[llvm] [RISCV][GISel] Support select G_INSERT_SUBVECTOR (PR #171092)
via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 8 00:32:31 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Jianjian Guan (jacquesguan)
Changes:
---
Patch is 22.12 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171092.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp (+63-1)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2-1)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll (+411)
``````````diff
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 4f2e633c1c524..c252875f05dca 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -100,7 +100,7 @@ class RISCVInstructionSelector : public InstructionSelector {
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineIRBuilder &MIB) const;
bool selectExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
-
+ bool selectInsertSubVector(MachineInstr &I, MachineIRBuilder &MIB) const;
ComplexRendererFns selectShiftMask(MachineOperand &Root,
unsigned ShiftWidth) const;
ComplexRendererFns selectShiftMaskXLen(MachineOperand &Root) const {
@@ -1007,6 +1007,66 @@ bool RISCVInstructionSelector::selectExtractSubvector(
return true;
}
+bool RISCVInstructionSelector::selectInsertSubVector(
+ MachineInstr &MI, MachineIRBuilder &MIB) const {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register VecReg = MI.getOperand(1).getReg();
+ Register SubVecReg = MI.getOperand(2).getReg();
+
+ LLT VecTy = MRI->getType(VecReg);
+ LLT SubVecTy = MRI->getType(SubVecReg);
+
+ MVT VecMVT = getMVTForLLT(VecTy);
+ MVT SubVecMVT = getMVTForLLT(SubVecTy);
+
+ unsigned Idx = static_cast<unsigned>(MI.getOperand(3).getImm());
+
+ unsigned SubRegIdx;
+ std::tie(SubRegIdx, Idx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ VecMVT, SubVecMVT, Idx, &TRI);
+
+ // If the Idx hasn't been completely eliminated then this is a subvector
+ // insert which doesn't naturally align to a vector register. These must
+ // be handled using instructions to manipulate the vector registers.
+ if (Idx != 0)
+ return false;
+
+ RISCVVType::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecMVT);
+ [[maybe_unused]] bool IsSubVecPartReg =
+ SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
+ SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
+ SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
+
+ // Constrain dst
+ unsigned DstRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VecMVT);
+ const TargetRegisterClass *DstRC = TRI.getRegClass(DstRegClassID);
+ if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
+ return false;
+
+ // If we haven't set a SubRegIdx, then we must be going between
+ // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
+ if (SubRegIdx == RISCV::NoSubRegister) {
+ assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecMVT) ==
+ DstRegClassID &&
+ "Unexpected subvector insert");
+ MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(SubVecReg);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Use INSERT_SUBREG to insert the subvector into the vector at the
+ // appropriate subregister index.
+ auto Ins =
+ MIB.buildInstr(TargetOpcode::INSERT_SUBREG, {DstReg}, {VecReg, SubVecReg})
+ .addImm(SubRegIdx);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
+}
+
bool RISCVInstructionSelector::select(MachineInstr &MI) {
MachineIRBuilder MIB(MI);
@@ -1281,6 +1341,8 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
return selectIntrinsicWithSideEffects(MI, MIB);
case TargetOpcode::G_EXTRACT_SUBVECTOR:
return selectExtractSubvector(MI, MIB);
+ case TargetOpcode::G_INSERT_SUBVECTOR:
+ return selectInsertSubVector(MI, MIB);
default:
return false;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a6eb225e24609..c79f9ce338c80 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -25580,7 +25580,8 @@ bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
return false;
}
- if (II->getIntrinsicID() == Intrinsic::vector_extract)
+ if (II->getIntrinsicID() == Intrinsic::vector_extract ||
+ II->getIntrinsicID() == Intrinsic::vector_insert)
return false;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
new file mode 100644
index 0000000000000..9c83c6fe2ba27
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/rvv/insert-subvector.ll
@@ -0,0 +1,411 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -global-isel -mattr=+m,+d,+zvfh,+v,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_4(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v10, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv4i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_0(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_2(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_4(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @insert_nxv8i32_nxv2i32_6(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv8i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v11, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_0(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @insert_nxv1i8_nxv4i8_3(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv1i8_nxv4i8_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 3
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.vector.insert.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_0(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv8i32_8(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv8i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv4r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 8 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_0(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_4(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v10, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 4)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_8(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv4i32_12(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv4i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv2r.v v14, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subvec, i64 12)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_0(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_2(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v9, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 2)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_4(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 4)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_6(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v11, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 6)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_8(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 8)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_10(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v13, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 10)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_12(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v14, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 12)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv2i32_14(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv2i32_14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v15, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 2 x i32> %subvec, i64 14)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_0(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 0)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_1(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 1)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @insert_nxv16i32_nxv1i32_6(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec) {
+; CHECK-LABEL: insert_nxv16i32_nxv1i32_6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v11, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x i32> %vec, <vscale x 1 x i32> %subvec, i64 6)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_0(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 0)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_1(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: add a1, a0, a0
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 1)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_2(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a1, a0, 3
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: add a1, a0, a1
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a0
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 2)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_3(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 3
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 3)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_7(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 7)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @insert_nxv16i8_nxv1i8_15(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec) {
+; CHECK-LABEL: insert_nxv16i8_nxv1i8_15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 3
+; CHECK-NEXT: li a1, 7
+; CHECK-NEXT: mul a1, a0, a1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v10, a1
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.vector.insert.nxv1i8.nxv16i8(<vscale x 16 x i8> %vec, <vscale x 1 x i8> %subvec, i64 15)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 32 x half> @insert_nxv32f16_nxv2f16_0(<vscale x 32 x half> %vec, <vscale x 2 x half> %subvec) {
+; CHECK-LABEL: insert_nxv32f16_nxv2f16_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: srli a0, a0, 2
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x half> @llvm.vector.insert.nxv2f16.nxv32f16(<vscale x 32 x h...
[truncated]
``````````
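For readers skimming the diff, here is a minimal illustrative sketch (not taken from the patch; the sub-register name is an assumption based on the backend's usual `sub_vrm1_*` indices) of the register-group-aligned case that the new `selectInsertSubVector` turns into an `INSERT_SUBREG`, mirroring the `insert_nxv8i32_nxv2i32_6` test above:

```llvm
; <vscale x 8 x i32> occupies an LMUL=4 register group while <vscale x 2 x i32>
; fits in a single vector register, so element index 6 decomposes to the fourth
; register of the group (assumed SubRegIdx sub_vrm1_3) with a remaining element
; offset of 0. The selector can therefore emit INSERT_SUBREG, which the test
; above shows ending up as a whole-register move (vmv1r.v v11, v12).
define <vscale x 8 x i32> @aligned_insert(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %sub) {
  %v = call <vscale x 8 x i32> @llvm.vector.insert.nxv2i32.nxv8i32(<vscale x 8 x i32> %vec, <vscale x 2 x i32> %sub, i64 6)
  ret <vscale x 8 x i32> %v
}
```

Indices that do not land on a vector-register boundary (for example the nxv1i32-at-index-1 test) leave a non-zero remainder after the decomposition and are not handled by this path; they become the vslideup sequences shown in the checks above.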
https://github.com/llvm/llvm-project/pull/171092
More information about the llvm-commits mailing list