[llvm] [RISCV][GISEL] Legalize and post-legalize lower G_INSERT_SUBVECTOR (PR #108859)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 14:17:33 PDT 2024
https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/108859
>From 1313d728ea02b805f1d58a78d47ce4bef4edb6f4 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 16 Sep 2024 10:10:46 -0700
Subject: [PATCH 1/7] [RISCV][GISEL] Legalize G_INSERT_SUBVECTOR
This code is heavily based on the SelectionDAG lowerINSERT_SUBVECTOR code.
---
.../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 156 +++++++
.../Target/RISCV/GISel/RISCVLegalizerInfo.h | 1 +
llvm/lib/Target/RISCV/RISCVInstrGISel.td | 18 +
.../rvv/legalize-insert-subvector.mir | 402 ++++++++++++++++++
4 files changed, 577 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index c204683f4e79f8..8396316dc47fdc 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -581,6 +581,12 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
SplatActions.clampScalar(1, sXLen, sXLen);
+ getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
+ .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
+ typeIsLegalBoolVec(1, BoolVecTys, ST)))
+ .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
+
getLegacyLegalizerInfo().computeTables();
}
@@ -915,6 +921,154 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
return true;
}
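+// Return the LMUL=1 vector type (one full vector register) with the same
+// element type as VecTy. For example (illustrative): since
+// RISCV::RVVBitsPerBlock is 64, <vscale x 8 x s32> maps to <vscale x 2 x s32>.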
+static LLT getLMUL1Ty(LLT VecTy) {
+ assert(VecTy.getElementType().getSizeInBits() <= 64 &&
+ "Unexpected vector LLT");
+ return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
+ VecTy.getElementType().getSizeInBits(),
+ VecTy.getElementType());
+}
+
+bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR);
+
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+ uint64_t Idx = MI.getOperand(3).getImm();
+
+ LLT BigTy = MRI.getType(Src1);
+ LLT LitTy = MRI.getType(Src2);
+ Register BigVec = Src1;
+ Register LitVec = Src2;
+
+ // We don't have the ability to slide mask vectors up indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors. Otherwise, we must zeroextend to equivalent i8
+ // vectors and truncate down after the insert.
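+ // As a sketch of the two paths (mirroring cases in the new MIR test): an
+ // insert of <vscale x 32 x i1> into <vscale x 64 x i1> at index 16 can be
+ // bitcast to an insert of <vscale x 4 x i8> into <vscale x 8 x i8> at index
+ // 2, whereas an insert of <vscale x 2 x i1> into <vscale x 4 x i1> at index
+ // 2 cannot, so it is zero-extended to i8 elements, inserted, and narrowed
+ // back to i1 with a compare against zero.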
+ if (LitTy.getElementType() == LLT::scalar(1) &&
+ (Idx != 0 ||
+ MRI.getVRegDef(BigVec)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)) {
+ auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+ auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
+ assert(Idx % 8 == 0 && "Invalid index");
+ assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
+ "Unexpected mask vector lowering");
+ Idx /= 8;
+ BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
+ LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
+ BigVec = MIB.buildBitcast(BigTy, BigVec).getReg(0);
+ LitVec = MIB.buildBitcast(LitTy, LitVec).getReg(0);
+ } else {
+ // We can't slide this mask vector up indexed by its i1 elements.
+ // This poses a problem when we wish to insert a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path and
+ // extend to a larger type, then truncate back down.
+ LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+ LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
+ auto BigZExt = MIB.buildZExt(ExtBigTy, BigVec);
+ auto LitZExt = MIB.buildZExt(ExtLitTy, LitVec);
+ auto Insert = MIB.buildInsertSubvector(ExtBigTy, BigZExt, LitZExt, Idx);
+ auto SplatZero = MIB.buildSplatVector(
+ ExtBigTy, MIB.buildConstant(ExtBigTy.getElementType(), 0));
+ MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+ MVT LitTyMVT = getMVTForLLT(LitTy);
+ unsigned SubRegIdx, RemIdx;
+ std::tie(SubRegIdx, RemIdx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
+
+ RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
+ bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
+ SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
+ SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
+
+ // If the Idx has been completely eliminated and this subvector's size is a
+ // vector register or a multiple thereof, or the surrounding elements are
+ // undef, then this is a subvector insert which naturally aligns to a vector
+ // register. These can easily be handled using subregister manipulation.
+ if (RemIdx == 0 && (!IsSubVecPartReg || MRI.getVRegDef(Src1)->getOpcode() ==
+ TargetOpcode::G_IMPLICIT_DEF))
+ return true;
+
+ // If the subvector is smaller than a vector register, then the insertion
+ // must preserve the undisturbed elements of the register. We do this by
+ // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
+ // (which resolves to a subregister copy), performing a VSLIDEUP to place the
+ // subvector within the vector register, and an INSERT_SUBVECTOR of that
+ // LMUL=1 type back into the larger vector (resolving to another subregister
+ // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
+ // to avoid allocating a large register group to hold our subvector.
+
+ // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
+ // OFFSET<=i<VL set to the "subvector", and VL<=i<VLMAX set to the tail
+ // policy (in our case undisturbed). This means we can set up a subvector
+ // insertion where OFFSET is the insertion offset, and the VL is the OFFSET
+ // plus the size of the subvector.
+ const LLT XLenTy(STI.getXLenVT());
+ LLT InterLitTy = BigTy;
+ Register AlignedExtract = Src1;
+ unsigned AlignedIdx = Idx - RemIdx;
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+ getLMUL1Ty(BigTy).getSizeInBits())) {
+ InterLitTy = getLMUL1Ty(BigTy);
+ // Extract a subvector equal to the nearest full vector register type. This
+ // should resolve to a G_EXTRACT on a subreg.
+ AlignedExtract =
+ MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
+ }
+
+ auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
+ LitVec, 0);
+
+ auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
+ auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
+
+ // Use tail agnostic policy if we're inserting over InterLitTy's tail.
+ ElementCount EndIndex =
+ ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
+ uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ if (EndIndex == InterLitTy.getElementCount())
+ Policy = RISCVII::TAIL_AGNOSTIC;
+
+ // If we're inserting into the lowest elements, use a tail undisturbed
+ // vmv.v.v.
+ MachineInstrBuilder Inserted;
+ if (RemIdx == 0) {
+ Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InterLitTy},
+ {AlignedExtract, Insert, VL});
+ } else {
+ auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
+ // Construct the vector length corresponding to RemIdx + length(LitTy).
+ VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
+ Inserted =
+ MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InterLitTy},
+ {AlignedExtract, LitVec, SlideupAmt, Mask, VL, Policy});
+ }
+
+ // If required, insert this subvector back into the correct vector register.
+ // This should resolve to an INSERT_SUBREG instruction.
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits()))
+ Inserted = MIB.buildInsert(BigTy, BigVec, LitVec, AlignedIdx);
+
+ // We might have bitcast from a mask type: cast back to the original type if
+ // required.
+ MIB.buildBitcast(Dst, Inserted);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool RISCVLegalizerInfo::legalizeCustom(
LegalizerHelper &Helper, MachineInstr &MI,
LostDebugLocObserver &LocObserver) const {
@@ -985,6 +1139,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
return legalizeExt(MI, MIRBuilder);
case TargetOpcode::G_SPLAT_VECTOR:
return legalizeSplatVector(MI, MIRBuilder);
+ case TargetOpcode::G_INSERT_SUBVECTOR:
+ return legalizeInsertSubvector(MI, MIRBuilder);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, Helper, MIRBuilder);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 2fc28615e7630d..ccd8ac9fe4ec90 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -46,6 +46,7 @@ class RISCVLegalizerInfo : public LegalizerInfo {
bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ bool legalizeInsertSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper,
MachineIRBuilder &MIB) const;
};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index ba40662c49c1df..948167023d50a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -57,3 +57,21 @@ def G_SPLAT_VECTOR_SPLIT_I64_VL : RISCVGenericInstruction {
let InOperandList = (ins type0:$passthru, type1:$hi, type1:$lo, type2:$vl);
let hasSideEffects = false;
}
+
+// Pseudo equivalent to a RISCVISD::VMV_V_V_VL
+def G_VMV_V_V_VL : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$vec, type2:$vl);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VMV_V_V_VL, riscv_vmv_v_v_vl>;
+
+// Pseudo equivalent to a RISCVISD::VSLIDEUP_VL
+def G_VSLIDEUP_VL : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$merge, type0:$vec, type1:$idx, type2:$mask,
+ type3:$vl, type4:$policy);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VSLIDEUP_VL, riscv_slideup_vl>;
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
new file mode 100644
index 00000000000000..a5f1228b8f8ca6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
@@ -0,0 +1,402 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,RV32
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s -check-prefixes=CHECK,RV64
+
+# Special handling for i1-element vectors with non-zero index
+---
+name: insert_subvector_nxv2i1_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv2i1_nxv4i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C4]](s64)
+ ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C5]](s64)
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 1
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
+ ; RV32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT4]](s64)
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR4]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv2i1_nxv4i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C4]](s32)
+ ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C5]](s32)
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 1
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
+ ; RV64-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C6]](s32)
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR4]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s1>), %1, 2
+ $v8 = COPY %2(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv4i1_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv4i1_nxv8i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
+ ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C4]](s64)
+ ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C5]](s64)
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
+ ; RV32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT4]](s64)
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 8 x s8>), [[SPLAT_VECTOR4]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv4i1_nxv8i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C4]](s32)
+ ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C5]](s32)
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
+ ; RV64-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C6]](s32)
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 8 x s8>), [[SPLAT_VECTOR4]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s1>), %1, 2
+ $v8 = COPY %2(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv32i1_nxv64i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv32i1_nxv64i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[DEF1]](<vscale x 32 x s1>)
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv32i1_nxv64i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[DEF1]](<vscale x 32 x s1>)
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 64 x s1>), %1, 16
+ $v8 = COPY %2(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+...
+
+# i1-element vectors with zero index
+---
+name: insert_subvector_nxv2i1_nxv4i1_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i1_nxv4i1_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s1>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s1>), %1, 0
+ $v8 = COPY %2(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv4i1_nxv8i1_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv4i1_nxv8i1_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s1>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s1>), %1, 0
+ $v8 = COPY %2(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv32i1_nxv64i1_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv32i1_nxv64i1_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 16 x s1>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 64 x s1>), %1, 0
+ $v8 = COPY %2(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+...
+
+# Inserrt with zero index
+---
+name: insert_subvector_nxv1i8_nxv2i8_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv1i8_nxv2i8_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s8>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR %0(<vscale x 2 x s8>), %1, 0
+ $v8 = COPY %2(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv2i16_nxv4i16_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i16_nxv4i16_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s16>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s16>), %1, 0
+ $v8 = COPY %2(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv4i32_nxv8i32_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 4 x s32>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s32>), %1, 0
+ $v8 = COPY %2(<vscale x 8 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv2i64_nxv8i64_zero
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s64>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s64>), %1, 0
+ $v8 = COPY %2(<vscale x 8 x s64>)
+ PseudoRET implicit $v8
+...
+
+# Extract with non-zero index
+---
+name: insert_subvector_nxv1i8_nxv2i8
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv1i8_nxv2i8
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s8>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR %0(<vscale x 2 x s8>), %1, 0
+ $v8 = COPY %2(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv2i16_nxv4i16
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i16_nxv4i16
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s16>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s16>), %1, 0
+ $v8 = COPY %2(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv4i32_nxv8i32
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 4 x s32>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s32>), %1, 0
+ $v8 = COPY %2(<vscale x 8 x s32>)
+ PseudoRET implicit $v8
+...
+---
+name: insert_subvector_nxv2i64_nxv8i64
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s64>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s64>), %1, 0
+ $v8 = COPY %2(<vscale x 8 x s64>)
+ PseudoRET implicit $v8
+...
>From 2aac5883903ff5ecf0a6f1d5fff60a4b7c3b2957 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 16 Sep 2024 11:32:15 -0700
Subject: [PATCH 2/7] [GISEL] Add GInsertSubvector
---
.../llvm/CodeGen/GlobalISel/GenericMachineInstrs.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 132b7ec9aeef7c..e9305e0431ab91 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -800,6 +800,18 @@ class GInsertVectorElement : public GenericMachineInstr {
}
};
+/// Represents an insert subvector.
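+/// Operand 0 is the destination vector, operand 1 is the vector to insert
+/// into, operand 2 is the subvector, and operand 3 is the constant index.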
+class GInsertSubvector : public GenericMachineInstr {
+public:
+ Register getBigVec() const { return getOperand(1).getReg(); }
+ Register getSubVec() const { return getOperand(2).getReg(); }
+ uint64_t getIndexImm() const { return getOperand(3).getImm(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR;
+ }
+};
+
/// Represents a freeze.
class GFreeze : public GenericMachineInstr {
public:
>From 830c2f375a593a5b7dc667e277ea75370da9aae9 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 16 Sep 2024 11:33:53 -0700
Subject: [PATCH 3/7] fixup! respond to reviews
---
.../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 16 +-
.../rvv/legalize-insert-subvector.mir | 293 ++++++++++--------
2 files changed, 172 insertions(+), 137 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 8396316dc47fdc..14d7c78e80e47f 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -931,14 +931,14 @@ static LLT getLMUL1Ty(LLT VecTy) {
bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
MachineIRBuilder &MIB) const {
- assert(MI.getOpcode() == TargetOpcode::G_INSERT_SUBVECTOR);
+ GInsertSubvector &IS = cast<GInsertSubvector>(MI);
MachineRegisterInfo &MRI = *MIB.getMRI();
- Register Dst = MI.getOperand(0).getReg();
- Register Src1 = MI.getOperand(1).getReg();
- Register Src2 = MI.getOperand(2).getReg();
- uint64_t Idx = MI.getOperand(3).getImm();
+ Register Dst = IS.getOperand(0).getReg();
+ Register Src1 = IS.getBigVec();
+ Register Src2 = IS.getSubVec();
+ uint64_t Idx = IS.getIndexImm();
LLT BigTy = MRI.getType(Src1);
LLT LitTy = MRI.getType(Src2);
@@ -989,9 +989,7 @@ bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
- bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
- SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
- SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
+ bool IsSubVecPartReg = !RISCVVType::decodeVLMUL(SubVecLMUL).second;
// If the Idx has been completely eliminated and this subvector's size is a
// vector register or a multiple thereof, or the surrounding elements are
@@ -1059,7 +1057,7 @@ bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
if (TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits()))
- Inserted = MIB.buildInsert(BigTy, BigVec, LitVec, AlignedIdx);
+ Inserted = MIB.buildInsertSubvector(BigTy, BigVec, LitVec, AlignedIdx);
// We might have bitcast from a mask type: cast back to the original type if
// required.
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
index a5f1228b8f8ca6..598c2e2a142cb4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
@@ -11,7 +11,6 @@ body: |
bb.0.entry:
; RV32-LABEL: name: insert_subvector_nxv2i1_nxv4i1
; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
@@ -19,56 +18,43 @@ body: |
; RV32-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
- ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
- ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
- ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C4]](s64)
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C2]](s64)
; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C5]](s64)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C3]](s64)
; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 1
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
- ; RV32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT4]](s64)
- ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR4]]
+ ; RV32-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C4]](s32)
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
; RV32-NEXT: PseudoRET implicit $v8
;
; RV64-LABEL: name: insert_subvector_nxv2i1_nxv4i1
; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
- ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
- ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
- ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C4]](s32)
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C2]](s32)
; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C5]](s32)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C3]](s32)
; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 1
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
- ; RV64-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C6]](s32)
- ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR4]]
+ ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C4]](s32)
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
@@ -85,69 +71,38 @@ body: |
bb.0.entry:
; RV32-LABEL: name: insert_subvector_nxv4i1_nxv8i1
; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
- ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
- ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
- ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT3]](s64)
- ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[DEF]](<vscale x 8 x s1>)
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C4]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C5]](s64)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
- ; RV32-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32-NEXT: [[ANYEXT4:%[0-9]+]]:_(s64) = G_ANYEXT [[C6]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT4]](s64)
- ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 8 x s8>), [[SPLAT_VECTOR4]]
- ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
; RV32-NEXT: PseudoRET implicit $v8
;
; RV64-LABEL: name: insert_subvector_nxv4i1_nxv8i1
; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
- ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
- ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
- ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
- ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SELECT [[DEF1]](<vscale x 2 x s1>), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]]
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[DEF]](<vscale x 8 x s1>)
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C4]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C5]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT1]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
- ; RV64-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[SPLAT_VECTOR4:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C6]](s32)
- ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 8 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 8 x s8>), [[SPLAT_VECTOR4]]
- ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 8 x s1>)
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
%1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
- %2:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s1>), %1, 2
+ %2:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s1>), %1, 8
$v8 = COPY %2(<vscale x 8 x s1>)
PseudoRET implicit $v8
...
@@ -159,40 +114,34 @@ body: |
bb.0.entry:
; RV32-LABEL: name: insert_subvector_nxv32i1_nxv64i1
; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[DEF1]](<vscale x 32 x s1>)
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C]](s64)
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR]], [[READ_VLENB]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
; RV32-NEXT: PseudoRET implicit $v8
;
; RV64-LABEL: name: insert_subvector_nxv32i1_nxv64i1
; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[DEF1]](<vscale x 32 x s1>)
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C]](s32)
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[READ_VLENB]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
%1:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
- %2:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 64 x s1>), %1, 16
+ %2:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 64 x s1>), %1, 32
$v8 = COPY %2(<vscale x 64 x s1>)
PseudoRET implicit $v8
...
@@ -253,7 +202,7 @@ body: |
PseudoRET implicit $v8
...
-# Inserrt with zero index
+# Insert with zero index
---
name: insert_subvector_nxv1i8_nxv2i8_zero
legalized: false
@@ -327,22 +276,45 @@ body: |
PseudoRET implicit $v8
...
-# Extract with non-zero index
+# Insert with non-zero index
---
name: insert_subvector_nxv1i8_nxv2i8
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; CHECK-LABEL: name: insert_subvector_nxv1i8_nxv2i8
- ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s8>), 0
- ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 2 x s8>)
- ; CHECK-NEXT: PseudoRET implicit $v8
+ ; RV32-LABEL: name: insert_subvector_nxv1i8_nxv2i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv1i8_nxv2i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
- %2:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR %0(<vscale x 2 x s8>), %1, 0
+ %2:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR %0(<vscale x 2 x s8>), %1, 1
$v8 = COPY %2(<vscale x 2 x s8>)
PseudoRET implicit $v8
...
@@ -352,15 +324,38 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; CHECK-LABEL: name: insert_subvector_nxv2i16_nxv4i16
- ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s16>), 0
- ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s16>)
- ; CHECK-NEXT: PseudoRET implicit $v8
+ ; RV32-LABEL: name: insert_subvector_nxv2i16_nxv4i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv2i16_nxv4i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
- %2:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s16>), %1, 0
+ %2:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s16>), %1, 1
$v8 = COPY %2(<vscale x 4 x s16>)
PseudoRET implicit $v8
...
@@ -370,15 +365,34 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32
- ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 4 x s32>), 0
- ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
- ; CHECK-NEXT: PseudoRET implicit $v8
+ ; RV32-LABEL: name: insert_subvector_nxv4i32_nxv8i32
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[READ_VLENB1]], [[READ_VLENB]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[READ_VLENB1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s32>), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s32>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s32>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv4i32_nxv8i32
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[READ_VLENB1]], [[READ_VLENB]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[READ_VLENB1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s32>), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s32>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s32>)
+ ; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
%1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
- %2:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s32>), %1, 0
+ %2:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s32>), %1, 8
$v8 = COPY %2(<vscale x 8 x s32>)
PseudoRET implicit $v8
...
@@ -388,15 +402,38 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64
- ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s64>), 0
- ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
- ; CHECK-NEXT: PseudoRET implicit $v8
+ ; RV32-LABEL: name: insert_subvector_nxv2i64_nxv8i64
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C]](s64)
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR]], [[READ_VLENB]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s64>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
+ ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s64>), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s64>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s64>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv2i64_nxv8i64
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C]](s32)
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[READ_VLENB]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s64>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
+ ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s64>), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s64>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s64>)
+ ; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
- %2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s64>), %1, 0
+ %2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s64>), %1, 4
$v8 = COPY %2(<vscale x 8 x s64>)
PseudoRET implicit $v8
...
>From b5b1a77cd115a639b0f9e8fcd432de4f45e68fe1 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Mon, 16 Sep 2024 12:10:10 -0700
Subject: [PATCH 4/7] fixup! use getReg
---
llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 14d7c78e80e47f..b1bb4ce273dd51 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -935,7 +935,7 @@ bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
MachineRegisterInfo &MRI = *MIB.getMRI();
- Register Dst = IS.getOperand(0).getReg();
+ Register Dst = IS.getReg(0);
Register Src1 = IS.getBigVec();
Register Src2 = IS.getSubVec();
uint64_t Idx = IS.getIndexImm();
>From 8e717a2f6f44dbbd07eeff788a90a42e8f04d469 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Sep 2024 10:13:25 -0700
Subject: [PATCH 5/7] fixup! move to postlegalize lowering
---
.../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 152 +-------
.../Target/RISCV/GISel/RISCVLegalizerInfo.h | 1 -
.../GISel/RISCVPostLegalizerLowering.cpp | 178 +++++++++
llvm/lib/Target/RISCV/RISCVCombine.td | 8 +-
.../rvv/legalize-insert-subvector.mir | 260 ++-----------
.../rvv/insert-subvector.mir | 368 ++++++++++++++++++
6 files changed, 597 insertions(+), 370 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index b1bb4ce273dd51..d03a0939ab1af3 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -582,9 +582,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
SplatActions.clampScalar(1, sXLen, sXLen);
getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
- .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
+ .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
typeIsLegalBoolVec(1, BoolVecTys, ST)))
- .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
getLegacyLegalizerInfo().computeTables();
@@ -921,152 +921,6 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
return true;
}
-static LLT getLMUL1Ty(LLT VecTy) {
- assert(VecTy.getElementType().getSizeInBits() <= 64 &&
- "Unexpected vector LLT");
- return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
- VecTy.getElementType().getSizeInBits(),
- VecTy.getElementType());
-}
-
-bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
- MachineIRBuilder &MIB) const {
- GInsertSubvector &IS = cast<GInsertSubvector>(MI);
-
- MachineRegisterInfo &MRI = *MIB.getMRI();
-
- Register Dst = IS.getReg(0);
- Register Src1 = IS.getBigVec();
- Register Src2 = IS.getSubVec();
- uint64_t Idx = IS.getIndexImm();
-
- LLT BigTy = MRI.getType(Src1);
- LLT LitTy = MRI.getType(Src2);
- Register BigVec = Src1;
- Register LitVec = Src2;
-
- // We don't have the ability to slide mask vectors up indexed by their i1
- // elements; the smallest we can do is i8. Often we are able to bitcast to
- // equivalent i8 vectors. Otherwise, we can must zeroextend to equivalent i8
- // vectors and truncate down after the insert.
- if (LitTy.getElementType() == LLT::scalar(1) &&
- (Idx != 0 ||
- MRI.getVRegDef(BigVec)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)) {
- auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
- auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
- if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
- assert(Idx % 8 == 0 && "Invalid index");
- assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
- "Unexpected mask vector lowering");
- Idx /= 8;
- BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
- LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
- BigVec = MIB.buildBitcast(BigTy, BigVec).getReg(0);
- LitVec = MIB.buildBitcast(LitTy, LitVec).getReg(0);
- } else {
- // We can't slide this mask vector up indexed by its i1 elements.
- // This poses a problem when we wish to insert a scalable vector which
- // can't be re-expressed as a larger type. Just choose the slow path and
- // extend to a larger type, then truncate back down.
- LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
- LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
- auto BigZExt = MIB.buildZExt(ExtBigTy, BigVec);
- auto LitZExt = MIB.buildZExt(ExtLitTy, LitVec);
- auto Insert = MIB.buildInsertSubvector(ExtBigTy, BigZExt, LitZExt, Idx);
- auto SplatZero = MIB.buildSplatVector(
- ExtBigTy, MIB.buildConstant(ExtBigTy.getElementType(), 0));
- MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
- MI.eraseFromParent();
- return true;
- }
- }
-
- const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
- MVT LitTyMVT = getMVTForLLT(LitTy);
- unsigned SubRegIdx, RemIdx;
- std::tie(SubRegIdx, RemIdx) =
- RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
- getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
-
- RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
- bool IsSubVecPartReg = !RISCVVType::decodeVLMUL(SubVecLMUL).second;
-
- // If the Idx has been completely eliminated and this subvector's size is a
- // vector register or a multiple thereof, or the surrounding elements are
- // undef, then this is a subvector insert which naturally aligns to a vector
- // register. These can easily be handled using subregister manipulation.
- if (RemIdx == 0 && (!IsSubVecPartReg || MRI.getVRegDef(Src1)->getOpcode() ==
- TargetOpcode::G_IMPLICIT_DEF))
- return true;
-
- // If the subvector is smaller than a vector register, then the insertion
- // must preserve the undisturbed elements of the register. We do this by
- // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
- // (which resolves to a subregister copy), performing a VSLIDEUP to place the
- // subvector within the vector register, and an INSERT_SUBVECTOR of that
- // LMUL=1 type back into the larger vector (resolving to another subregister
- // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
- // to avoid allocating a large register group to hold our subvector.
-
- // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
- // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
- // (in our case undisturbed). This means we can set up a subvector insertion
- // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
- // size of the subvector.
- const LLT XLenTy(STI.getXLenVT());
- LLT InterLitTy = BigTy;
- Register AlignedExtract = Src1;
- unsigned AlignedIdx = Idx - RemIdx;
- if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
- getLMUL1Ty(BigTy).getSizeInBits())) {
- InterLitTy = getLMUL1Ty(BigTy);
- // Extract a subvector equal to the nearest full vector register type. This
- // should resolve to a G_EXTRACT on a subreg.
- AlignedExtract =
- MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
- }
-
- auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
- LitVec, 0);
-
- auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
- auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
-
- // Use tail agnostic policy if we're inserting over InterLitTy's tail.
- ElementCount EndIndex =
- ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
- uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
- if (EndIndex == InterLitTy.getElementCount())
- Policy = RISCVII::TAIL_AGNOSTIC;
-
- // If we're inserting into the lowest elements, use a tail undisturbed
- // vmv.v.v.
- MachineInstrBuilder Inserted;
- if (RemIdx == 0) {
- Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InterLitTy},
- {AlignedExtract, Insert, VL});
- } else {
- auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
- // Construct the vector length corresponding to RemIdx + length(LitTy).
- VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
- Inserted =
- MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InterLitTy},
- {AlignedExtract, LitVec, SlideupAmt, Mask, VL, Policy});
- }
-
- // If required, insert this subvector back into the correct vector register.
- // This should resolve to an INSERT_SUBREG instruction.
- if (TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits()))
- Inserted = MIB.buildInsertSubvector(BigTy, BigVec, LitVec, AlignedIdx);
-
- // We might have bitcast from a mask type: cast back to the original type if
- // required.
- MIB.buildBitcast(Dst, Inserted);
-
- MI.eraseFromParent();
- return true;
-}
-
bool RISCVLegalizerInfo::legalizeCustom(
LegalizerHelper &Helper, MachineInstr &MI,
LostDebugLocObserver &LocObserver) const {
@@ -1137,8 +991,6 @@ bool RISCVLegalizerInfo::legalizeCustom(
return legalizeExt(MI, MIRBuilder);
case TargetOpcode::G_SPLAT_VECTOR:
return legalizeSplatVector(MI, MIRBuilder);
- case TargetOpcode::G_INSERT_SUBVECTOR:
- return legalizeInsertSubvector(MI, MIRBuilder);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, Helper, MIRBuilder);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index ccd8ac9fe4ec90..2fc28615e7630d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -46,7 +46,6 @@ class RISCVLegalizerInfo : public LegalizerInfo {
bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const;
- bool legalizeInsertSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper,
MachineIRBuilder &MIB) const;
};
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp
index 66db15e3a2e28c..955cc74629a348 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp
@@ -41,6 +41,184 @@ namespace {
#include "RISCVGenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_TYPES
+static LLT getLMUL1Ty(LLT VecTy) {
+ assert(VecTy.getElementType().getSizeInBits() <= 64 &&
+ "Unexpected vector LLT");
+ return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
+ VecTy.getElementType().getSizeInBits(),
+ VecTy.getElementType());
+}
+
+/// Return the mask type suitable for masking the provided vector type. This
+/// is simply an i1 element type vector of the same (possibly scalable) length.
+static LLT getMaskTypeFor(LLT VecTy) {
+ assert(VecTy.isVector());
+ ElementCount EC = VecTy.getElementCount();
+ return LLT::vector(EC, LLT::scalar(1));
+}
+
+/// Creates an all ones mask suitable for masking a vector of type VecTy with
+/// vector length VL.
+static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ LLT MaskTy = getMaskTypeFor(VecTy);
+ return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
+}
+
+/// Gets the two common "VL" operands: an all-ones mask and the vector length.
+/// VecTy is a scalable vector type.
+static std::pair<MachineInstrBuilder, Register>
+buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ LLT VecTy = Dst.getLLTTy(MRI);
+ assert(VecTy.isScalableVector() && "Expecting scalable container type");
+ Register VL(RISCV::X0);
+ MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
+ return {Mask, VL};
+}
+
+/// Lowers G_INSERT_SUBVECTOR. We know we can lower it here since the legalizer
+/// marked it as legal.
+void lowerInsertSubvector(MachineInstr &MI, const RISCVSubtarget &STI) {
+ GInsertSubvector &IS = cast<GInsertSubvector>(MI);
+
+ MachineIRBuilder MIB(MI);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ Register Dst = IS.getReg(0);
+ Register Src1 = IS.getBigVec();
+ Register Src2 = IS.getSubVec();
+ uint64_t Idx = IS.getIndexImm();
+
+ LLT BigTy = MRI.getType(Src1);
+ LLT LitTy = MRI.getType(Src2);
+ Register BigVec = Src1;
+ Register LitVec = Src2;
+
+ // We don't have the ability to slide mask vectors up indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
+ // vectors and truncate down after the insert.
+ if (LitTy.getElementType() == LLT::scalar(1) &&
+ (Idx != 0 ||
+ MRI.getVRegDef(BigVec)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)) {
+ auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+ auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
+ assert(Idx % 8 == 0 && "Invalid index");
+ assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
+ "Unexpected mask vector lowering");
+ Idx /= 8;
+ BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
+ LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
+ BigVec = MIB.buildBitcast(BigTy, BigVec).getReg(0);
+ LitVec = MIB.buildBitcast(LitTy, LitVec).getReg(0);
+ } else {
+ // We can't slide this mask vector up indexed by its i1 elements.
+ // This poses a problem when we wish to insert a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path and
+ // extend to a larger type, then truncate back down.
+ LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+ LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
+ auto BigZExt = MIB.buildZExt(ExtBigTy, BigVec);
+ auto LitZExt = MIB.buildZExt(ExtLitTy, LitVec);
+ auto Insert = MIB.buildInsertSubvector(ExtBigTy, BigZExt, LitZExt, Idx);
+ auto SplatZero = MIB.buildSplatVector(
+ ExtBigTy, MIB.buildConstant(ExtBigTy.getElementType(), 0));
+ MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
+ MI.eraseFromParent();
+ return;
+ }
+ }
+
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+ MVT LitTyMVT = getMVTForLLT(LitTy);
+ unsigned SubRegIdx, RemIdx;
+ std::tie(SubRegIdx, RemIdx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
+
+ RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
+ bool IsSubVecPartReg = !RISCVVType::decodeVLMUL(SubVecLMUL).second;
+
+ // If the Idx has been completely eliminated and this subvector's size is a
+ // vector register or a multiple thereof, or the surrounding elements are
+ // undef, then this is a subvector insert which naturally aligns to a vector
+ // register. These can easily be handled using subregister manipulation.
+ if (RemIdx == 0 && (!IsSubVecPartReg || MRI.getVRegDef(Src1)->getOpcode() ==
+ TargetOpcode::G_IMPLICIT_DEF))
+ return;
+
+ // If the subvector is smaller than a vector register, then the insertion
+ // must preserve the undisturbed elements of the register. We do this by
+ // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
+ // (which resolves to a subregister copy), performing a VSLIDEUP to place the
+ // subvector within the vector register, and an INSERT_SUBVECTOR of that
+ // LMUL=1 type back into the larger vector (resolving to another subregister
+ // operation). See below for how our VSLIDEUP works. We go via an LMUL=1 type
+ // to avoid allocating a large register group to hold our subvector.
+
+ // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
+ // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
+ // (in our case undisturbed). This means we can set up a subvector insertion
+ // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
+ // size of the subvector.
+ const LLT XLenTy(STI.getXLenVT());
+ LLT InterLitTy = BigTy;
+ Register AlignedExtract = Src1;
+ unsigned AlignedIdx = Idx - RemIdx;
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+ getLMUL1Ty(BigTy).getSizeInBits())) {
+ InterLitTy = getLMUL1Ty(BigTy);
+ // Extract a subvector equal to the nearest full vector register type. This
+ // should resolve to a G_EXTRACT on a subreg.
+ AlignedExtract =
+ MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
+ }
+
+ auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
+ LitVec, 0);
+
+ auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
+ auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
+
+ // Use tail agnostic policy if we're inserting over InterLitTy's tail.
+ ElementCount EndIndex =
+ ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
+ uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ if (EndIndex == InterLitTy.getElementCount())
+ Policy = RISCVII::TAIL_AGNOSTIC;
+
+ // If we're inserting into the lowest elements, use a tail undisturbed
+ // vmv.v.v.
+ MachineInstrBuilder Inserted;
+ if (RemIdx == 0) {
+ Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InterLitTy},
+ {AlignedExtract, Insert, VL});
+ } else {
+ auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
+ // Construct the vector length corresponding to RemIdx + length(LitTy).
+ VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
+ Inserted =
+ MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InterLitTy},
+ {AlignedExtract, LitVec, SlideupAmt, Mask, VL, Policy});
+ }
+
+ // If required, insert this subvector back into the correct vector register.
+ // This should resolve to an INSERT_SUBREG instruction.
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits()))
+ Inserted = MIB.buildInsertSubvector(BigTy, BigVec, LitVec, AlignedIdx);
+
+ // We might have bitcast from a mask type: cast back to the original type if
+ // required.
+ MIB.buildBitcast(Dst, Inserted);
+
+ MI.eraseFromParent();
+ return;
+}
+
class RISCVPostLegalizerLoweringImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index d48698ae6f2bfb..902f3a05bc0fcf 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -19,11 +19,17 @@ def RISCVO0PreLegalizerCombiner: GICombiner<
"RISCVO0PreLegalizerCombinerImpl", [optnone_combines]> {
}
+def lower_insert_subvector : GICombineRule<
+ (defs root:$root),
+ (match (G_INSERT_SUBVECTOR $dst, $src1, $src2, $idx):$root),
+ (apply [{ lowerInsertSubvector(*${root}, STI); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
def RISCVPostLegalizerLowering
- : GICombiner<"RISCVPostLegalizerLoweringImpl", []> {
+ : GICombiner<"RISCVPostLegalizerLoweringImpl", [lower_insert_subvector]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
index 598c2e2a142cb4..440486a81a0c44 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insert-subvector.mir
@@ -9,54 +9,12 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; RV32-LABEL: name: insert_subvector_nxv2i1_nxv4i1
- ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV32-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
- ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
- ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C2]](s64)
- ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C3]](s64)
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
- ; RV32-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV32-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C4]](s32)
- ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
- ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
- ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
- ; RV32-NEXT: PseudoRET implicit $v8
- ;
- ; RV64-LABEL: name: insert_subvector_nxv2i1_nxv4i1
- ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
- ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
- ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C2]](s32)
- ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C3]](s32)
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[SELECT]], [[SELECT]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
- ; RV64-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C4]](s32)
- ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
- ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
- ; RV64-NEXT: PseudoRET implicit $v8
+ ; CHECK-LABEL: name: insert_subvector_nxv2i1_nxv4i1
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s1>), 2
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
%1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
%2:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s1>), %1, 2
@@ -69,37 +27,12 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; RV32-LABEL: name: insert_subvector_nxv4i1_nxv8i1
- ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[DEF]](<vscale x 8 x s1>)
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
- ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
- ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
- ; RV32-NEXT: PseudoRET implicit $v8
- ;
- ; RV64-LABEL: name: insert_subvector_nxv4i1_nxv8i1
- ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[DEF]](<vscale x 8 x s1>)
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
- ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
- ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
- ; RV64-NEXT: PseudoRET implicit $v8
+ ; CHECK-LABEL: name: insert_subvector_nxv4i1_nxv8i1
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s1>), 8
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
%1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
%2:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s1>), %1, 8
@@ -112,33 +45,12 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; RV32-LABEL: name: insert_subvector_nxv32i1_nxv64i1
- ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
- ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C]](s64)
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR]], [[READ_VLENB]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
- ; RV32-NEXT: PseudoRET implicit $v8
- ;
- ; RV64-LABEL: name: insert_subvector_nxv32i1_nxv64i1
- ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
- ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C]](s32)
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[READ_VLENB]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
- ; RV64-NEXT: PseudoRET implicit $v8
+ ; CHECK-LABEL: name: insert_subvector_nxv32i1_nxv64i1
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 32 x s1>), 32
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
%1:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
%2:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR %0(<vscale x 64 x s1>), %1, 32
@@ -283,35 +195,12 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; RV32-LABEL: name: insert_subvector_nxv1i8_nxv2i8
- ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
- ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
- ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
- ; RV32-NEXT: PseudoRET implicit $v8
- ;
- ; RV64-LABEL: name: insert_subvector_nxv1i8_nxv2i8
- ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
- ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
- ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
- ; RV64-NEXT: PseudoRET implicit $v8
+ ; CHECK-LABEL: name: insert_subvector_nxv1i8_nxv2i8
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s8>), 1
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
%2:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR %0(<vscale x 2 x s8>), %1, 1
@@ -324,35 +213,12 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; RV32-LABEL: name: insert_subvector_nxv2i16_nxv4i16
- ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
- ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
- ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; RV32-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C1]](s64)
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
- ; RV32-NEXT: PseudoRET implicit $v8
- ;
- ; RV64-LABEL: name: insert_subvector_nxv2i16_nxv4i16
- ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
- ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
- ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; RV64-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C1]](s32)
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[LSHR1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
- ; RV64-NEXT: PseudoRET implicit $v8
+ ; CHECK-LABEL: name: insert_subvector_nxv2i16_nxv4i16
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s16>), 1
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
%2:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR %0(<vscale x 4 x s16>), %1, 1
@@ -365,31 +231,12 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; RV32-LABEL: name: insert_subvector_nxv4i32_nxv8i32
- ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
- ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[READ_VLENB1]], [[READ_VLENB]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[READ_VLENB1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s32>), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s32>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s32>)
- ; RV32-NEXT: PseudoRET implicit $v8
- ;
- ; RV64-LABEL: name: insert_subvector_nxv4i32_nxv8i32
- ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s32>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
- ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[READ_VLENB1]], [[READ_VLENB]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s32>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[READ_VLENB1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s32>), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s32>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s32>)
- ; RV64-NEXT: PseudoRET implicit $v8
+ ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 4 x s32>), 8
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
%1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
%2:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s32>), %1, 8
@@ -402,35 +249,12 @@ legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
- ; RV32-LABEL: name: insert_subvector_nxv2i64_nxv8i64
- ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
- ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
- ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
- ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s64) = G_READ_VLENB
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB1]], [[C]](s64)
- ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LSHR]], [[READ_VLENB]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s64>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
- ; RV32-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s64>), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s64>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s64>)
- ; RV32-NEXT: PseudoRET implicit $v8
- ;
- ; RV64-LABEL: name: insert_subvector_nxv2i64_nxv8i64
- ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
- ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_EXTRACT_SUBVECTOR [[DEF]], 0
- ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
- ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[READ_VLENB1:%[0-9]+]]:_(s32) = G_READ_VLENB
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB1]], [[C]](s32)
- ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[READ_VLENB]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s64>) = G_VSLIDEUP_VL [[EXTRACT_SUBVECTOR]], [[DEF]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
- ; RV64-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s64>), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s64>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s64>)
- ; RV64-NEXT: PseudoRET implicit $v8
+ ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s64>), 4
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
%2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0(<vscale x 8 x s64>), %1, 4
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir
new file mode 100644
index 00000000000000..dd8fb0cbae2c00
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir
@@ -0,0 +1,368 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=riscv-postlegalizer-lowering %s \
+# RUN: -o - | FileCheck %s -check-prefixes=CHECK,RV32
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=riscv-postlegalizer-lowering %s \
+# RUN: -o - | FileCheck %s -check-prefixes=CHECK,RV64
+
+# Special handling for i1-element vectors with non-zero index
+---
+name: insert_subvector_nxv2i1_nxv4i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv2i1_nxv4i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_ZEXT [[DEF]](<vscale x 4 x s1>)
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[VSCALE:%[0-9]+]]:_(s32) = G_VSCALE i32 4
+ ; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 2
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[ZEXT]], [[ZEXT]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s8)
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv2i1_nxv4i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_ZEXT [[DEF]](<vscale x 4 x s1>)
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 4
+ ; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 2
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[ZEXT]], [[ZEXT]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s8)
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 2 x s1>), 2
+ $v8 = COPY %2(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv4i1_nxv8i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv4i1_nxv8i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[DEF]](<vscale x 8 x s1>)
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[VSCALE:%[0-9]+]]:_(s32) = G_VSCALE i32 1
+ ; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 1
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s32), 0
+ ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv4i1_nxv8i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 1 x s8>) = G_BITCAST [[DEF]](<vscale x 8 x s1>)
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 1
+ ; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 1
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s64), 0
+ ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 2 x s1>), 8
+ $v8 = COPY %2(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv32i1_nxv64i1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv32i1_nxv64i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[VSCALE:%[0-9]+]]:_(s32) = G_VSCALE i32 8
+ ; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 4
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
+ ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv32i1_nxv64i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s8>) = G_BITCAST [[DEF]](<vscale x 64 x s1>)
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 8
+ ; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 4
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
+ ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 32 x s1>), 32
+ $v8 = COPY %2(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+
+...
+
+# i1-element vectors with zero index
+---
+name: insert_subvector_nxv2i1_nxv4i1_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i1_nxv4i1_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s1>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s1>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 1 x s1>), 0
+ $v8 = COPY %2(<vscale x 4 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv4i1_nxv8i1_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv4i1_nxv8i1_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s1>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s1>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 2 x s1>), 0
+ $v8 = COPY %2(<vscale x 8 x s1>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv32i1_nxv64i1_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv32i1_nxv64i1_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 16 x s1>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 64 x s1>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 16 x s1>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 64 x s1>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 16 x s1>), 0
+ $v8 = COPY %2(<vscale x 64 x s1>)
+ PseudoRET implicit $v8
+
+...
+
+# Insert with zero index
+---
+name: insert_subvector_nxv1i8_nxv2i8_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv1i8_nxv2i8_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s8>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 1 x s8>), 0
+ $v8 = COPY %2(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv2i16_nxv4i16_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i16_nxv4i16_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 1 x s16>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 4 x s16>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 1 x s16>), 0
+ $v8 = COPY %2(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv4i32_nxv8i32_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 4 x s32>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 4 x s32>), 0
+ $v8 = COPY %2(<vscale x 8 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv2i64_nxv8i64_zero
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64_zero
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF1]](<vscale x 2 x s64>), 0
+ ; CHECK-NEXT: $v8 = COPY [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 2 x s64>), 0
+ $v8 = COPY %2(<vscale x 8 x s64>)
+ PseudoRET implicit $v8
+
+...
+
+# Insert with non-zero index
+---
+name: insert_subvector_nxv1i8_nxv2i8
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv1i8_nxv2i8
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[VSCALE:%[0-9]+]]:_(s32) = G_VSCALE i32 2
+ ; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 1
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s32), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv1i8_nxv2i8
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 2
+ ; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 1
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s64), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 2 x s8>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 1 x s8>), 1
+ $v8 = COPY %2(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+
+...
+
+# i1-element vectors with zero index
+---
+name: insert_subvector_nxv2i16_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: insert_subvector_nxv2i16_nxv4i16
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[VSCALE:%[0-9]+]]:_(s32) = G_VSCALE i32 4
+ ; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 1
+ ; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: insert_subvector_nxv2i16_nxv4i16
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 4
+ ; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 1
+ ; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 4 x s16>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 1 x s16>), 1
+ $v8 = COPY %2(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv4i32_nxv8i32
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv4i32_nxv8i32
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s32>), 0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s32>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s32>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s32>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s32>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s32>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 4 x s32>), 8
+ $v8 = COPY %2(<vscale x 8 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: insert_subvector_nxv2i64_nxv8i64
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: insert_subvector_nxv2i64_nxv8i64
+ ; CHECK: [[DEF:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR [[DEF]], [[DEF]](<vscale x 8 x s64>), 0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 8 x s64>) = G_BITCAST [[INSERT_SUBVECTOR]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: $v8 = COPY [[BITCAST]](<vscale x 8 x s64>)
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+ %2:_(<vscale x 8 x s64>) = G_INSERT_SUBVECTOR %0, %1(<vscale x 1 x s64>), 4
+ $v8 = COPY %2(<vscale x 8 x s64>)
+ PseudoRET implicit $v8
+
+...
>From 7850ed8b23c770b5a9fd714d5c4c54975da66df4 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Sep 2024 11:02:26 -0700
Subject: [PATCH 6/7] fixup! clang format
---
llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index d03a0939ab1af3..54aef303b92e0e 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -583,9 +583,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
getActionDefinitionsBuilder(G_INSERT_SUBVECTOR)
.legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
- typeIsLegalBoolVec(1, BoolVecTys, ST)))
+ typeIsLegalBoolVec(1, BoolVecTys, ST)))
.legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
- typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
+ typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
getLegacyLegalizerInfo().computeTables();
}
>From 415eda95a9bc6bd9c247be707dcaa6bc5a4b4451 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 17 Sep 2024 14:17:03 -0700
Subject: [PATCH 7/7] fixup! respond to comments
---
.../GISel/RISCVPostLegalizerLowering.cpp | 9 +++--
.../rvv/insert-subvector.mir | 40 +++++++------------
2 files changed, 20 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp
index 955cc74629a348..dda7ba049b269d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVPostLegalizerLowering.cpp
@@ -70,9 +70,8 @@ static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
/// Gets the two common "VL" operands: an all-ones mask and the vector length.
/// VecTy is a scalable vector type.
static std::pair<MachineInstrBuilder, Register>
-buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
+buildDefaultVLOps(LLT VecTy, MachineIRBuilder &MIB,
MachineRegisterInfo &MRI) {
- LLT VecTy = Dst.getLLTTy(MRI);
assert(VecTy.isScalableVector() && "Expecting scalable container type");
Register VL(RISCV::X0);
MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
@@ -213,7 +212,11 @@ void lowerInsertSubvector(MachineInstr &MI, const RISCVSubtarget &STI) {
// We might have bitcast from a mask type: cast back to the original type if
// required.
- MIB.buildBitcast(Dst, Inserted);
+ if (TypeSize::isKnownLT(InterLitTy.getSizeInBits(),
+ MRI.getType(Dst).getSizeInBits()))
+ MIB.buildBitcast(Dst, Inserted);
+ else
+ Inserted->getOperand(0).setReg(Dst);
MI.eraseFromParent();
return;
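
For readers skimming the hunk above, here is a minimal illustrative sketch of the pattern it adopts (not the committed code; the wrapper name finishInsert is hypothetical, while InterLitTy, Inserted, and Dst follow the names used in lowerInsertSubvector): the bitcast back to the destination type is only rebuilt when the intermediate container type is smaller than the destination type, otherwise the existing instruction is retargeted to define Dst directly.

```cpp
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Hypothetical helper mirroring the logic in the hunk above.
static void finishInsert(MachineIRBuilder &MIB, MachineRegisterInfo &MRI,
                         MachineInstrBuilder Inserted, Register Dst,
                         LLT InterLitTy) {
  if (TypeSize::isKnownLT(InterLitTy.getSizeInBits(),
                          MRI.getType(Dst).getSizeInBits()))
    // We went through an intermediate (e.g. i8) container: cast back to the
    // original destination type.
    MIB.buildBitcast(Dst, Inserted);
  else
    // No cast is needed: make the slide/insert define Dst in place.
    Inserted->getOperand(0).setReg(Dst);
}
```

This matches the updated test expectations below, where the redundant same-type G_BITCAST after G_VSLIDEUP_VL disappears.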
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir
index dd8fb0cbae2c00..4ea9431a98ea3b 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/postlegalizer-lowering/rvv/insert-subvector.mir
@@ -19,10 +19,9 @@ body: |
; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 2
; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[ZEXT]], [[ZEXT]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
; RV32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s8)
- ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[VSLIDEUP_VL]](<vscale x 4 x s8>), [[SPLAT_VECTOR]]
; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
; RV32-NEXT: PseudoRET implicit $v8
;
@@ -34,10 +33,9 @@ body: |
; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 2
; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEUP_VL [[ZEXT]], [[ZEXT]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s8>)
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s8)
- ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 4 x s8>), [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[VSLIDEUP_VL]](<vscale x 4 x s8>), [[SPLAT_VECTOR]]
; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
@@ -60,9 +58,8 @@ body: |
; RV32-NEXT: [[VSCALE:%[0-9]+]]:_(s32) = G_VSCALE i32 1
; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 1
; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s32), 0
- ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s32), 0
+ ; RV32-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 8 x s1>)
; RV32-NEXT: PseudoRET implicit $v8
;
; RV64-LABEL: name: insert_subvector_nxv4i1_nxv8i1
@@ -72,9 +69,8 @@ body: |
; RV64-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 1
; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 1
; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 1 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s64), 0
- ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 8 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 1 x s8>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 8 x s1>)
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 1 x s1>), [[ADD]](s64), 0
+ ; RV64-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 8 x s1>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
%1:_(<vscale x 2 x s1>) = G_IMPLICIT_DEF
@@ -96,9 +92,8 @@ body: |
; RV32-NEXT: [[VSCALE:%[0-9]+]]:_(s32) = G_VSCALE i32 8
; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 4
; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
- ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
- ; RV32-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
+ ; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s32), 0
+ ; RV32-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 64 x s1>)
; RV32-NEXT: PseudoRET implicit $v8
;
; RV64-LABEL: name: insert_subvector_nxv32i1_nxv64i1
@@ -108,9 +103,8 @@ body: |
; RV64-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 8
; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 4
; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
- ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
- ; RV64-NEXT: [[BITCAST1:%[0-9]+]]:_(<vscale x 64 x s1>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 8 x s8>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST1]](<vscale x 64 x s1>)
+ ; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 64 x s1>) = G_VSLIDEUP_VL [[DEF]], [[BITCAST]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 8 x s1>), [[ADD]](s64), 0
+ ; RV64-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 64 x s1>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
%1:_(<vscale x 32 x s1>) = G_IMPLICIT_DEF
@@ -271,8 +265,7 @@ body: |
; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 1
; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s32), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
+ ; RV32-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 2 x s8>)
; RV32-NEXT: PseudoRET implicit $v8
;
; RV64-LABEL: name: insert_subvector_nxv1i8_nxv2i8
@@ -282,8 +275,7 @@ body: |
; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 1
; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 2 x s8>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 2 x s1>), [[ADD]](s64), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 2 x s8>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 2 x s8>)
+ ; RV64-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 2 x s8>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 2 x s8>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s8>) = G_IMPLICIT_DEF
@@ -292,8 +284,6 @@ body: |
PseudoRET implicit $v8
...
-
-# i1-element vectors with zero index
---
name: insert_subvector_nxv2i16_nxv4i16
legalized: true
@@ -307,8 +297,7 @@ body: |
; RV32-NEXT: [[VSCALE1:%[0-9]+]]:_(s32) = G_VSCALE i32 1
; RV32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[VSCALE1]], [[VSCALE]]
; RV32-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s32), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s32), 0
- ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
- ; RV32-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
+ ; RV32-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 4 x s16>)
; RV32-NEXT: PseudoRET implicit $v8
;
; RV64-LABEL: name: insert_subvector_nxv2i16_nxv4i16
@@ -318,8 +307,7 @@ body: |
; RV64-NEXT: [[VSCALE1:%[0-9]+]]:_(s64) = G_VSCALE i64 1
; RV64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[VSCALE1]], [[VSCALE]]
; RV64-NEXT: [[VSLIDEUP_VL:%[0-9]+]]:_(<vscale x 4 x s16>) = G_VSLIDEUP_VL [[DEF]], [[DEF]], [[VSCALE1]](s64), [[VMSET_VL]](<vscale x 4 x s1>), [[ADD]](s64), 0
- ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 4 x s16>) = G_BITCAST [[VSLIDEUP_VL]](<vscale x 4 x s16>)
- ; RV64-NEXT: $v8 = COPY [[BITCAST]](<vscale x 4 x s16>)
+ ; RV64-NEXT: $v8 = COPY [[VSLIDEUP_VL]](<vscale x 4 x s16>)
; RV64-NEXT: PseudoRET implicit $v8
%0:_(<vscale x 4 x s16>) = G_IMPLICIT_DEF
%1:_(<vscale x 1 x s16>) = G_IMPLICIT_DEF