[llvm] [RISCV][GISEL] Legalize G_INSERT_VECTOR_ELT (PR #108250)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 09:56:54 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Michael Maitland (michaelmaitland)
<details>
<summary>Changes</summary>
This is stacked on #<!-- -->108220. This patch demonstrates why G_INSERT and G_EXTRACT need to be supported, at least in the meantime until we can deprecate them.
---
Patch is 84.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108250.diff
8 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp (+5-3)
- (modified) llvm/lib/CodeGen/MachineVerifier.cpp (+9-7)
- (modified) llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp (+510)
- (modified) llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h (+3)
- (modified) llvm/lib/Target/RISCV/RISCVInstrGISel.td (+76)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract.mir (+195)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-knownvlen.mir (+65)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-insertelement-rv64.mir (+856)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 925a1c7cf6aacc..96ca99a3871d8e 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -621,7 +621,8 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
#ifndef NDEBUG
assert(SrcTy.isValid() && "invalid operand type");
assert(DstTy.isValid() && "invalid operand type");
- assert(Index + DstTy.getSizeInBits() <= SrcTy.getSizeInBits() &&
+ assert(TypeSize::isKnownLE(DstTy.getSizeInBits().getWithIncrement(Index),
+ SrcTy.getSizeInBits()) &&
"extracting off end of register");
#endif
@@ -797,8 +798,9 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(const DstOp &Res,
const SrcOp &Src,
const SrcOp &Op,
unsigned Index) {
- assert(Index + Op.getLLTTy(*getMRI()).getSizeInBits() <=
- Res.getLLTTy(*getMRI()).getSizeInBits() &&
+ assert(TypeSize::isKnownLE(
+ Op.getLLTTy(*getMRI()).getSizeInBits().getWithIncrement(Index),
+ Res.getLLTTy(*getMRI()).getSizeInBits()) &&
"insertion past the end of a register");
if (Res.getLLTTy(*getMRI()).getSizeInBits() ==
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 759201ed9dadc7..606929bb594e93 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1587,12 +1587,13 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
- unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+ TypeSize DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ TypeSize SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
if (SrcSize == DstSize)
report("extract source must be larger than result", MI);
- if (DstSize + OffsetOp.getImm() > SrcSize)
+ if (DstSize.getKnownMinValue() + OffsetOp.getImm() >
+ SrcSize.getKnownMinValue())
report("extract reads past end of register", MI);
break;
}
@@ -1609,13 +1610,14 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
- unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
- unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+ TypeSize DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ TypeSize SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
- if (DstSize <= SrcSize)
+ if (TypeSize::isKnownLE(DstSize, SrcSize))
report("inserted size must be smaller than total register", MI);
- if (SrcSize + OffsetOp.getImm() > DstSize)
+ if (SrcSize.getKnownMinValue() + OffsetOp.getImm() >
+ DstSize.getKnownMinValue())
report("insert writes past end of register", MI);
break;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 64e8ee76e83915..1d824008089bfb 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -580,6 +580,27 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
SplatActions.clampScalar(1, sXLen, sXLen);
+ getActionDefinitionsBuilder(G_INSERT)
+ .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
+ typeIsLegalBoolVec(1, BoolVecTys, ST)))
+ .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
+
+ getActionDefinitionsBuilder(G_EXTRACT)
+ .customIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
+ .customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
+
+ // TODO: i64-element vectors on RV32 may be legalized in certain cases.
+ getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
+ .clampScalar(2, sXLen, sXLen) // clamp Index operand to SXLen
+ .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ typeInSet(1, {s8, s16, s32, s64}), typeInSet(2, {sXLen})));
+
+ getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
+ .clampScalar(1, sXLen, sXLen) // clamp Index operand to SXLen
+ .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
+ typeInSet(1, {sXLen})));
+
getLegacyLegalizerInfo().computeTables();
}
@@ -802,6 +823,39 @@ bool RISCVLegalizerInfo::legalizeLoadStore(MachineInstr &MI,
return true;
}
+static LLT getLMUL1Ty(LLT VecTy) {
+ assert(VecTy.getElementType().getSizeInBits() <= 64 &&
+ "Unexpected vector LLT");
+ return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
+ VecTy.getElementType().getSizeInBits(),
+ VecTy.getElementType());
+}
+
+/// Given a scalable vector type and an index into it, returns the type for the
+/// smallest subvector that the index fits in. This can be used to reduce LMUL
+/// for operations like vslidedown.
+///
+/// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
+static std::optional<LLT>
+getSmallestLLTForIndex(LLT VecTy, unsigned MaxIdx,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecTy.isScalableVector());
+ const unsigned EltSize = VecTy.getScalarSizeInBits();
+ const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ const unsigned MinVLMAX = VectorBitsMin / EltSize;
+ LLT SmallerTy;
+ if (MaxIdx < MinVLMAX)
+ SmallerTy = getLMUL1Ty(VecTy);
+ else if (MaxIdx < MinVLMAX * 2)
+ SmallerTy = getLMUL1Ty(VecTy).multiplyElements(2);
+ else if (MaxIdx < MinVLMAX * 4)
+ SmallerTy = getLMUL1Ty(VecTy).multiplyElements(4);
+ if (!SmallerTy.isValid() ||
+ !TypeSize::isKnownGT(VecTy.getSizeInBits(), SmallerTy.getSizeInBits()))
+ return std::nullopt;
+ return SmallerTy;
+}
+
/// Return the type of the mask type suitable for masking the provided
/// vector type. This is simply an i1 element type vector of the same
/// (possibly scalable) length.
@@ -858,6 +912,83 @@ buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
Unmerge.getReg(1), VL, MIB, MRI);
}
+static MachineInstrBuilder
+buildScalarSplat(const DstOp &Dst, const SrcOp &Passthru, Register Scalar,
+ Register VL, MachineIRBuilder &MIB, MachineRegisterInfo &MRI,
+ const RISCVSubtarget &Subtarget) {
+ const LLT XLenTy(Subtarget.getXLenVT());
+
+ // TODO: Simplest case is that the operand needs to be promoted to XLenTy.
+ // Currently the only call to buildScalarSplat occurs when
+ // isKnownGT(ScalarTySize, XLenTySize) so we don't need to handle this case
+ // yet.
+
+ LLT ScalarTy = MRI.getType(Scalar);
+
+ assert(XLenTy == LLT::scalar(32) && ScalarTy == LLT::scalar(64) &&
+ "Unexpected scalar for splat lowering!");
+
+ if (auto C = getIConstantVRegSExtVal(VL, MRI);
+ *C == 1 && isNullOrNullSplat(*MRI.getVRegDef(Scalar), MRI))
+ return MIB.buildInstr(RISCV::G_SCALAR_MOVE_VL, {Dst},
+ {Passthru, MIB.buildConstant(XLenTy, 0), VL});
+
+ // Otherwise use the more complicated splatting algorithm.
+ return buildSplatSplitS64WithVL(Dst, Passthru, Scalar, VL, MIB, MRI);
+}
+
+// This function lowers an insert of a scalar operand Scalar into lane
+// 0 of the vector regardless of the value of VL. The contents of the
+// remaining lanes of the result vector are unspecified. VL is assumed
+// to be non-zero.
+static MachineInstrBuilder buildScalarInsert(const DstOp &Dst, Register Scalar,
+ const SrcOp &VL,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI,
+ const RISCVSubtarget &Subtarget) {
+ LLT VecTy = Dst.getLLTTy(MRI);
+ assert(VecTy.isScalableVector() && "Expect Dst is scalable vector type.");
+
+ const LLT XLenTy(Subtarget.getXLenVT());
+ auto Undef = MIB.buildUndef(VecTy);
+
+ // Dst = G_INSERT_VECTOR_ELT Undef (G_EXTRACT_VECTOR_ELT V 0) N -> Dst = V
+ MachineInstr *ScalarMI = MRI.getVRegDef(Scalar);
+ if (ScalarMI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
+ isNullOrNullSplat(*MRI.getVRegDef(ScalarMI->getOperand(2).getReg()),
+ MRI)) {
+ Register V = ScalarMI->getOperand(1).getReg();
+ LLT VTy = MRI.getType(V);
+ // If V is not big enough, merge it with Undef.
+ if (TypeSize::isKnownLE(VTy.getSizeInBits(), VecTy.getSizeInBits()))
+ return MIB.buildInsert(Dst, Undef, Scalar, 0);
+ // V is as big or bigger than VecTy. Use an extract to get the correct Dst
+ // type.
+ return MIB.buildExtract(Dst, V, 0);
+ }
+
+ // Avoid the tricky legalization cases by falling back to using the
+ // splat code which already handles it gracefully.
+ LLT ScalarTy = MRI.getType(Scalar);
+ if (TypeSize::isKnownGT(ScalarTy.getSizeInBits(), XLenTy.getSizeInBits()))
+ return buildScalarSplat(Dst, Undef, Scalar,
+ MIB.buildConstant(XLenTy, 1).getReg(0), MIB, MRI,
+ Subtarget);
+
+ Register ExtScalar = Scalar;
+ if (TypeSize::isKnownLT(ScalarTy.getSizeInBits(), XLenTy.getSizeInBits())) {
+ // If the operand is a constant, sign extend to increase our chances
+ // of being able to use a .vi instruction. ANY_EXTEND would become a
+ // zero extend and the simm5 check in isel would fail.
+ // FIXME: Should we ignore the upper bits in isel instead?
+ unsigned ExtOpc = isConstantOrConstantVector(*MRI.getVRegDef(Scalar), MRI)
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ANYEXT;
+ ExtScalar = MIB.buildInstr(ExtOpc, {XLenTy}, {Scalar}).getReg(0);
+ }
+ return MIB.buildInstr(RISCV::G_SCALAR_MOVE_VL, {Dst}, {Undef, ExtScalar, VL});
+}
+
// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Splats of s1 types that have constant value can be legalized as VMSET_VL or
@@ -914,6 +1045,379 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
return true;
}
+/// Insert into the first position of a vector, and that vector is slid up to
+/// the insert index. By limiting the active vector length to index+1 and
+/// merging with the original vector (with an undisturbed tail policy for
+/// elements >= VL), we achieve the desired result of leaving all elements
+/// untouched except the one at VL-1, which is replaced with the desired value.
+bool RISCVLegalizerInfo::legalizeInsertVectorElt(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register SrcVec = MI.getOperand(1).getReg();
+ Register Elt = MI.getOperand(2).getReg();
+ Register Idx = MI.getOperand(3).getReg();
+
+ LLT VecTy = MRI.getType(Dst);
+ const LLT EltTy = VecTy.getElementType();
+ const LLT XLenTy(STI.getXLenVT());
+
+ // FIXME: SelectionDAG promotes a s1 vector type to s8 vector type at this
+ // point. GISel should take care of this during legalization. For now, the
+ // legalizer will return not legal.
+
+ // If we know the index we're going to insert at, we can shrink Vec so that
+ // we're performing the scalar inserts and slideup on a smaller LMUL.
+
+ // If we can shrink the vector type, then a G_EXTRACT will be used to get the
+ // smaller vector. The insertion will occur on that smaller vector, and then
+ // a G_INSERT will be used to put the result back into the larger, original
+ // vector.
+ unsigned AlignedIdx = 0;
+ auto InsertVec = SrcVec;
+ auto NewIdx = Idx;
+ if (auto ConstIdxOpt = getIConstantVRegVal(Idx, MRI)) {
+ const unsigned OrigIdx = ConstIdxOpt->getZExtValue();
+ // Do we know an upper bound on LMUL?
+ if (auto ShrunkTy = getSmallestLLTForIndex(VecTy, OrigIdx, STI))
+ VecTy = *ShrunkTy;
+
+ // If we're compiling for an exact VLEN value, we can always perform
+ // the insert in m1 as we can determine the register corresponding to
+ // the index in the register group.
+ const unsigned MinVLen = STI.getRealMinVLen();
+ const unsigned MaxVLen = STI.getRealMaxVLen();
+ const LLT M1Ty = getLMUL1Ty(VecTy);
+ if (MinVLen == MaxVLen &&
+ VecTy.getSizeInBits().getKnownMinValue() > MinVLen) {
+ unsigned ElemsPerVReg = MinVLen / EltTy.getSizeInBits().getFixedValue();
+ unsigned RemIdx = OrigIdx % ElemsPerVReg;
+ unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
+ unsigned ExtractIdx =
+ SubRegIdx * M1Ty.getElementCount().getKnownMinValue();
+ AlignedIdx = ExtractIdx;
+ NewIdx = MIB.buildConstant(MRI.getType(Idx), RemIdx).getReg(0);
+ VecTy = M1Ty;
+ }
+
+ if (AlignedIdx)
+ InsertVec = MIB.buildExtract(VecTy, SrcVec, AlignedIdx).getReg(0);
+ }
+
+ // TODO: i64-element vectors on RV32 can be lowered without scalar
+ // legalization if the most-significant 32 bits of the value are not affected
+ // by the sign-extension of the lower 32 bits. The Legalizer does not allow
+ // i64-elements through at the moment, so there is no sense in supporting
+ // the selection for this case.
+
+ // Insert into index zero
+
+ auto [Mask, VL] = buildDefaultVLOps(VecTy, MIB, MRI);
+ if (isNullOrNullSplat(*MRI.getVRegDef(NewIdx), MRI)) {
+ // TODO: Make sure we have fprb tests for regbankselect and legalization
+ // too.
+ auto Move =
+ MIB.buildInstr(RISCV::G_SCALAR_MOVE_VL, {VecTy}, {InsertVec, Elt, VL});
+ if (AlignedIdx)
+ MIB.buildInsert(Dst, InsertVec, Move, AlignedIdx);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Insert into non-constant or non-zero-constant index
+
+ auto ValInVec = buildScalarInsert(VecTy, Elt, VL, MIB, MRI, STI);
+ // Now that the value is in lane 0 of vector, slide it into position.
+ auto InsertVL = MIB.buildAdd(XLenTy, NewIdx, MIB.buildConstant(XLenTy, 1));
+ // TODO: SelectionDAG uses tail agnostic policy if Idx is the last index of
+ // Vec. It can do this because it lets in fixed vectors as a legal type. GISel
+ // does not. Can we find a way to use TA if Vec was fixed vector before
+ // legalization?
+ uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ auto Slideup =
+ MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {VecTy},
+ {InsertVec, ValInVec, NewIdx, Mask, InsertVL, Policy});
+ // If we used a smaller vector to do the insertion, put the smaller vector
+ // result back into the original vector.
+ if (AlignedIdx)
+ MIB.buildInsert(Dst, InsertVec, Slideup, AlignedIdx);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+static MachineInstrBuilder buildVLMax(LLT VecTy, MachineIRBuilder &MIB) {
+ assert(VecTy.isScalableVector() && "Expected scalable vector");
+ // TODO: Figure out how to represent VLMAX as a MI
+ llvm_unreachable("Unimplemented");
+}
+
+bool RISCVLegalizerInfo::legalizeInsert(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ assert(MI.getOpcode() == TargetOpcode::G_INSERT);
+
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+ uint64_t Idx = MI.getOperand(3).getImm();
+
+ // Only support vectors using custom legalization
+ LLT BigTy = MRI.getType(Dst);
+ if (BigTy.isScalar())
+ return false;
+
+ LLT LitTy = MRI.getType(Src2);
+ Register BigVec = Src1;
+ Register LitVec = Src2;
+
+ // We don't have the ability to slide mask vectors up indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
+ // vectors and truncate down after the insert.
+ if (LitTy.getElementType() == LLT::scalar(1) &&
+ (Idx != 0 ||
+ MRI.getVRegDef(BigVec)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)) {
+ auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+ auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
+ assert(Idx % 8 == 0 && "Invalid index");
+ assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
+ "Unexpected mask vector lowering");
+ Idx /= 8;
+ BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
+ LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
+ BigVec = MIB.buildBitcast(BigTy, BigVec).getReg(0);
+ LitVec = MIB.buildBitcast(LitTy, LitVec).getReg(0);
+ } else {
+ // We can't slide this mask vector up indexed by its i1 elements.
+ // This poses a problem when we wish to insert a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path and
+ // extend to a larger type, then truncate back down.
+ LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+ LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
+ auto BigZExt = MIB.buildZExt(ExtBigTy, BigVec);
+ auto LitZExt = MIB.buildZExt(ExtLitTy, LitVec);
+ auto Insert = MIB.buildInsert(ExtBigTy, BigZExt, LitZExt, Idx);
+ auto SplatZero = MIB.buildConstant(ExtBigTy, 0);
+ MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+ MVT LitTyMVT = getMVTForLLT(LitTy);
+ unsigned SubRegIdx, RemIdx;
+ std::tie(SubRegIdx, RemIdx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
+
+ RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
+ bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
+ SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
+ SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
+
+ // If the Idx has been completely eliminated and this subvector's size is a
+ // vector register or a multiple thereof, or the surrounding elements are
+ // undef, then this is a subvector insert which naturally aligns to a vector
+ // register. These can easily be handled using subregister manipulation.
+ if (RemIdx == 0 && (!IsSubVecPartReg || MRI.getVRegDef(Src1)->getOpcode() ==
+ TargetOpcode::G_IMPLICIT_DEF))
+ return true;
+
+ // If the subvector is smaller than a vector register, then the insertion
+ // must preserve the undisturbed elements of the register. We do this by
+ // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
+ // (which resolves to a subregister copy), performing a VSLIDEUP to place the
+ // subvector within the vector register, and an INSERT_SUBVECTOR of that
+ // LMUL=1 type back into the larger vector (resolving to another subregister
+ // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
+ // to avoid allocating a large register group to hold our subvector.
+
+ // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
+ // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
+ // (in our case undisturbed). This means we can set up a subvector insertion
+ // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
+ // size of the subvector.
+ const LLT XLenTy(STI.getXLenVT());
+ LLT InterLitTy = BigTy;
+ Register AlignedExtract = Src1;
+ unsigned AlignedIdx = Idx - RemIdx;
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+ getLMUL1Ty(BigTy).getSizeInBits())) {
+ InterLitTy = getLMUL1Ty(BigTy);
+ // Extract a subvector equal to the nearest full vector register type. This
+ // should resolve to a G_EXTRACT on a subreg.
+ AlignedExtract = MIB.buildExtract(InterLitTy, BigVec, Al...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/108250
More information about the llvm-commits
mailing list