[llvm] [RISCV][GISEL] Legalize G_INSERT_SUBVECTOR (PR #108859)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 10:10:32 PDT 2024
================
@@ -1054,6 +1058,134 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
return true;
}
+bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
+ LegalizerHelper &Helper,
+ MachineIRBuilder &MIB) const {
+ GInsertSubvector &IS = cast<GInsertSubvector>(MI);
+
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ Register Dst = IS.getReg(0);
+ Register BigVec = IS.getBigVec();
+ Register LitVec = IS.getSubVec();
+ uint64_t Idx = IS.getIndexImm();
+
+ LLT BigTy = MRI.getType(BigVec);
+ LLT LitTy = MRI.getType(LitVec);
+
+ if (Idx == 0 ||
+ MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ return true;
+
+ // We don't have the ability to slide mask vectors up indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
+ // vectors and truncate down after the insert.
+ if (LitTy.getElementType() == LLT::scalar(1)) {
+ auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+ auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
+ return Helper.bitcast(
+ IS, 0,
+ LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
+
+ // We can't slide this mask vector up indexed by its i1 elements.
+ // This poses a problem when we wish to insert a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path and
+ // extend to a larger type, then truncate back down.
+ LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+ return Helper.widenScalar(IS, 0, ExtBigTy);
+ }
+
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+ unsigned SubRegIdx, RemIdx;
+ std::tie(SubRegIdx, RemIdx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
+
+ TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
+ assert(isPowerOf2_64(
+ STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
+ bool ExactlyVecRegSized =
+ STI.expandVScale(LitTy.getSizeInBits())
+ .isKnownMultipleOf(STI.expandVScale(VecRegSize));
+
+ // If the Idx has been completely eliminated and this subvector's size is a
+ // vector register or a multiple thereof, or the surrounding elements are
+ // undef, then this is a subvector insert which naturally aligns to a vector
+ // register. These can easily be handled using subregister manipulation.
+ if (RemIdx == 0 &&
+ (ExactlyVecRegSized ||
+ MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF))
+ return true;
+
+ // If the subvector is smaller than a vector register, then the insertion
+ // must preserve the undisturbed elements of the register. We do this by
+ // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
+ // (which resolves to a subregister copy), performing a VSLIDEUP to place the
+ // subvector within the vector register, and an INSERT_SUBVECTOR of that
+ // LMUL=1 type back into the larger vector (resolving to another subregister
+ // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
+ // to avoid allocating a large register group to hold our subvector.
+
+ // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
+ // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
+ // (in our case undisturbed). This means we can set up a subvector insertion
+ // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
+ // size of the subvector.
+ const LLT XLenTy(STI.getXLenVT());
+ LLT InterLitTy = BigTy;
+ Register AlignedExtract = BigVec;
+ unsigned AlignedIdx = Idx - RemIdx;
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+ getLMUL1Ty(BigTy).getSizeInBits())) {
+ InterLitTy = getLMUL1Ty(BigTy);
+ // Extract a subvector equal to the nearest full vector register type. This
+ // should resolve to a G_EXTRACT on a subreg.
+ AlignedExtract =
+ MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
+ }
+
+ auto Insert = MIB.buildInsertSubvector(InterLitTy, MIB.buildUndef(InterLitTy),
+ LitVec, 0);
+
+ auto [Mask, _] = buildDefaultVLOps(BigTy, MIB, MRI);
+ auto VL = MIB.buildVScale(XLenTy, LitTy.getElementCount().getKnownMinValue());
+
+ // Use tail agnostic policy if we're inserting over InterLitTy's tail.
+ ElementCount EndIndex =
+ ElementCount::getScalable(RemIdx) + LitTy.getElementCount();
+ uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
+ if (STI.expandVScale(EndIndex) ==
+ STI.expandVScale(InterLitTy.getElementCount()))
+ Policy = RISCVII::TAIL_AGNOSTIC;
+
+ // If we're inserting into the lowest elements, use a tail undisturbed
+ // vmv.v.v.
+ MachineInstrBuilder Inserted;
+ if (RemIdx == 0) {
+ Inserted = MIB.buildInstr(RISCV::G_VMV_V_V_VL, {InterLitTy},
+ {AlignedExtract, Insert, VL});
+ } else {
+ auto SlideupAmt = MIB.buildVScale(XLenTy, RemIdx);
+ // Construct the vector length corresponding to RemIdx + length(LitTy).
+ VL = MIB.buildAdd(XLenTy, SlideupAmt, VL);
+ Inserted =
+ MIB.buildInstr(RISCV::G_VSLIDEUP_VL, {InterLitTy},
+ {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
+ }
+
+ // If required, insert this subvector back into the correct vector register.
+ // This should resolve to an INSERT_SUBREG instruction.
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(), InterLitTy.getSizeInBits()))
+ MIB.buildInsertSubvector(Dst, BigVec, LitVec, AlignedIdx);
+ else
+ Inserted->getOperand(0).setReg(Dst);
----------------
arsenm wrote:
Didn't call the observer before modifying the instruction in place. Can you build the instruction above with the correct register?
https://github.com/llvm/llvm-project/pull/108859
More information about the llvm-commits
mailing list