[llvm] [RISCV][GISEL] Legalize and post-legalize lower G_INSERT_SUBVECTOR (PR #108859)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 13:20:56 PDT 2024
================
@@ -41,6 +41,184 @@ namespace {
#include "RISCVGenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_TYPES
+static LLT getLMUL1Ty(LLT VecTy) { // Scalable vector LLT with VecTy's element type, resized as computed below.
+ assert(VecTy.getElementType().getSizeInBits() <= 64 &&
+ "Unexpected vector LLT"); // element types wider than 64 bits are not expected here
+ return LLT::scalable_vector(RISCV::RVVBitsPerBlock / // min element count = RVVBitsPerBlock / element size in bits
+ VecTy.getElementType().getSizeInBits(),
+ VecTy.getElementType());
+}
+
+/// Return the mask type suitable for masking the provided vector type VecTy,
+/// which must be a vector (asserted). This is simply an i1 element type vector
+/// with the same (possibly scalable) element count as VecTy.
+static LLT getMaskTypeFor(LLT VecTy) {
+ assert(VecTy.isVector());
+ ElementCount EC = VecTy.getElementCount();
+ return LLT::vector(EC, LLT::scalar(1));
+}
+
+/// Creates an all ones mask suitable for masking a vector of type VecTy with
+/// vector length VL, by building a G_VMSET_VL on VL through MIB. MRI is unused.
+static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
+ MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ LLT MaskTy = getMaskTypeFor(VecTy); // i1 vector with VecTy's element count
+ return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
+}
+
+/// Gets the two common "VL" operands for Dst: an all-ones mask and the vector
+/// length, returned as {Mask, VL}. Dst's type must be a scalable vector type.
+static std::pair<MachineInstrBuilder, Register>
+buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
+ MachineRegisterInfo &MRI) {
+ LLT VecTy = Dst.getLLTTy(MRI);
+ assert(VecTy.isScalableVector() && "Expecting scalable container type");
+ Register VL(RISCV::X0); // the vector length operand is always X0 here
+ MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
+ return {Mask, VL};
+}
+
+/// Lowers G_INSERT_SUBVECTOR. We know we can lower it here since the legalizer
+/// marked it as legal.
+void lowerInsertSubvector(MachineInstr &MI, const RISCVSubtarget &STI) {
+ GInsertSubvector &IS = cast<GInsertSubvector>(MI);
+
+ MachineIRBuilder MIB(MI);
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ Register Dst = IS.getReg(0);
+ Register Src1 = IS.getBigVec();
+ Register Src2 = IS.getSubVec();
+ uint64_t Idx = IS.getIndexImm();
+
+ LLT BigTy = MRI.getType(Src1);
+ LLT LitTy = MRI.getType(Src2);
+ Register BigVec = Src1;
+ Register LitVec = Src2;
+
+ // We don't have the ability to slide mask vectors up indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
+ // vectors and truncate down after the insert.
+ if (LitTy.getElementType() == LLT::scalar(1) &&
+ (Idx != 0 ||
+ MRI.getVRegDef(BigVec)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)) {
+ auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+ auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
+ assert(Idx % 8 == 0 && "Invalid index");
+ assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
+ "Unexpected mask vector lowering");
+ Idx /= 8;
+ BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
+ LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
+ BigVec = MIB.buildBitcast(BigTy, BigVec).getReg(0);
+ LitVec = MIB.buildBitcast(LitTy, LitVec).getReg(0);
+ } else {
+ // We can't slide this mask vector up indexed by its i1 elements.
+ // This poses a problem when we wish to insert a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path and
+ // extend to a larger type, then truncate back down.
+ LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+ LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
+ auto BigZExt = MIB.buildZExt(ExtBigTy, BigVec);
+ auto LitZExt = MIB.buildZExt(ExtLitTy, LitVec);
+ auto Insert = MIB.buildInsertSubvector(ExtBigTy, BigZExt, LitZExt, Idx);
+ auto SplatZero = MIB.buildSplatVector(
+ ExtBigTy, MIB.buildConstant(ExtBigTy.getElementType(), 0));
+ MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, Insert, SplatZero);
+ MI.eraseFromParent();
+ return;
+ }
+ }
+
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+ MVT LitTyMVT = getMVTForLLT(LitTy);
+ unsigned SubRegIdx, RemIdx;
+ std::tie(SubRegIdx, RemIdx) =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
+
+ RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(getMVTForLLT(LitTy));
+ bool IsSubVecPartReg = !RISCVVType::decodeVLMUL(SubVecLMUL).second;
+
+ // If the Idx has been completely eliminated and this subvector's size is a
+ // vector register or a multiple thereof, or the surrounding elements are
+ // undef, then this is a subvector insert which naturally aligns to a vector
+ // register. These can easily be handled using subregister manipulation.
+ if (RemIdx == 0 && (!IsSubVecPartReg || MRI.getVRegDef(Src1)->getOpcode() ==
+ TargetOpcode::G_IMPLICIT_DEF))
+ return;
+
+ // If the subvector is smaller than a vector register, then the insertion
+ // must preserve the undisturbed elements of the register. We do this by
+ // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
+ // (which resolves to a subregister copy), performing a VSLIDEUP to place the
+ // subvector within the vector register, and an INSERT_SUBVECTOR of that
+ // LMUL=1 type back into the larger vector (resolving to another subregister
+ // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
+ // to avoid allocating a large register group to hold our subvector.
+
+ // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
+ // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
+ // (in our case undisturbed). This means we can set up a subvector insertion
+ // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
+ // size of the subvector.
+ const LLT XLenTy(STI.getXLenVT());
+ LLT InterLitTy = BigTy;
+ Register AlignedExtract = Src1;
+ unsigned AlignedIdx = Idx - RemIdx;
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+ getLMUL1Ty(BigTy).getSizeInBits())) {
+ InterLitTy = getLMUL1Ty(BigTy);
+ // Extract a subvector equal to the nearest full vector register type. This
+ // should resolve to a G_EXTRACT on a subreg.
+ AlignedExtract =
+ MIB.buildExtractSubvector(InterLitTy, BigVec, AlignedIdx).getReg(0);
----------------
tschuett wrote:
I would leave it as is.
https://github.com/llvm/llvm-project/pull/108859
More information about the llvm-commits
mailing list