[llvm] [RISCV][GISEL] Legalize G_INSERT_SUBVECTOR (PR #108859)

Thu Oct 10 10:10:32 PDT 2024

================
@@ -1054,6 +1058,134 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
   return true;
 }
 
+bool RISCVLegalizerInfo::legalizeInsertSubvector(MachineInstr &MI,
+                                                 LegalizerHelper &Helper,
+                                                 MachineIRBuilder &MIB) const {
+  GInsertSubvector &IS = cast<GInsertSubvector>(MI);
+
+  MachineRegisterInfo &MRI = *MIB.getMRI();
+
+  Register Dst = IS.getReg(0);
+  Register BigVec = IS.getBigVec();
+  Register LitVec = IS.getSubVec();
+  uint64_t Idx = IS.getIndexImm();
+
+  LLT BigTy = MRI.getType(BigVec);
+  LLT LitTy = MRI.getType(LitVec);
+
+  if (Idx == 0 ||
+      MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+    return true;
+
+  // We don't have the ability to slide mask vectors up indexed by their i1
+  // elements; the smallest we can do is i8. Often we are able to bitcast to
+  // equivalent i8 vectors. Otherwise, we must zero-extend to equivalent i8
+  // vectors and truncate down after the insert.
+  if (LitTy.getElementType() == LLT::scalar(1)) {
+    auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+    auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+    if (BigTyMinElts >= 8 && LitTyMinElts >= 8)
+      return Helper.bitcast(
+          IS, 0,
+          LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8));
+
+    // We can't slide this mask vector up indexed by its i1 elements.
+    // This poses a problem when we wish to insert a scalable vector which
+    // can't be re-expressed as a larger type. Just choose the slow path and
+    // extend to a larger type, then truncate back down.
+    LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+    return Helper.widenScalar(IS, 0, ExtBigTy);
+  }
+
+  const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+  unsigned SubRegIdx, RemIdx;
+  std::tie(SubRegIdx, RemIdx) =
+      RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+          getMVTForLLT(BigTy), getMVTForLLT(LitTy), Idx, TRI);
+
+  TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
+  assert(isPowerOf2_64(
+      STI.expandVScale(LitTy.getSizeInBits()).getKnownMinValue()));
+  bool ExactlyVecRegSized =
+      STI.expandVScale(LitTy.getSizeInBits())
+          .isKnownMultipleOf(STI.expandVScale(VecRegSize));
+
+  // If the Idx has been completely eliminated and this subvector's size is a
+  // vector register or a multiple thereof, or the surrounding elements are
+  // undef, then this is a subvector insert which naturally aligns to a vector
+  // register. These can easily be handled using subregister manipulation.
+  if (RemIdx == 0 &&
+      (ExactlyVecRegSized ||
+       MRI.getVRegDef(BigVec)->getOpcode() == TargetOpcode::G_IMPLICIT_DEF))
----------------
arsenm wrote:

Repeated this check above 

https://github.com/llvm/llvm-project/pull/108859