[llvm] [VectorCombine] Expand `vector_insert` into shufflevector for earlier cost optimizations (#145512) (PR #146479)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 1 10:30:35 PDT 2025
================
@@ -804,6 +805,66 @@ bool VectorCombine::foldInsExtBinop(Instruction &I) {
return true;
}
+/// Try to fold vector_insert intrinsics into shufflevector instructions.
+bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
+ // This optimization only applies to vector_insert intrinsics.
+ if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
+ return false;
+
+ Value *Vec = II->getArgOperand(0);
+ Value *SubVec = II->getArgOperand(1);
+ Value *Idx = II->getArgOperand(2);
+
+ // Caller guarantees DstTy is a fixed vector.
+ auto *DstTy = cast<FixedVectorType>(II->getType());
+ auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+ auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
+
+ // Only canonicalize if Vec and SubVec are both fixed vectors.
+ if (!VecTy || !SubVecTy)
+ return false;
+
+ unsigned DstNumElts = DstTy->getNumElements();
+ unsigned VecNumElts = VecTy->getNumElements();
+ unsigned SubVecNumElts = SubVecTy->getNumElements();
+ auto *SubVecPtr = dyn_cast<ConstantInt>(Idx);
+ if (!SubVecPtr)
+ return false;
+
+ unsigned SubVecIdx = SubVecPtr->getZExtValue();
+
+ // Ensure insertion of SubVec doesn't exceed Dst bounds.
+ if (SubVecIdx % SubVecNumElts != 0 || SubVecIdx + SubVecNumElts > DstNumElts)
+ return false;
+
+ // An insert that entirely overwrites Vec with SubVec is a nop.
+ if (VecNumElts == SubVecNumElts) {
+ replaceValue(I, *SubVec);
+ return true;
+ }
+
+ // Widen SubVec into a vector of the same width as Vec, since
+ // shufflevector requires the two input vectors to be the same width.
+ // Elements beyond the bounds of SubVec within the widened vector are
+ // undefined.
+ SmallVector<int, 8> WidenMask(VecNumElts, PoisonMaskElem);
+ std::iota(WidenMask.begin(), WidenMask.begin() + SubVecNumElts, 0);
+ std::fill(WidenMask.begin() + SubVecNumElts, WidenMask.end(), PoisonMaskElem);
+
+ auto *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
+ Worklist.pushValue(WidenShuffle);
+
+ SmallVector<int, 8> Mask(DstNumElts);
+ std::iota(Mask.begin(), Mask.begin() + SubVecIdx, 0);
+ std::iota(Mask.begin() + SubVecIdx, Mask.begin() + SubVecIdx + SubVecNumElts, DstNumElts);
+ std::iota(Mask.begin() + SubVecIdx + SubVecNumElts, Mask.end(), SubVecIdx + SubVecNumElts);
----------------
laurenmchin wrote:
Yes, you're right. Thanks for pointing that out — the second call was redundant and was overwriting part of the first. I've updated it to use just two iota calls, with the second overwriting the relevant subrange in place.
https://github.com/llvm/llvm-project/pull/146479
More information about the llvm-commits
mailing list