[llvm] [LV] Convert gather loads with invariant stride into strided loads (PR #147297)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 9 05:36:51 PST 2025
================
@@ -1797,6 +1806,125 @@ void VPWidenIntrinsicRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
}
#endif
+/// Return the argument index of the memory pointer operand for this recipe's
+/// vector memory intrinsic.
+unsigned VPWidenMemIntrinsicRecipe::getMemoryPointerParamPos() const {
+  // Prefer the position VPIntrinsic reports, when it knows this intrinsic.
+  if (auto Pos = VPIntrinsic::getMemoryPointerParamPos(VectorIntrinsicID))
+    return *Pos;
+
+  // Fall back to a hardcoded table for the masked memory intrinsics that
+  // VPIntrinsic does not cover: loads take the pointer as operand 0, stores
+  // pass the value first and the pointer as operand 1.
+  switch (VectorIntrinsicID) {
+  case Intrinsic::masked_load:
+  case Intrinsic::masked_gather:
+  case Intrinsic::masked_expandload:
+    return 0;
+  case Intrinsic::masked_store:
+  case Intrinsic::masked_scatter:
+  case Intrinsic::masked_compressstore:
+    return 1;
+  default:
+    llvm_unreachable("unknown vector memory intrinsic");
+  }
+}
+
+/// Return the argument index of the mask operand for this recipe's vector
+/// memory intrinsic.
+unsigned VPWidenMemIntrinsicRecipe::getMaskParamPos() const {
+  // Prefer the position VPIntrinsic reports, when it knows this intrinsic.
+  if (auto Pos = VPIntrinsic::getMaskParamPos(VectorIntrinsicID))
+    return *Pos;
+
+  // Fall back for the masked memory intrinsics VPIntrinsic does not cover.
+  // The mask follows the memory operands: operand 1 for loads (after the
+  // pointer), operand 2 for stores (after the value and the pointer).
+  switch (VectorIntrinsicID) {
+  case Intrinsic::masked_load:
+  case Intrinsic::masked_gather:
+  case Intrinsic::masked_expandload:
+    return 1;
+  case Intrinsic::masked_store:
+  case Intrinsic::masked_scatter:
+  case Intrinsic::masked_compressstore:
+    return 2;
+  default:
+    llvm_unreachable("unknown vector memory intrinsic");
+  }
+}
+
+/// Generate the widened memory-intrinsic call: collect overload types and
+/// operands, materialize the vector intrinsic declaration, emit the call,
+/// attach the alignment attribute on the pointer argument, and record the
+/// result for loads.
+void VPWidenMemIntrinsicRecipe::execute(VPTransformState &State) {
+  assert(State.VF.isVector() && "not widening");
+
+  SmallVector<Type *, 2> TysForDecl;
+  // Add return type if intrinsic is overloaded on it.
+  if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
+    TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
+  SmallVector<Value *, 4> Args;
+  for (const auto &I : enumerate(operands())) {
+    // Some intrinsics have a scalar argument - don't replace it with a
+    // vector.
+    Value *Arg;
+    if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(),
+                                           State.TTI))
+      Arg = State.get(I.value(), VPLane(0));
+    else
+      Arg = State.get(I.value(), usesFirstLaneOnly(I.value()));
+    // Operands the intrinsic is overloaded on also contribute their type to
+    // the declaration's type signature.
+    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index(),
+                                               State.TTI))
+      TysForDecl.push_back(Arg->getType());
+    Args.push_back(Arg);
+  }
+
+  // Use vector version of the intrinsic.
+  Module *M = State.Builder.GetInsertBlock()->getModule();
+  Function *VectorF =
+      Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
+  assert(VectorF &&
+         "Can't retrieve vector intrinsic or vector-predication intrinsics.");
+
+  CallInst *MemI = State.Builder.CreateCall(VectorF, Args);
+  // Record the access alignment as an align attribute on the pointer
+  // argument of the generated call.
+  MemI->addParamAttr(
+      getMemoryPointerParamPos(),
+      Attribute::getWithAlignment(MemI->getContext(), Alignment));
+  applyMetadata(*MemI);
+
+  // Stores return void; only loads produce a value to record.
+  if (!MemI->getType()->isVoidTy())
+    State.set(getVPSingleValue(), MemI);
+}
+
+InstructionCost
+VPWidenMemIntrinsicRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ Type *Ty = toVectorTy(getLoadStoreType(&Ingredient), VF);
+ const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
+ return Ctx.TTI.getMemIntrinsicInstrCost(
+ MemIntrinsicCostAttributes(VectorIntrinsicID, Ty, Ptr,
+ match(getMask(), m_True()), Alignment,
+ &Ingredient),
----------------
Mel-Chen wrote:
I switched to using the underlying instruction.
https://github.com/llvm/llvm-project/pull/147297
More information about the llvm-commits
mailing list