[llvm] [LV] Support strided load with a stride of -1 (PR #128718)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 16 03:37:46 PDT 2025
================
@@ -2523,6 +2529,78 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
R->dissolveToCFGLoop();
}
+void VPlanTransforms::convertToStridedAccesses(VPlan &Plan, VPCostContext &Ctx,
+ VFRange &Range) {
+ if (Plan.hasScalarVFOnly())
+ return;
+
+ SmallVector<VPRecipeBase *> ToErase;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *MemR = dyn_cast<VPWidenMemoryRecipe>(&R);
+ // TODO: support strided store
+ // TODO: support strided accesses with stride not equal to -1
+ if (!MemR || !isa<VPWidenLoadRecipe>(MemR) || !MemR->isReverse())
+ continue;
+
+ auto *VecEndPtr = cast<VPVectorEndPointerRecipe>(MemR->getAddr());
+ VPValue *Ptr = VecEndPtr->getPtr();
+ Value *PtrUV = Ptr->getUnderlyingValue();
+ // Memory cost model requires the pointer operand of memory access
+ // instruction.
+ if (!PtrUV)
+ continue;
+
+ Instruction &Ingredient = MemR->getIngredient();
+ Type *ElementTy = getLoadStoreType(&Ingredient);
+
+ auto IsProfitable = [&](ElementCount VF) -> bool {
+ Type *DataTy = toVectorTy(ElementTy, VF);
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+ if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
+ return false;
+ const InstructionCost CurrentCost = MemR->computeCost(VF, Ctx);
+ const InstructionCost StridedLoadStoreCost =
+ Ctx.TTI.getStridedMemoryOpCost(Instruction::Load, DataTy, PtrUV,
+ MemR->isMasked(), Alignment,
+ Ctx.CostKind, &Ingredient);
+ return StridedLoadStoreCost < CurrentCost;
+ };
+
+ if (!LoopVectorizationPlanner::getDecisionAndClampRange(IsProfitable,
+ Range))
+ continue;
+
+ // The stride of consecutive reverse access must be -1.
+ int64_t Stride = -1;
+ auto *GEP = dyn_cast<GetElementPtrInst>(PtrUV->stripPointerCasts());
----------------
Mel-Chen wrote:
We can't directly reuse the GEPNoWrapFlags from VPVectorEndPointerRecipe here. When the tail is folded, VPVectorEndPointerRecipe may compute addresses that the original scalar loop never would, so the 'inbounds' flag has to be dropped in that case:
```
// When folding the tail, we may compute an address that we don't in the
// original scalar loop and it may not be inbounds. Drop Inbounds in that
// case.
GEPNoWrapFlags Flags =
(CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
? GEPNoWrapFlags::none()
: GEPNoWrapFlags::inBounds();
VectorPtr = new VPVectorEndPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I), Flags, I->getDebugLoc());
```
However, this doesn't apply to VPVectorPointerRecipe: if the original IR had the 'inbounds' flag, it should be preserved there.
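For reference, a minimal sketch of that contrast (not the patch itself; the constructor argument lists and the `CM`/`GEP` helpers are assumed to match the snippet above):
```
// Sketch only: the reverse (end-pointer) case must drop 'inbounds' when the
// tail is folded, while the forward VPVectorPointerRecipe can keep whatever
// no-wrap flags the original GEP carried. Signatures are assumed and may
// differ from the actual recipe constructors.
if (Reverse) {
  GEPNoWrapFlags Flags =
      (CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
          ? GEPNoWrapFlags::none()
          : GEPNoWrapFlags::inBounds();
  VectorPtr = new VPVectorEndPointerRecipe(
      Ptr, &Plan.getVF(), getLoadStoreType(I), Flags, I->getDebugLoc());
} else {
  // Forward access: preserve the original GEP's flags unconditionally.
  VectorPtr = new VPVectorPointerRecipe(
      Ptr, getLoadStoreType(I),
      GEP ? GEP->getNoWrapFlags() : GEPNoWrapFlags::none(),
      I->getDebugLoc());
}
```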
https://github.com/llvm/llvm-project/pull/128718
More information about the llvm-commits mailing list