[llvm] [LV] Support strided load with a stride of -1 (PR #128718)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 16 03:37:46 PDT 2025
================
@@ -2523,6 +2529,78 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
R->dissolveToCFGLoop();
}
+void VPlanTransforms::convertToStridedAccesses(VPlan &Plan, VPCostContext &Ctx,
+ VFRange &Range) {
+ if (Plan.hasScalarVFOnly())
+ return;
+
+ SmallVector<VPRecipeBase *> ToErase;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *MemR = dyn_cast<VPWidenMemoryRecipe>(&R);
+ // TODO: support strided store
+ // TODO: support strided accesses with stride not equal to -1
+ if (!MemR || !isa<VPWidenLoadRecipe>(MemR) || !MemR->isReverse())
+ continue;
+
+ auto *VecEndPtr = cast<VPVectorEndPointerRecipe>(MemR->getAddr());
+ VPValue *Ptr = VecEndPtr->getPtr();
+ Value *PtrUV = Ptr->getUnderlyingValue();
+ // Memory cost model requires the pointer operand of memory access
+ // instruction.
+ if (!PtrUV)
+ continue;
+
+ Instruction &Ingredient = MemR->getIngredient();
+ Type *ElementTy = getLoadStoreType(&Ingredient);
+
+ auto IsProfitable = [&](ElementCount VF) -> bool {
+ Type *DataTy = toVectorTy(ElementTy, VF);
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+ if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
+ return false;
+ const InstructionCost CurrentCost = MemR->computeCost(VF, Ctx);
+ const InstructionCost StridedLoadStoreCost =
+ Ctx.TTI.getStridedMemoryOpCost(Instruction::Load, DataTy, PtrUV,
+ MemR->isMasked(), Alignment,
+ Ctx.CostKind, &Ingredient);
+ return StridedLoadStoreCost < CurrentCost;
+ };
+
+ if (!LoopVectorizationPlanner::getDecisionAndClampRange(IsProfitable,
+ Range))
+ continue;
+
+ // The stride of consecutive reverse access must be -1.
+ int64_t Stride = -1;
+ auto *GEP = dyn_cast<GetElementPtrInst>(PtrUV->stripPointerCasts());
----------------
Mel-Chen wrote:
We can't directly reuse the GEPNoWrapFlags from VPVectorEndPointerRecipe here. When the tail is folded, VPVectorEndPointerRecipe may compute addresses that the original scalar loop never would, so the 'inbounds' flag has to be dropped in that case:
```
// When folding the tail, we may compute an address that we don't in the
// original scalar loop and it may not be inbounds. Drop Inbounds in that
// case.
GEPNoWrapFlags Flags =
(CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
? GEPNoWrapFlags::none()
: GEPNoWrapFlags::inBounds();
VectorPtr = new VPVectorEndPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I), Flags, I->getDebugLoc());
```
However, this doesn't apply to VPVectorPointerRecipe: if the original IR had the 'inbounds' flag, it should be preserved there.
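For reference, a minimal sketch of that contrast (not the patch itself; the constructor argument lists and the `CM`/`GEP` helpers are assumed to match the snippet above):
```
// Sketch only: the reverse (end-pointer) case must drop 'inbounds' when the
// tail is folded, while the forward VPVectorPointerRecipe can keep whatever
// no-wrap flags the original GEP carried. Signatures are assumed and may
// differ from the actual recipe constructors.
if (Reverse) {
  GEPNoWrapFlags Flags =
      (CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
          ? GEPNoWrapFlags::none()
          : GEPNoWrapFlags::inBounds();
  VectorPtr = new VPVectorEndPointerRecipe(
      Ptr, &Plan.getVF(), getLoadStoreType(I), Flags, I->getDebugLoc());
} else {
  // Forward access: preserve the original GEP's flags unconditionally.
  VectorPtr = new VPVectorPointerRecipe(
      Ptr, getLoadStoreType(I),
      GEP ? GEP->getNoWrapFlags() : GEPNoWrapFlags::none(),
      I->getDebugLoc());
}
```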
https://github.com/llvm/llvm-project/pull/128718
More information about the llvm-commits mailing list