[llvm] [WIP][LV][VPlan] For reverse consecutive load and store accesses, use unit-stride load/store (PR #130032)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 6 02:29:57 PST 2025


https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/130032

>From 344c2e105eff990a77370829b0544519051e3ef7 Mon Sep 17 00:00:00 2001
From: "Liqin.Weng" <liqin.weng at spacemit.com>
Date: Thu, 6 Mar 2025 17:37:15 +0800
Subject: [PATCH] [LV][VPlan] For reverse consecutive load and store
 accesses, use unit-stride load/store

---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  9 +-
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 86 ++++++++++---------
 2 files changed, 52 insertions(+), 43 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c447fa4843591..0b155c95f0221 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7487,6 +7487,13 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
       // comparing against the legacy cost isn't desirable.
       if (isa<VPPartialReductionRecipe>(&R))
         return true;
+
+      // The VPlan-based cost model may calculate the cost of strided load/store
+      // which can't be modeled in the legacy cost model.
+      if (isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(&R))
+        if (cast<VPWidenMemoryRecipe>(&R)->isReverse())
+          return true;
+
       if (Instruction *UI = GetInstructionForCost(&R))
         SeenInstrs.insert(UI);
     }
@@ -8344,7 +8351,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     auto *GEP = dyn_cast<GetElementPtrInst>(
         Ptr->getUnderlyingValue()->stripPointerCasts());
     VPSingleDefRecipe *VectorPtr;
-    if (Reverse) {
+    if (Reverse && !CM.foldTailWithEVL()) {
       // When folding the tail, we may compute an address that we don't in the
       // original scalar loop and it may not be inbounds. Drop Inbounds in that
       // case.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e9f50e88867b2..93146eb83295a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2640,17 +2640,6 @@ void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
-                                     Value *EVL, const Twine &Name) {
-  VectorType *ValTy = cast<VectorType>(Operand->getType());
-  Value *AllTrueMask =
-      Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
-  return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
-                                 {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
 void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   auto *LI = cast<LoadInst>(&Ingredient);
 
@@ -2665,19 +2654,26 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   Value *EVL = State.get(getEVL(), VPLane(0));
   Value *Addr = State.get(getAddr(), !CreateGather);
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
 
   if (CreateGather) {
     NewLI =
         Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
                                 nullptr, "wide.masked.gather");
   } else {
+    if (isReverse()) {
+      auto *EltTy = DataTy->getElementType();
+      // if (EltTy->getScalarSizeInBits() !=
+      // EVL->getType()->getScalarSizeInBits())
+      //   EVL = ConstantInt::getSigned(EVL->getType(),
+      //   static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+      auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+      Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+      Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+    }
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
     NewLI = cast<CallInst>(VBuilder.createVectorInstruction(
@@ -2686,10 +2682,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   NewLI->addParamAttr(
       0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
   State.addMetadata(NewLI, LI);
-  Instruction *Res = NewLI;
-  if (isReverse())
-    Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
-  State.set(this, Res);
+  State.set(this, NewLI);
 }
 
 InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
@@ -2707,14 +2700,13 @@ InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
-  if (!Reverse)
-    return Cost;
+  // if (!Reverse)
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  // return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+  //                                       getAddr()->getUnderlyingValue(), false,
+  //                                       Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2775,7 +2767,8 @@ void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
 
 void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   auto *SI = cast<StoreInst>(&Ingredient);
-
+  Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+  auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
   VPValue *StoredValue = getStoredValue();
   bool CreateScatter = !isConsecutive();
   const Align Alignment = getLoadStoreAlignment(&Ingredient);
@@ -2786,22 +2779,32 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   CallInst *NewSI = nullptr;
   Value *StoredVal = State.get(StoredValue);
   Value *EVL = State.get(getEVL(), VPLane(0));
-  if (isReverse())
-    StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
   Value *Mask = nullptr;
-  if (VPValue *VPMask = getMask()) {
+  if (VPValue *VPMask = getMask())
     Mask = State.get(VPMask);
-    if (isReverse())
-      Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
-  } else {
+  else
     Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
-  }
+
   Value *Addr = State.get(getAddr(), !CreateScatter);
   if (CreateScatter) {
     NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
                                     Intrinsic::vp_scatter,
                                     {StoredVal, Addr, Mask, EVL});
   } else {
+    if (isReverse()) {
+      auto *EltTy = DataTy->getElementType();
+      // FIXME: we may not need to deal with the size here; InstCombine will
+      // handle the offset type. if (EltTy->getScalarSizeInBits() !=
+      // EVL->getType()->getScalarSizeInBits())
+      //   EVL = ConstantInt::getSigned(EVL->getType(),
+      //   static_cast<int64_t>(EltTy->getScalarSizeInBits()) / 8);
+      auto *GEP = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts());
+      // Value *Offset =
+      // Builder.CreateSub(State.Builder.getIntN(EVL->getType()->getScalarSizeInBits(),
+      // 1), EVL);
+      Value *Offset = Builder.CreateSub(State.Builder.getInt32(1), EVL);
+      Addr = Builder.CreateGEP(EltTy, Addr, Offset, "", GEP->isInBounds());
+    }
     VectorBuilder VBuilder(Builder);
     VBuilder.setEVL(EVL).setMask(Mask);
     NewSI = cast<CallInst>(VBuilder.createVectorInstruction(
@@ -2828,14 +2831,13 @@ InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
       getLoadStoreAlignment(const_cast<Instruction *>(&Ingredient));
   unsigned AS =
       getLoadStoreAddressSpace(const_cast<Instruction *>(&Ingredient));
-  InstructionCost Cost = Ctx.TTI.getMaskedMemoryOpCost(
-      Ingredient.getOpcode(), Ty, Alignment, AS, Ctx.CostKind);
-  if (!Reverse)
-    return Cost;
+  // if (!Reverse)
+    return Ctx.TTI.getMaskedMemoryOpCost(Ingredient.getOpcode(), Ty, Alignment,
+                                         AS, Ctx.CostKind);
 
-  return Cost + Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
-                                       cast<VectorType>(Ty), {}, Ctx.CostKind,
-                                       0);
+  // return Ctx.TTI.getStridedMemoryOpCost(Ingredient.getOpcode(), Ty,
+  //                                       getAddr()->getUnderlyingValue(), false,
+  //                                       Alignment, Ctx.CostKind);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)



More information about the llvm-commits mailing list