[llvm] [VPlan] Update final exit value via VPlan. (PR #112147)

via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 29 12:16:51 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-vectorizers

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Model updating IV users directly in VPlan, replace fixupIVUsers.

This completes the transition towards modeling all live-outs directly in VPlan.

Depends on https://github.com/llvm/llvm-project/pull/110004,
https://github.com/llvm/llvm-project/pull/109975 and
https://github.com/llvm/llvm-project/pull/112145.

---

Patch is 21.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112147.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+98-149) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+14-10) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+5) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (-2) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1975df3cacbcae..e95797591bb297 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -542,11 +542,6 @@ class InnerLoopVectorizer {
 protected:
   friend class LoopVectorizationPlanner;
 
-  /// Set up the values of the IVs correctly when exiting the vector loop.
-  virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
-                            Value *VectorTripCount, BasicBlock *MiddleBlock,
-                            VPTransformState &State);
-
   /// Iteratively sink the scalarized operands of a predicated instruction into
   /// the block that was created for it.
   void sinkScalarOperands(Instruction *PredInst);
@@ -775,10 +770,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
   BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
   void printDebugTracesAtStart() override;
   void printDebugTracesAtEnd() override;
-
-  void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
-                    Value *VectorTripCount, BasicBlock *MiddleBlock,
-                    VPTransformState &State) override {};
 };
 
 // A specialized derived class of inner loop vectorizer that performs
@@ -2751,97 +2742,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
   return LoopVectorPreHeader;
 }
 
-// Fix up external users of the induction variable. At this point, we are
-// in LCSSA form, with all external PHIs that use the IV having one input value,
-// coming from the remainder loop. We need those PHIs to also have a correct
-// value for the IV when arriving directly from the middle block.
-void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
-                                       const InductionDescriptor &II,
-                                       Value *VectorTripCount,
-                                       BasicBlock *MiddleBlock,
-                                       VPTransformState &State) {
-  // There are two kinds of external IV usages - those that use the value
-  // computed in the last iteration (the PHI) and those that use the penultimate
-  // value (the value that feeds into the phi from the loop latch).
-  // We allow both, but they, obviously, have different values.
-
-  DenseMap<Value *, Value *> MissingVals;
-
-  Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
-                                      OrigLoop->getLoopPreheader()))
-                        ->getIncomingValueForBlock(MiddleBlock);
-
-  // An external user of the last iteration's value should see the value that
-  // the remainder loop uses to initialize its own IV.
-  Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
-  for (User *U : PostInc->users()) {
-    Instruction *UI = cast<Instruction>(U);
-    if (!OrigLoop->contains(UI)) {
-      assert(isa<PHINode>(UI) && "Expected LCSSA form");
-      MissingVals[UI] = EndValue;
-    }
-  }
-
-  // An external user of the penultimate value need to see EndValue - Step.
-  // The simplest way to get this is to recompute it from the constituent SCEVs,
-  // that is Start + (Step * (CRD - 1)).
-  for (User *U : OrigPhi->users()) {
-    auto *UI = cast<Instruction>(U);
-    if (!OrigLoop->contains(UI)) {
-      assert(isa<PHINode>(UI) && "Expected LCSSA form");
-      IRBuilder<> B(MiddleBlock->getTerminator());
-
-      // Fast-math-flags propagate from the original induction instruction.
-      if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
-        B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
-
-      VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
-      assert(StepVPV && "step must have been expanded during VPlan execution");
-      Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
-                                        : State.get(StepVPV, VPLane(0));
-      Value *Escape = nullptr;
-      if (EndValue->getType()->isIntegerTy())
-        Escape = B.CreateSub(EndValue, Step);
-      else if (EndValue->getType()->isPointerTy())
-        Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
-      else {
-        assert(EndValue->getType()->isFloatingPointTy() &&
-               "Unexpected induction type");
-        Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
-                                       Instruction::FAdd
-                                   ? Instruction::FSub
-                                   : Instruction::FAdd,
-                               EndValue, Step);
-      }
-      Escape->setName("ind.escape");
-      MissingVals[UI] = Escape;
-    }
-  }
-
-  assert((MissingVals.empty() ||
-          all_of(MissingVals,
-                 [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
-                   return all_of(
-                       predecessors(cast<Instruction>(P.first)->getParent()),
-                       [MiddleBlock, this](BasicBlock *Pred) {
-                         return Pred == MiddleBlock ||
-                                Pred == OrigLoop->getLoopLatch();
-                       });
-                 })) &&
-         "Expected escaping values from latch/middle.block only");
-
-  for (auto &I : MissingVals) {
-    PHINode *PHI = cast<PHINode>(I.first);
-    // One corner case we have to handle is two IVs "chasing" each-other,
-    // that is %IV2 = phi [...], [ %IV1, %latch ]
-    // In this case, if IV1 has an external use, we need to avoid adding both
-    // "last value of IV1" and "penultimate value of IV2". So, verify that we
-    // don't already have an incoming value for the middle block.
-    if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
-      PHI->addIncoming(I.second, MiddleBlock);
-  }
-}
-
 namespace {
 
 struct CSEDenseMapInfo {
@@ -2986,24 +2886,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
     for (PHINode &PN : Exit->phis())
       PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
 
-  if (Cost->requiresScalarEpilogue(VF.isVector())) {
-    // No edge from the middle block to the unique exit block has been inserted
-    // and there is nothing to fix from vector loop; phis should have incoming
-    // from scalar loop only.
-  } else {
-    // TODO: Check in VPlan to see if IV users need fixing instead of checking
-    // the cost model.
-
-    // If we inserted an edge from the middle block to the unique exit block,
-    // update uses outside the loop (phis) to account for the newly inserted
-    // edge.
-
-    // Fix-up external users of the induction variables.
-    for (const auto &Entry : Legal->getInductionVars())
-      fixupIVUsers(Entry.first, Entry.second,
-                   getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
-  }
-
   for (Instruction *PI : PredicatedInstructions)
     sinkScalarOperands(&*PI);
 
@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
 /// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
 /// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
 /// the end value of the induction.
-static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
-                                               VPBuilder &VectorPHBuilder,
-                                               VPBuilder &ScalarPHBuilder,
-                                               VPTypeAnalysis &TypeInfo,
-                                               VPValue *VectorTC) {
+static VPValue *addResumePhiRecipeForInduction(
+    VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
+    VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
+    DenseMap<VPValue *, VPValue *> &EndValues) {
   auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
   // Truncated wide inductions resume from the last lane of their vector value
   // in the last vector iteration which is handled elsewhere.
@@ -8886,6 +8767,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
                                                 ScalarTypeOfWideIV);
   }
 
+  EndValues[WideIV] = EndValue;
   auto *ResumePhiRecipe =
       ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
                                    WideIV->getDebugLoc(), "bc.resume.val");
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
 /// Create resume phis in the scalar preheader for first-order recurrences,
 /// reductions and inductions, and update the VPIRInstructions wrapping the
 /// original phis in the scalar header.
-static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
+static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
+                                Loop *OrigLoop,
+                                DenseMap<VPValue *, VPValue *> &EndValues) {
   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
   auto *ScalarPH = Plan.getScalarPreheader();
   auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
     if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
       if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
               WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
-              &Plan.getVectorTripCount())) {
+              &Plan.getVectorTripCount(), EndValues)) {
         ScalarPhiIRI->addOperand(ResumePhi);
         continue;
       }
@@ -8949,9 +8833,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
 // modeled explicitly yet and won't be included. Those are un-truncated
 // VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
 // increments.
-static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
-    Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
-    const MapVector<PHINode *, InductionDescriptor> &Inductions) {
+static SetVector<VPIRInstruction *>
+collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
+                         VPlan &Plan) {
   auto *MiddleVPBB = Plan.getMiddleBlock();
   SetVector<VPIRInstruction *> ExitUsersToFix;
   for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8976,18 +8860,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
         // Exit values for inductions are computed and updated outside of VPlan
         // and independent of induction recipes.
         // TODO: Compute induction exit values in VPlan.
-        if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
-             !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
-            isa<VPWidenPointerInductionRecipe>(V) ||
-            (isa<Instruction>(IncomingValue) &&
-             OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
-             any_of(IncomingValue->users(), [&Inductions](User *U) {
-               auto *P = dyn_cast<PHINode>(U);
-               return P && Inductions.contains(P);
-             }))) {
-          if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
-            continue;
-        }
         ExitUsersToFix.insert(ExitIRI);
         ExitIRI->addOperand(V);
       }
@@ -8996,17 +8868,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
   return ExitUsersToFix;
 }
 
+/// If \p Incoming is a user of a non-truncated induction, create recipes to
+/// compute the final value and update the user \p ExitIRI.
+static bool addInductionEndValue(
+    VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
+    const MapVector<PHINode *, InductionDescriptor> &Inductions,
+    DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
+  if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
+       !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
+      isa<VPWidenPointerInductionRecipe>(Incoming) ||
+      (isa<Instruction>(Incoming->getUnderlyingValue()) &&
+       any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
+              [&Inductions](User *U) {
+                auto *P = dyn_cast<PHINode>(U);
+                return P && Inductions.contains(P);
+              }))) {
+    VPValue *IV;
+    if (auto *WideIV =
+            dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
+      IV = WideIV;
+    else if (auto *WideIV =
+                 dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()
+                                                      ->getOperand(0)
+                                                      ->getDefiningRecipe()))
+      IV = WideIV;
+    else
+      IV = Incoming->getDefiningRecipe()->getOperand(1);
+    // Skip phi nodes already updated. This can be the case if 2 induction
+    // phis chase each other.
+    VPValue *EndValue = EndValues[IV];
+    if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
+               IsaPred<VPWidenIntOrFpInductionRecipe,
+                       VPWidenPointerInductionRecipe>)) {
+      ExitIRI->setOperand(0, EndValue);
+      return true;
+    }
+
+    VPBuilder B(Plan.getMiddleBlock()->getTerminator());
+    VPValue *Escape = nullptr;
+    auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
+    VPValue *Step = WideIV->getStepValue();
+    Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
+    if (ScalarTy->isIntegerTy())
+      Escape =
+          B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
+    else if (ScalarTy->isPointerTy())
+      Escape = B.createPtrAdd(
+          EndValue,
+          B.createNaryOp(Instruction::Sub,
+                         {Plan.getOrAddLiveIn(ConstantInt::get(
+                              Step->getLiveInIRValue()->getType(), 0)),
+                          Step}),
+          {}, "ind.escape");
+    else if (ScalarTy->isFloatingPointTy()) {
+      const auto &ID = WideIV->getInductionDescriptor();
+      Escape = B.createNaryOp(
+          ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
+              ? Instruction::FSub
+              : Instruction::FAdd,
+          {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
+    } else {
+      llvm_unreachable("all possible induction types must be handled");
+    }
+    ExitIRI->setOperand(0, Escape);
+    return true;
+  }
+  return false;
+}
 // Add exit values to \p Plan. Extracts are added for each entry in \p
 // ExitUsersToFix if needed and their operands are updated. Returns true if all
 // exit users can be handled, otherwise return false.
-static bool
-addUsersInExitBlocks(VPlan &Plan,
-                     const SetVector<VPIRInstruction *> &ExitUsersToFix) {
+static bool addUsersInExitBlocks(
+    VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
+    const MapVector<PHINode *, InductionDescriptor> &Inductions,
+    DenseMap<VPValue *, VPValue *> &EndValues) {
   if (ExitUsersToFix.empty())
     return true;
 
   auto *MiddleVPBB = Plan.getMiddleBlock();
   VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
 
   // Introduce extract for exiting values and update the VPIRInstructions
   // modeling the corresponding LCSSA phis.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
       if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
         return false;
 
+      VPValue *Incoming = ExitIRI->getOperand(0);
+      if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
+                               TypeInfo))
+        continue;
+
       LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
       VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
                                     {Op, Plan.getOrAddLiveIn(ConstantInt::get(
                                              IntegerType::get(Ctx, 32), 1))});
-      ExitIRI->setOperand(Idx, Ext);
+      ExitIRI->setOperand(0, Ext);
     }
   }
   return true;
@@ -9307,11 +9253,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     VPlanTransforms::handleUncountableEarlyExit(
         *Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
   }
-  addScalarResumePhis(RecipeBuilder, *Plan);
-  SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
-      OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
+  DenseMap<VPValue *, VPValue *> EndValues;
+  addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
+  SetVector<VPIRInstruction *> ExitUsersToFix =
+      collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
   addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
-  if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
+  if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
+                            EndValues)) {
     reportVectorizationFailure(
         "Some exit values in loop with uncountable exit not supported yet",
         "UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
@@ -9438,7 +9386,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
     auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
     RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
   }
-  addScalarResumePhis(RecipeBuilder, *Plan);
+  DenseMap<VPValue *, VPValue *> EndValues;
+  addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
 
   assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
   return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 9a082921d4f7f2..d763a9a605294b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -315,16 +315,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
     LastLane = 0;
   }
 
-  auto *LastInst = cast<Instruction>(get(Def, LastLane));
-  // Set the insert point after the last scalarized instruction or after the
-  // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
-  // will directly follow the scalar definitions.
+  auto *LastDef = get(Def, LastLane);
   auto OldIP = Builder.saveIP();
-  auto NewIP =
-      isa<PHINode>(LastInst)
-          ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
-          : std::next(BasicBlock::iterator(LastInst));
-  Builder.SetInsertPoint(&*NewIP);
+  if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
+    // TODO: Remove once VPDerivedIVReicpe can be simplified, which requires
+    // vector trip count being modeled in VPlan.
+    // Set the insert point after the last scalarized instruction or after the
+    // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
+    // will directly follow the scalar definitions.
+    auto NewIP =
+        isa<PHINode>(LastInst)
+            ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
+            : std::next(BasicBlock::iterator(LastInst));
+    Builder.SetInsertPoint(&*NewIP);
+  }
 
   // However, if we are vectorizing, we need to construct the vector values.
   // If the value is known to be uniform after vectorization, we can just
@@ -339,7 +343,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
   } else {
     // Initialize packing with insertelements to start from undef.
     assert(!VF.isScalable() && "VF is assumed to be non scalable.");
-    Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
+    Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
     set(Def, Undef);
     for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
       packScalarIntoVectorValue(Def, Lane);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 404202b7f31304..dd0d2dfdd2e01c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1422,6 +1422,11 @@ class VPIRInstruction : public VPRecipeBase {
            "Op must be an operand of the recipe");
     return true;
   }
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/112147


More information about the llvm-commits mailing list