[llvm] 826f237 - [VPlan] Don't add separate vector latch block (NFC).

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 26 14:04:10 PDT 2025


Author: Florian Hahn
Date: 2025-04-26T22:03:18+01:00
New Revision: 826f237cb4b44f6d95d66eea0e58f7d24585986b

URL: https://github.com/llvm/llvm-project/commit/826f237cb4b44f6d95d66eea0e58f7d24585986b
DIFF: https://github.com/llvm/llvm-project/commit/826f237cb4b44f6d95d66eea0e58f7d24585986b.diff

LOG: [VPlan] Don't add separate vector latch block (NFC).

Simplify initial VPlan construction by not creating a separate
vector.latch block, which isn't needed and will get folded away later.
This has been suggested as an independent clean-up multiple times.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4beeabd36d523..6b5b50a26c199 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9531,14 +9531,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
   VPBlockBase *PrevVPBB = nullptr;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
-    // Handle VPBBs down to the latch.
-    if (VPBB == LoopRegion->getExiting()) {
-      assert(!VPB2IRBB.contains(VPBB) &&
-             "the latch block shouldn't have a corresponding IRBB");
-      VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
-      break;
-    }
-
     // Create mask based on the IR BB corresponding to VPBB.
     // TODO: Predicate directly based on VPlan.
     Builder.setInsertPoint(VPBB, VPBB->begin());
@@ -9761,6 +9753,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   for (ElementCount VF : Range)
     Plan->addVF(VF);
 
+  // Tail folding is not supported for outer loops, so the induction increment
+  // is guaranteed to not wrap.
+  bool HasNUW = true;
+  addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
+                        DebugLoc());
+
   if (!VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
           Plan,
           [this](PHINode *P) {
@@ -9769,12 +9767,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
           *PSE.getSE(), *TLI))
     return nullptr;
 
-  // Tail folding is not supported for outer loops, so the induction increment
-  // is guaranteed to not wrap.
-  bool HasNUW = true;
-  addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
-                        DebugLoc());
-
   // Collect mapping of IR header phis to header phi recipes, to be used in
   // addScalarResumePhis.
   VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
@@ -9939,14 +9931,18 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       // ensure that it comes after all of it's inputs, including CondOp.
       // Delete CurrentLink as it will be invalid if its operand is replaced
       // with a reduction defined at the bottom of the block in the next link.
-      LinkVPBB->appendRecipe(RedRecipe);
+      if (LinkVPBB->getNumSuccessors() == 0)
+        RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
+      else
+        LinkVPBB->appendRecipe(RedRecipe);
+
       CurrentLink->replaceAllUsesWith(RedRecipe);
       ToDelete.push_back(CurrentLink);
       PreviousLink = RedRecipe;
     }
   }
   VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
-  Builder.setInsertPoint(&*LatchVPBB->begin());
+  Builder.setInsertPoint(&*std::prev(std::prev(LatchVPBB->end())));
   VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
   for (VPRecipeBase &R :
        Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
@@ -9966,8 +9962,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
         !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
       VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader());
-      assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB &&
-             "reduction recipe must be defined before latch");
       Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
       std::optional<FastMathFlags> FMFs =
           PhiTy->isFloatingPointTy()

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index f24d42256caef..b374371667b5e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -451,10 +451,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
       createLoopRegion(Plan, HeaderVPB);
 
   VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
-  auto *OrigExiting = TopRegion->getExiting();
-  VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch");
-  VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting);
-  TopRegion->setExiting(LatchVPBB);
   TopRegion->setName("vector loop");
   TopRegion->getEntryBasicBlock()->setName("vector.body");
 
@@ -472,7 +468,6 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
 
   VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
   VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
-
   VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
   VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
 

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 95dfeeaa57f3a..fa16a64ebd80a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -55,6 +55,9 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
          make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
 
       VPValue *VPV = Ingredient.getVPSingleValue();
+      if (!VPV->getUnderlyingValue())
+        continue;
+
       Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
 
       VPRecipeBase *NewRecipe = nullptr;
@@ -387,9 +390,13 @@ static void addReplicateRegions(VPlan &Plan) {
     SplitBlock->setName(
         OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
     // Record predicated instructions for above packing optimizations.
-    VPBlockBase *Region = createReplicateRegion(RepR, Plan);
+    VPRegionBlock *Region = createReplicateRegion(RepR, Plan);
     Region->setParent(CurrentBlock->getParent());
     VPBlockUtils::insertOnEdge(CurrentBlock, SplitBlock, Region);
+
+    VPRegionBlock *ParentRegion = Region->getParent();
+    if (ParentRegion && ParentRegion->getExiting() == CurrentBlock)
+      ParentRegion->setExiting(SplitBlock);
   }
 }
 

diff  --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
index be98e86bceed7..11abdf790d121 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp
@@ -53,14 +53,10 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) {
   VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
   EXPECT_EQ(7u, VecBB->size());
   EXPECT_EQ(0u, VecBB->getNumPredecessors());
-  EXPECT_EQ(1u, VecBB->getNumSuccessors());
+  EXPECT_EQ(0u, VecBB->getNumSuccessors());
   EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
   EXPECT_EQ(&*Plan, VecBB->getPlan());
 
-  VPBlockBase *VecLatch = VecBB->getSingleSuccessor();
-  EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch);
-  EXPECT_EQ(0u, VecLatch->getNumSuccessors());
-
   auto Iter = VecBB->begin();
   VPWidenPHIRecipe *Phi = dyn_cast<VPWidenPHIRecipe>(&*Iter++);
   EXPECT_NE(nullptr, Phi);
@@ -130,33 +126,28 @@ compound=true
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" +
       "  EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" +
-      "Successor(s): vector.latch\l"
-    ]
-    N2 -> N4 [ label=""]
-    N4 [label =
-      "vector.latch:\l" +
       "No successors\l"
     ]
   }
-  N4 -> N5 [ label="" ltail=cluster_N3]
-  N5 [label =
+  N2 -> N4 [ label="" ltail=cluster_N3]
+  N4 [label =
     "middle.block:\l" +
     "  EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
     "  EMIT branch-on-cond vp\<%cmp.n\>\l" +
     "Successor(s): ir-bb\<for.end\>, scalar.ph\l"
   ]
-  N5 -> N6 [ label="T"]
-  N5 -> N7 [ label="F"]
-  N6 [label =
+  N4 -> N5 [ label="T"]
+  N4 -> N6 [ label="F"]
+  N5 [label =
     "ir-bb\<for.end\>:\l" +
     "No successors\l"
   ]
-  N7 [label =
+  N6 [label =
     "scalar.ph:\l" +
     "Successor(s): ir-bb\<for.body\>\l"
   ]
-  N7 -> N8 [ label=""]
-  N8 [label =
+  N6 -> N7 [ label=""]
+  N7 [label =
     "ir-bb\<for.body\>:\l" +
     "  IR   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]\l" +
     "  IR   %arr.idx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv\l" +
@@ -171,10 +162,6 @@ compound=true
 )";
   EXPECT_EQ(ExpectedStr, FullDump);
 #endif
-  TargetLibraryInfoImpl TLII(M.getTargetTriple());
-  TargetLibraryInfo TLI(TLII);
-  VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
-      Plan, [](PHINode *P) { return nullptr; }, *SE, TLI);
 }
 
 TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
@@ -203,6 +190,12 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
 
   TargetLibraryInfoImpl TLII(M.getTargetTriple());
   TargetLibraryInfo TLI(TLII);
+  // Current VPlan construction doesn't add a terminator for top-level loop
+  // latches. Add it before running transform.
+  cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getExiting())
+      ->appendRecipe(new VPInstruction(
+          VPInstruction::BranchOnCond,
+          {Plan->getOrAddLiveIn(ConstantInt::getTrue(F->getContext()))}));
   VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
       Plan, [](PHINode *P) { return nullptr; }, *SE, TLI);
 
@@ -214,15 +207,11 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
   // Check that the region following the preheader consists of a block for the
   // original header and a separate latch.
   VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock();
-  EXPECT_EQ(7u, VecBB->size());
+  EXPECT_EQ(8u, VecBB->size());
   EXPECT_EQ(0u, VecBB->getNumPredecessors());
-  EXPECT_EQ(1u, VecBB->getNumSuccessors());
+  EXPECT_EQ(0u, VecBB->getNumSuccessors());
   EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB);
 
-  VPBlockBase *VecLatch = VecBB->getSingleSuccessor();
-  EXPECT_EQ(VecLatch->getParent()->getExitingBasicBlock(), VecLatch);
-  EXPECT_EQ(0u, VecLatch->getNumSuccessors());
-
   auto Iter = VecBB->begin();
   EXPECT_NE(nullptr, dyn_cast<VPWidenPHIRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPWidenGEPRecipe>(&*Iter++));
@@ -231,6 +220,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
   EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
   EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
+  EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));
   EXPECT_EQ(VecBB->end(), Iter);
 }
 
@@ -303,33 +293,28 @@ compound=true
       "  EMIT store ir\<%res\>, ir\<%arr.idx\>\l" +
       "  EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" +
       "  EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" +
-      "Successor(s): vector.latch\l"
-    ]
-    N4 -> N5 [ label=""]
-    N5 [label =
-      "vector.latch:\l" +
       "No successors\l"
     ]
   }
-  N5 -> N6 [ label="" ltail=cluster_N3]
-  N6 [label =
+  N4 -> N5 [ label="" ltail=cluster_N3]
+  N5 [label =
     "middle.block:\l" +
     "  EMIT vp\<%cmp.n\> = icmp eq ir\<%N\>, vp\<%0\>\l" +
     "  EMIT branch-on-cond vp\<%cmp.n\>\l" +
     "Successor(s): ir-bb\<exit.2\>, scalar.ph\l"
   ]
-  N6 -> N7 [ label="T"]
-  N6 -> N8 [ label="F"]
-  N7 [label =
+  N5 -> N6 [ label="T"]
+  N5 -> N7 [ label="F"]
+  N6 [label =
     "ir-bb\<exit.2\>:\l" +
     "No successors\l"
   ]
-  N8 [label =
+  N7 [label =
     "scalar.ph:\l" +
     "Successor(s): ir-bb\<loop.header\>\l"
   ]
-  N8 -> N9 [ label=""]
-  N9 [label =
+  N7 -> N8 [ label=""]
+  N8 [label =
     "ir-bb\<loop.header\>:\l" +
     "  IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]\l" +
     "  IR   %arr.idx = getelementptr inbounds i32, ptr %A, i64 %iv\l" +


        


More information about the llvm-commits mailing list