[llvm] [VPlan] Move predication to VPlanTransform (NFC) (WIP). (PR #128420)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 28 05:35:16 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/128420

>From eb047413a6da391d9a17964871bc07fc57d13550 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 28 Apr 2025 12:12:55 +0100
Subject: [PATCH 1/2] [VPlan] Retain exit conditions early

Step tmp
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 46 ++++++++--
 llvm/lib/Transforms/Vectorize/VPlan.h         |  3 +-
 .../Vectorize/VPlanConstruction.cpp           | 57 +++++-------
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 90 ++++++++++---------
 .../Transforms/Vectorize/VPlanTransforms.h    |  5 +-
 .../vplan-printing-outer-loop.ll              |  6 +-
 6 files changed, 117 insertions(+), 90 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d73ba07ad350e..f0ccdbd3fdb0b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9158,6 +9158,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
 // loop.
 static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
                                   DebugLoc DL) {
+  using namespace VPlanPatternMatch;
   Value *StartIdx = ConstantInt::get(IdxTy, 0);
   auto *StartV = Plan.getOrAddLiveIn(StartIdx);
 
@@ -9167,7 +9168,16 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
   VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
   Header->insert(CanonicalIVPHI, Header->begin());
 
-  VPBuilder Builder(TopRegion->getExitingBasicBlock());
+  VPBasicBlock *LatchVPBB = TopRegion->getExitingBasicBlock();
+  // We are about to replace the branch to exit the region. Remove the original
+  // BranchOnCond, if there is any.
+  // TODO: Move canonical IV and BranchOnCount introduction to initial skeleton
+  // creation.
+  if (!LatchVPBB->empty() &&
+      match(&LatchVPBB->back(), m_BranchOnCond(m_VPValue())))
+    LatchVPBB->getTerminator()->eraseFromParent();
+
+  VPBuilder Builder(LatchVPBB);
   // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
   auto *CanonicalIVIncrement = Builder.createOverflowingOp(
       Instruction::Add, {CanonicalIVPHI, &Plan.getVFxUF()}, {HasNUW, false}, DL,
@@ -9469,6 +9479,23 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
   bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
   addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL);
 
+  if (Legal->hasUncountableEarlyExit()) {
+    VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
+                             Range);
+  } else {
+    SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
+                                             Plan->getExitBlocks().end());
+    for (VPBlockBase *VPBB : to_vector(
+             vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
+      for (VPBlockBase *EB : ExitBlocks) {
+        if (is_contained(VPBB->getSuccessors(), EB)) {
+          cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
+          VPBlockUtils::disconnectBlocks(VPBB, EB);
+        }
+      }
+    }
+  }
+
   VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
                                 Builder);
 
@@ -9639,12 +9666,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     R->setOperand(1, WideIV->getStepValue());
   }
 
-  if (auto *UncountableExitingBlock =
-          Legal->getUncountableEarlyExitingBlock()) {
-    VPlanTransforms::runPass(VPlanTransforms::handleUncountableEarlyExit, *Plan,
-                             OrigLoop, UncountableExitingBlock, RecipeBuilder,
-                             Range);
-  }
   DenseMap<VPValue *, VPValue *> IVEndValues;
   addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
   SetVector<VPIRInstruction *> ExitUsersToFix =
@@ -9742,6 +9763,17 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
   VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
                                      PSE, true, false, OrigLoop);
+  SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
+                                           Plan->getExitBlocks().end());
+  for (VPBlockBase *VPBB : to_vector(
+           vp_depth_first_shallow(Plan->getVectorLoopRegion()->getEntry()))) {
+    for (VPBlockBase *EB : ExitBlocks) {
+      if (is_contained(VPBB->getSuccessors(), EB)) {
+        cast<VPBasicBlock>(VPBB)->getTerminator()->eraseFromParent();
+        VPBlockUtils::disconnectBlocks(VPBB, EB);
+      }
+    }
+  }
 
   for (ElementCount VF : Range)
     Plan->addVF(VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 147ca5b4475b5..e04b42bad29ad 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -117,6 +117,7 @@ class VPBlockBase {
     Predecessors.erase(Pos);
   }
 
+public:
   /// Remove \p Successor from the successors of this block.
   void removeSuccessor(VPBlockBase *Successor) {
     auto Pos = find(Successors, Successor);
@@ -129,8 +130,6 @@ class VPBlockBase {
   void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
     auto I = find(Predecessors, Old);
     assert(I != Predecessors.end());
-    assert(Old->getParent() == New->getParent() &&
-           "replaced predecessor must have the same parent");
     *I = New;
   }
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 5eb2f058f329f..e65f25975f3a3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -112,6 +112,9 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
     return VPBB;
   }
 
+  if (!TheLoop->contains(BB))
+    return Plan->getExitBlock(BB);
+
   // Create new VPBB.
   StringRef Name = BB->getName();
   LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
@@ -145,14 +148,6 @@ bool PlainCFGBuilder::isExternalDef(Value *Val) {
     // Instruction definition is in outermost loop PH.
     return false;
 
-  // Check whether Instruction definition is in a loop exit.
-  SmallVector<BasicBlock *> ExitBlocks;
-  TheLoop->getExitBlocks(ExitBlocks);
-  if (is_contained(ExitBlocks, InstParent)) {
-    // Instruction definition is in outermost loop exit.
-    return false;
-  }
-
   // Check whether Instruction definition is in loop body.
   return !TheLoop->contains(Inst);
 }
@@ -201,11 +196,6 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
            "Instruction shouldn't have been visited.");
 
     if (auto *Br = dyn_cast<BranchInst>(Inst)) {
-      if (TheLoop->getLoopLatch() == BB ||
-          any_of(successors(BB),
-                 [this](BasicBlock *Succ) { return !TheLoop->contains(Succ); }))
-        continue;
-
       // Conditional branch instruction are represented using BranchOnCond
       // recipes.
       if (Br->isConditional()) {
@@ -295,7 +285,6 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
   for (BasicBlock *BB : RPO) {
     // Create or retrieve the VPBasicBlock for this BB.
     VPBasicBlock *VPBB = getOrCreateVPBB(BB);
-    Loop *LoopForBB = LI->getLoopFor(BB);
     // Set VPBB predecessors in the same order as they are in the incoming BB.
     setVPBBPredsFromBB(VPBB, BB);
 
@@ -326,24 +315,12 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
     BasicBlock *IRSucc1 = BI->getSuccessor(1);
     VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
     VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
-
-    // Don't connect any blocks outside the current loop except the latches for
-    // inner loops.
-    // TODO: Also connect exit blocks during initial VPlan construction.
-    if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) {
-      if (!LoopForBB->contains(IRSucc0)) {
-        VPBB->setOneSuccessor(Successor1);
-        continue;
-      }
-      if (!LoopForBB->contains(IRSucc1)) {
-        VPBB->setOneSuccessor(Successor0);
-        continue;
-      }
-    }
-
     VPBB->setTwoSuccessors(Successor0, Successor1);
   }
 
+  for (auto *EB : Plan->getExitBlocks())
+    setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
+
   // 2. The whole CFG has been built at this point so all the input Values must
   // have a VPlan counterpart. Fix VPlan header phi by adding their
   // corresponding VPlan operands.
@@ -392,6 +369,8 @@ std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
 /// return false.
 static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB,
                                     const VPDominatorTree &VPDT) {
+  if (isa<VPIRBasicBlock>(HeaderVPB))
+    return false;
   ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
   if (Preds.size() != 2)
     return false;
@@ -447,18 +426,23 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
   assert(LatchVPBB->getNumSuccessors() <= 1 &&
          "Latch has more than one successor");
   if (Succ)
-    VPBlockUtils::disconnectBlocks(LatchVPBB, Succ);
+    LatchVPBB->removeSuccessor(Succ);
 
   auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "",
                                      false /*isReplicator*/);
   // All VPBB's reachable shallowly from HeaderVPB belong to top level loop,
   // because VPlan is expected to end at top level latch disconnected above.
+  SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan.getExitBlocks().begin(),
+                                           Plan.getExitBlocks().end());
   for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
-    VPBB->setParent(R);
+    if (!ExitBlocks.contains(VPBB))
+      VPBB->setParent(R);
 
   VPBlockUtils::insertBlockAfter(R, PreheaderVPBB);
-  if (Succ)
-    VPBlockUtils::connectBlocks(R, Succ);
+  if (Succ) {
+    R->setOneSuccessor(Succ);
+    Succ->replacePredecessor(LatchVPBB, R);
+  }
 }
 
 void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
@@ -505,7 +489,11 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
   //    remainder and we can set the condition to true.
   // 3) Otherwise, construct a runtime check.
 
+  BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
+  auto *VPExitBlock = IRExitBlock ? Plan.getExitBlock(IRExitBlock) : nullptr;
   if (!RequiresScalarEpilogueCheck) {
+    if (VPExitBlock)
+      VPBlockUtils::disconnectBlocks(MiddleVPBB, VPExitBlock);
     VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
     // The exit blocks are unreachable, remove their recipes to make sure no
     // users remain that may pessimize transforms.
@@ -516,10 +504,7 @@ void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy,
     return;
   }
 
-  BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock();
-  auto *VPExitBlock = Plan.getExitBlock(IRExitBlock);
   // The connection order corresponds to the operands of the conditional branch.
-  VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
   VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
 
   auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d0128ecd491dc..97dba91b91f0b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2470,33 +2470,56 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
     R->eraseFromParent();
 }
 
-void VPlanTransforms::handleUncountableEarlyExit(
-    VPlan &Plan, Loop *OrigLoop, BasicBlock *UncountableExitingBlock,
-    VPRecipeBuilder &RecipeBuilder, VFRange &Range) {
+void VPlanTransforms::handleUncountableEarlyExit(VPlan &Plan, VFRange &Range) {
+  auto *MiddleVPBB = Plan.getMiddleBlock();
+  // First find the uncountable early exiting block by looking at the
+  // predecessors of the exit blocks.
+  VPBasicBlock *EarlyExitingVPBB = nullptr;
+  VPIRBasicBlock *EarlyExitVPBB = nullptr;
+  for (auto *EB : Plan.getExitBlocks()) {
+    for (VPBlockBase *Pred : EB->getPredecessors()) {
+      if (Pred != MiddleVPBB) {
+        EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
+        EarlyExitVPBB = EB;
+        break;
+      }
+    }
+  }
+
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   auto *LatchVPBB = cast<VPBasicBlock>(LoopRegion->getExiting());
   VPBuilder Builder(LatchVPBB->getTerminator());
-  auto *MiddleVPBB = Plan.getMiddleBlock();
-  VPValue *IsEarlyExitTaken = nullptr;
-
-  // Process the uncountable exiting block. Update IsEarlyExitTaken, which
-  // tracks if the uncountable early exit has been taken. Also split the middle
-  // block and have it conditionally branch to the early exit block if
-  // EarlyExitTaken.
-  auto *EarlyExitingBranch =
-      cast<BranchInst>(UncountableExitingBlock->getTerminator());
-  BasicBlock *TrueSucc = EarlyExitingBranch->getSuccessor(0);
-  BasicBlock *FalseSucc = EarlyExitingBranch->getSuccessor(1);
-  BasicBlock *EarlyExitIRBB =
-      !OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc;
-  VPIRBasicBlock *VPEarlyExitBlock = Plan.getExitBlock(EarlyExitIRBB);
-
-  VPValue *EarlyExitNotTakenCond = RecipeBuilder.getBlockInMask(
-      OrigLoop->contains(TrueSucc) ? TrueSucc : FalseSucc);
-  auto *EarlyExitTakenCond = Builder.createNot(EarlyExitNotTakenCond);
-  IsEarlyExitTaken =
-      Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
+  VPBlockBase *TrueSucc = EarlyExitingVPBB->getSuccessors()[0];
+  VPValue *EarlyExitCond = EarlyExitingVPBB->getTerminator()->getOperand(0);
+  auto *EarlyExitTakenCond = TrueSucc == EarlyExitVPBB
+                                 ? EarlyExitCond
+                                 : Builder.createNot(EarlyExitCond);
+
+  if (!EarlyExitVPBB->getSinglePredecessor() &&
+      EarlyExitVPBB->getPredecessors()[0] != MiddleVPBB) {
+    for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
+      // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
+      // a single predecessor and 1 if it has two.
+      // If EarlyExitVPBB has two predecessors, they are already ordered such
+      // that early exit is second (and latch exit is first), by construction.
+      // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
+      // ordered the other way around, and it is the order of the latter which
+      // corresponds to the order of operands of EarlyExitVPBB's phi recipes.
+      // Therefore, if early exit (UncountableExitingBlock) is the first
+      // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
+      // thereby bringing them to match EarlyExitVPBB's predecessor order,
+      // with early exit being last (second). Otherwise they already match.
+      cast<VPIRPhi>(&R)->swapOperands();
+    }
+  }
 
+  EarlyExitingVPBB->getTerminator()->eraseFromParent();
+  VPBlockUtils::disconnectBlocks(EarlyExitingVPBB, EarlyExitVPBB);
+
+  // Split the middle block and have it conditionally branch to the early exit
+  // block if EarlyExitTaken.
+  VPValue *IsEarlyExitTaken =
+      Builder.createNaryOp(VPInstruction::AnyOf, {EarlyExitTakenCond});
   VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
   VPBasicBlock *VectorEarlyExitVPBB =
       Plan.createVPBasicBlock("vector.early.exit");
@@ -2504,30 +2527,17 @@ void VPlanTransforms::handleUncountableEarlyExit(
   VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
   NewMiddle->swapSuccessors();
 
-  VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, VPEarlyExitBlock);
+  VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);
 
   // Update the exit phis in the early exit block.
   VPBuilder MiddleBuilder(NewMiddle);
   VPBuilder EarlyExitB(VectorEarlyExitVPBB);
-  for (VPRecipeBase &R : VPEarlyExitBlock->phis()) {
+  for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
     auto *ExitIRI = cast<VPIRPhi>(&R);
-    // Early exit operand should always be last, i.e., 0 if VPEarlyExitBlock has
+    // Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
     // a single predecessor and 1 if it has two.
     unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
-    if (!VPEarlyExitBlock->getSinglePredecessor()) {
-      // If VPEarlyExitBlock has two predecessors, they are already ordered such
-      // that early exit is second (and latch exit is first), by construction.
-      // But its underlying IRBB (EarlyExitIRBB) may have its predecessors
-      // ordered the other way around, and it is the order of the latter which
-      // corresponds to the order of operands of VPEarlyExitBlock's phi recipes.
-      // Therefore, if early exit (UncountableExitingBlock) is the first
-      // predecessor of EarlyExitIRBB, we swap the operands of phi recipes,
-      // thereby bringing them to match VPEarlyExitBlock's predecessor order,
-      // with early exit being last (second). Otherwise they already match.
-      if (*pred_begin(VPEarlyExitBlock->getIRBasicBlock()) ==
-          UncountableExitingBlock)
-        ExitIRI->swapOperands();
-
+    if (!EarlyExitVPBB->getSinglePredecessor()) {
       // The first of two operands corresponds to the latch exit, via MiddleVPBB
       // predecessor. Extract its last lane.
       ExitIRI->extractLastLaneOfFirstOperand(MiddleBuilder);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 2635bb8a62f74..d9f7b77a3ade6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -175,10 +175,7 @@ struct VPlanTransforms {
   ///    exit conditions
   ///  * splitting the original middle block to branch to the early exit block
   ///    if taken.
-  static void handleUncountableEarlyExit(VPlan &Plan, Loop *OrigLoop,
-                                         BasicBlock *UncountableExitingBlock,
-                                         VPRecipeBuilder &RecipeBuilder,
-                                         VFRange &Range);
+  static void handleUncountableEarlyExit(VPlan &Plan, VFRange &Range);
 
   /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
   /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index 91a5ea6b7fe36..fe845ae74cbee 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -31,7 +31,11 @@ define void @foo(i64 %n) {
 ; CHECK-NEXT: outer.latch:
 ; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
 ; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
-; CHECK-NEXT: Successor(s): outer.header
+; CHECK-NEXT:   EMIT branch-on-cond ir<%outer.ec>
+; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<exit>:
+; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 entry:
   br label %outer.header

>From 7f61860e21418c6bd03b4a1059355b4f9d2f8eef Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 22 Feb 2025 19:15:32 +0000
Subject: [PATCH 2/2] [VPlan] Move predication to VPlanTransform (NFC) (WIP).

This patch moves the logic to predicate and linearize a VPlan to a
dedicated VPlan transform.

The main logic to perform predication is ready to review, although
there are a few things to note that should be improved, either directly
in the PR or in the future:
 * Edge and block masks are cached in VPRecipeBuilder, so they can be
   accessed during recipe construction. A better alternative may be to
   add mask operands to all VPInstructions that need them and use those
   during recipe construction.
 * The mask caching in a map also means that this map needs updating
   each time a new recipe replaces a VPInstruction; this would also be
   handled by adding mask operands.

This is currently still WIP because early-exit loop handling does not
work yet: the exit conditions are not available in the initial VPlans.
This will be fixed with https://github.com/llvm/llvm-project/pull/128419
and follow-ups.

All tests except those for early-exit loops are passing.
---
 llvm/lib/Transforms/Vectorize/CMakeLists.txt  |   1 +
 .../Transforms/Vectorize/LoopVectorize.cpp    | 324 ++++--------------
 .../Transforms/Vectorize/VPRecipeBuilder.h    |  45 +--
 .../Vectorize/VPlanConstruction.cpp           |  30 +-
 .../Transforms/Vectorize/VPlanPredicator.cpp  | 252 ++++++++++++++
 .../Transforms/Vectorize/VPlanTransforms.h    |   7 +-
 .../vplan-printing-outer-loop.ll              |   4 +-
 .../Transforms/Vectorize/VPlanTestBase.h      |   3 +-
 8 files changed, 353 insertions(+), 313 deletions(-)
 create mode 100644 llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 0dc6a7d2f594f..e6c7142edd100 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -24,6 +24,7 @@ add_llvm_component_library(LLVMVectorize
   VPlan.cpp
   VPlanAnalysis.cpp
   VPlanConstruction.cpp
+  VPlanPredicator.cpp
   VPlanRecipes.cpp
   VPlanSLP.cpp
   VPlanTransforms.cpp
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f0ccdbd3fdb0b..9959ca141a96b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8264,185 +8264,6 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
   });
 }
 
-void VPRecipeBuilder::createSwitchEdgeMasks(SwitchInst *SI) {
-  BasicBlock *Src = SI->getParent();
-  assert(!OrigLoop->isLoopExiting(Src) &&
-         all_of(successors(Src),
-                [this](BasicBlock *Succ) {
-                  return OrigLoop->getHeader() != Succ;
-                }) &&
-         "unsupported switch either exiting loop or continuing to header");
-  // Create masks where the terminator in Src is a switch. We create mask for
-  // all edges at the same time. This is more efficient, as we can create and
-  // collect compares for all cases once.
-  VPValue *Cond = getVPValueOrAddLiveIn(SI->getCondition());
-  BasicBlock *DefaultDst = SI->getDefaultDest();
-  MapVector<BasicBlock *, SmallVector<VPValue *>> Dst2Compares;
-  for (auto &C : SI->cases()) {
-    BasicBlock *Dst = C.getCaseSuccessor();
-    assert(!EdgeMaskCache.contains({Src, Dst}) && "Edge masks already created");
-    // Cases whose destination is the same as default are redundant and can be
-    // ignored - they will get there anyhow.
-    if (Dst == DefaultDst)
-      continue;
-    auto &Compares = Dst2Compares[Dst];
-    VPValue *V = getVPValueOrAddLiveIn(C.getCaseValue());
-    Compares.push_back(Builder.createICmp(CmpInst::ICMP_EQ, Cond, V));
-  }
-
-  // We need to handle 2 separate cases below for all entries in Dst2Compares,
-  // which excludes destinations matching the default destination.
-  VPValue *SrcMask = getBlockInMask(Src);
-  VPValue *DefaultMask = nullptr;
-  for (const auto &[Dst, Conds] : Dst2Compares) {
-    // 1. Dst is not the default destination. Dst is reached if any of the cases
-    // with destination == Dst are taken. Join the conditions for each case
-    // whose destination == Dst using an OR.
-    VPValue *Mask = Conds[0];
-    for (VPValue *V : ArrayRef<VPValue *>(Conds).drop_front())
-      Mask = Builder.createOr(Mask, V);
-    if (SrcMask)
-      Mask = Builder.createLogicalAnd(SrcMask, Mask);
-    EdgeMaskCache[{Src, Dst}] = Mask;
-
-    // 2. Create the mask for the default destination, which is reached if none
-    // of the cases with destination != default destination are taken. Join the
-    // conditions for each case where the destination is != Dst using an OR and
-    // negate it.
-    DefaultMask = DefaultMask ? Builder.createOr(DefaultMask, Mask) : Mask;
-  }
-
-  if (DefaultMask) {
-    DefaultMask = Builder.createNot(DefaultMask);
-    if (SrcMask)
-      DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
-  }
-  EdgeMaskCache[{Src, DefaultDst}] = DefaultMask;
-}
-
-VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
-  assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
-
-  // Look for cached value.
-  std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
-  EdgeMaskCacheTy::iterator ECEntryIt = EdgeMaskCache.find(Edge);
-  if (ECEntryIt != EdgeMaskCache.end())
-    return ECEntryIt->second;
-
-  if (auto *SI = dyn_cast<SwitchInst>(Src->getTerminator())) {
-    createSwitchEdgeMasks(SI);
-    assert(EdgeMaskCache.contains(Edge) && "Mask for Edge not created?");
-    return EdgeMaskCache[Edge];
-  }
-
-  VPValue *SrcMask = getBlockInMask(Src);
-
-  // The terminator has to be a branch inst!
-  BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
-  assert(BI && "Unexpected terminator found");
-  if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1))
-    return EdgeMaskCache[Edge] = SrcMask;
-
-  // If source is an exiting block, we know the exit edge is dynamically dead
-  // in the vector loop, and thus we don't need to restrict the mask.  Avoid
-  // adding uses of an otherwise potentially dead instruction unless we are
-  // vectorizing a loop with uncountable exits. In that case, we always
-  // materialize the mask.
-  if (OrigLoop->isLoopExiting(Src) &&
-      Src != Legal->getUncountableEarlyExitingBlock())
-    return EdgeMaskCache[Edge] = SrcMask;
-
-  VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
-  assert(EdgeMask && "No Edge Mask found for condition");
-
-  if (BI->getSuccessor(0) != Dst)
-    EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
-
-  if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
-    // The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
-    // is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
-    // instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
-    EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, BI->getDebugLoc());
-  }
-
-  return EdgeMaskCache[Edge] = EdgeMask;
-}
-
-VPValue *VPRecipeBuilder::getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const {
-  assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
-
-  // Look for cached value.
-  std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
-  EdgeMaskCacheTy::const_iterator ECEntryIt = EdgeMaskCache.find(Edge);
-  assert(ECEntryIt != EdgeMaskCache.end() &&
-         "looking up mask for edge which has not been created");
-  return ECEntryIt->second;
-}
-
-void VPRecipeBuilder::createHeaderMask() {
-  BasicBlock *Header = OrigLoop->getHeader();
-
-  // When not folding the tail, use nullptr to model all-true mask.
-  if (!CM.foldTailByMasking()) {
-    BlockMaskCache[Header] = nullptr;
-    return;
-  }
-
-  // Introduce the early-exit compare IV <= BTC to form header block mask.
-  // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
-  // constructing the desired canonical IV in the header block as its first
-  // non-phi instructions.
-
-  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
-  auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
-  auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
-  HeaderVPBB->insert(IV, NewInsertionPoint);
-
-  VPBuilder::InsertPointGuard Guard(Builder);
-  Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
-  VPValue *BlockMask = nullptr;
-  VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
-  BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
-  BlockMaskCache[Header] = BlockMask;
-}
-
-VPValue *VPRecipeBuilder::getBlockInMask(BasicBlock *BB) const {
-  // Return the cached value.
-  BlockMaskCacheTy::const_iterator BCEntryIt = BlockMaskCache.find(BB);
-  assert(BCEntryIt != BlockMaskCache.end() &&
-         "Trying to access mask for block without one.");
-  return BCEntryIt->second;
-}
-
-void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
-  assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
-  assert(BlockMaskCache.count(BB) == 0 && "Mask for block already computed");
-  assert(OrigLoop->getHeader() != BB &&
-         "Loop header must have cached block mask");
-
-  // All-one mask is modelled as no-mask following the convention for masked
-  // load/store/gather/scatter. Initialize BlockMask to no-mask.
-  VPValue *BlockMask = nullptr;
-  // This is the block mask. We OR all unique incoming edges.
-  for (auto *Predecessor :
-       SetVector<BasicBlock *>(llvm::from_range, predecessors(BB))) {
-    VPValue *EdgeMask = createEdgeMask(Predecessor, BB);
-    if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too.
-      BlockMaskCache[BB] = EdgeMask;
-      return;
-    }
-
-    if (!BlockMask) { // BlockMask has its initialized nullptr value.
-      BlockMask = EdgeMask;
-      continue;
-    }
-
-    BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
-  }
-
-  BlockMaskCache[BB] = BlockMask;
-}
-
 VPWidenMemoryRecipe *
 VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
                                   VFRange &Range) {
@@ -8467,7 +8288,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
 
   VPValue *Mask = nullptr;
   if (Legal->isMaskRequired(I))
-    Mask = getBlockInMask(I->getParent());
+    Mask = getBlockInMask(Builder.getInsertBlock());
 
   // Determine if the pointer operand of the access is either consecutive or
   // reverse consecutive.
@@ -8586,38 +8407,6 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
   return nullptr;
 }
 
-VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
-                                           ArrayRef<VPValue *> Operands) {
-  unsigned NumIncoming = Phi->getNumIncomingValues();
-
-  // We know that all PHIs in non-header blocks are converted into selects, so
-  // we don't have to worry about the insertion order and we can just use the
-  // builder. At this point we generate the predication tree. There may be
-  // duplications since this is a simple recursive scan, but future
-  // optimizations will clean it up.
-
-  // Map incoming IR BasicBlocks to incoming VPValues, for lookup below.
-  // TODO: Add operands and masks in order from the VPlan predecessors.
-  DenseMap<BasicBlock *, VPValue *> VPIncomingValues;
-  for (const auto &[Idx, Pred] : enumerate(predecessors(Phi->getParent())))
-    VPIncomingValues[Pred] = Operands[Idx];
-
-  SmallVector<VPValue *, 2> OperandsWithMask;
-  for (unsigned In = 0; In < NumIncoming; In++) {
-    BasicBlock *Pred = Phi->getIncomingBlock(In);
-    OperandsWithMask.push_back(VPIncomingValues.lookup(Pred));
-    VPValue *EdgeMask = getEdgeMask(Pred, Phi->getParent());
-    if (!EdgeMask) {
-      assert(In == 0 && "Both null and non-null edge masks found");
-      assert(all_equal(Operands) &&
-             "Distinct incoming values with one having a full mask");
-      break;
-    }
-    OperandsWithMask.push_back(EdgeMask);
-  }
-  return new VPBlendRecipe(Phi, OperandsWithMask);
-}
-
 VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
                                                    ArrayRef<VPValue *> Operands,
                                                    VFRange &Range) {
@@ -8693,7 +8482,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
       //      all-true mask.
       VPValue *Mask = nullptr;
       if (Legal->isMaskRequired(CI))
-        Mask = getBlockInMask(CI->getParent());
+        Mask = getBlockInMask(Builder.getInsertBlock());
       else
         Mask = Plan.getOrAddLiveIn(
             ConstantInt::getTrue(IntegerType::getInt1Ty(CI->getContext())));
@@ -8735,7 +8524,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
     // div/rem operation itself.  Otherwise fall through to general handling below.
     if (CM.isPredicatedInst(I)) {
       SmallVector<VPValue *> Ops(Operands);
-      VPValue *Mask = getBlockInMask(I->getParent());
+      VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
       VPValue *One =
           Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
       auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());
@@ -8817,7 +8606,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
   // In case of predicated execution (due to tail-folding, or conditional
   // execution, or both), pass the relevant mask.
   if (Legal->isMaskRequired(HI->Store))
-    HGramOps.push_back(getBlockInMask(HI->Store->getParent()));
+    HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
 
   return new VPHistogramRecipe(Opcode,
                                make_range(HGramOps.begin(), HGramOps.end()),
@@ -8873,7 +8662,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
     // added initially. Masked replicate recipes will later be placed under an
     // if-then construct to prevent side-effects. Generate recipes to compute
     // the block mask for this region.
-    BlockInMask = getBlockInMask(I->getParent());
+    BlockInMask = getBlockInMask(Builder.getInsertBlock());
   }
 
   // Note that there is some custom logic to mark some intrinsics as uniform
@@ -9010,9 +8799,8 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
   // nodes, calls and memory operations.
   VPRecipeBase *Recipe;
   if (auto *Phi = dyn_cast<PHINode>(Instr)) {
-    if (Phi->getParent() != OrigLoop->getHeader())
-      return tryToBlend(Phi, Operands);
-
+    assert(Phi->getParent() == OrigLoop->getHeader() &&
+           "Non-header phis should have been handled during predication");
     assert(Operands.size() == 2 && "Must have 2 operands for header phis");
     if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, Range)))
       return Recipe;
@@ -9118,7 +8906,7 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
             ReductionOpcode == Instruction::Sub) &&
            "Expected an ADD or SUB operation for predicated partial "
            "reductions (because the neutral element in the mask is zero)!");
-    VPValue *Mask = getBlockInMask(Reduction->getParent());
+    VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
     VPValue *Zero =
         Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
     BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
@@ -9456,8 +9244,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
             return !CM.requiresScalarEpilogue(VF.isVector());
           },
           Range);
-  DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
-  auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+  auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
   VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
                                      PSE, RequiresScalarEpilogueCheck,
                                      CM.foldTailByMasking(), OrigLoop);
@@ -9496,9 +9283,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     }
   }
 
-  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
-                                Builder);
-
   // ---------------------------------------------------------------------------
   // Pre-construction: record ingredients whose recipes we'll need to further
   // process after constructing the initial VPlan.
@@ -9539,31 +9323,55 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
         return Legal->blockNeedsPredication(BB) || NeedsBlends;
       });
 
-  RecipeBuilder.collectScaledReductions(Range);
 
   auto *MiddleVPBB = Plan->getMiddleBlock();
 
+  VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
+                                Builder);
+  if (NeedsMasks) {
+    VPlanTransforms::predicateAndLinearize(*Plan, CM.foldTailByMasking(),
+                                           RecipeBuilder);
+  }
+
+  {
+    VPBlockBase *PrevVPBB = nullptr;
+    VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
+    ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
+        RPOT(Header);
+
+    for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+      // Handle VPBBs down to the latch.
+      if (PrevVPBB && VPBB == LoopRegion->getExiting()) {
+        VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
+        break;
+      }
+
+      auto Successors = to_vector(VPBB->getSuccessors());
+      if (Successors.size() > 1)
+        VPBB->getTerminator()->eraseFromParent();
+
+      // Flatten the CFG in the loop. Masks for blocks have already been
+      // generated and added to recipes as needed. To do so, first disconnect
+      // VPBB from its successors. Then connect VPBB to the previously visited
+      // VPBB.
+      for (auto *Succ : Successors)
+        VPBlockUtils::disconnectBlocks(VPBB, Succ);
+      if (PrevVPBB)
+        VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
+
+      PrevVPBB = VPBB;
+    }
+  }
+
+  RecipeBuilder.collectScaledReductions(Range);
+
   // Scan the body of the loop in a topological order to visit each basic block
   // after having visited its predecessor basic blocks.
   ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
       HeaderVPBB);
 
   VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
-  VPBlockBase *PrevVPBB = nullptr;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
-    // Create mask based on the IR BB corresponding to VPBB.
-    // TODO: Predicate directly based on VPlan.
-    Builder.setInsertPoint(VPBB, VPBB->begin());
-    if (VPBB == HeaderVPBB) {
-      Builder.setInsertPoint(VPBB, VPBB->getFirstNonPhi());
-      RecipeBuilder.createHeaderMask();
-    } else if (NeedsMasks) {
-      // FIXME: At the moment, masks need to be placed at the beginning of the
-      // block, as blends introduced for phi nodes need to use it. The created
-      // blends should be sunk after the mask recipes.
-      RecipeBuilder.createBlockInMask(VPB2IRBB.lookup(VPBB));
-    }
-
     // Convert input VPInstructions to widened recipes.
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
       auto *SingleDef = cast<VPSingleDefRecipe>(&R);
@@ -9573,7 +9381,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
       // latter are added above for masking.
       // FIXME: Migrate code relying on the underlying instruction from VPlan0
       // to construct recipes below to not use the underlying instruction.
-      if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) ||
+      if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
+              &R) ||
           (isa<VPInstruction>(&R) && !UnderlyingValue))
         continue;
 
@@ -9582,14 +9391,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
       assert((isa<VPWidenPHIRecipe>(&R) || isa<VPInstruction>(&R)) &&
              UnderlyingValue && "unsupported recipe");
 
-      if (isa<VPInstruction>(&R) &&
-          (cast<VPInstruction>(&R)->getOpcode() ==
-               VPInstruction::BranchOnCond ||
-           (cast<VPInstruction>(&R)->getOpcode() == Instruction::Switch))) {
-        R.eraseFromParent();
-        break;
-      }
-
       // TODO: Gradually replace uses of underlying instruction by analyses on
       // VPlan.
       Instruction *Instr = cast<Instruction>(UnderlyingValue);
@@ -9625,22 +9426,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
       } else {
         Builder.insert(Recipe);
       }
-      if (Recipe->getNumDefinedValues() == 1)
+      if (Recipe->getNumDefinedValues() == 1) {
         SingleDef->replaceAllUsesWith(Recipe->getVPSingleValue());
-      else
+        for (auto &[_, V] : RecipeBuilder.BlockMaskCache) {
+          if (V == SingleDef)
+            V = Recipe->getVPSingleValue();
+        }
+      } else
         assert(Recipe->getNumDefinedValues() == 0 &&
                "Unexpected multidef recipe");
       R.eraseFromParent();
     }
-
-    // Flatten the CFG in the loop. Masks for blocks have already been generated
-    // and added to recipes as needed. To do so, first disconnect VPBB from its
-    // successors. Then connect VPBB to the previously visited VPBB.
-    for (auto *Succ : to_vector(VPBB->getSuccessors()))
-      VPBlockUtils::disconnectBlocks(VPBB, Succ);
-    if (PrevVPBB)
-      VPBlockUtils::connectBlocks(PrevVPBB, VPBB);
-    PrevVPBB = VPBB;
   }
 
   assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
@@ -9759,8 +9555,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
   assert(!OrigLoop->isInnermost());
   assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
 
-  DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
-  auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI, VPB2IRBB);
+  auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
   VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(),
                                      PSE, true, false, OrigLoop);
   SmallPtrSet<VPBlockBase *, 2> ExitBlocks(Plan->getExitBlocks().begin(),
@@ -9943,7 +9738,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       BasicBlock *BB = CurrentLinkI->getParent();
       VPValue *CondOp = nullptr;
       if (CM.blockNeedsPredicationForAnyReason(BB))
-        CondOp = RecipeBuilder.getBlockInMask(BB);
+        CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());
 
       // Non-FP RdxDescs will have all fast math flags set, so clear them.
       FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
@@ -9986,7 +9781,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     // different numbers of lanes. Partial reductions mask the input instead.
     if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
         !isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
-      VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader());
+      VPValue *Cond =
+          RecipeBuilder.getBlockInMask(VectorLoopRegion->getEntryBasicBlock());
       Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
       std::optional<FastMathFlags> FMFs =
           PhiTy->isFloatingPointTy()
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index fd0064a34c4c9..d87c9b4d7e20e 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -73,11 +73,14 @@ class VPRecipeBuilder {
   /// if-conversion currently takes place during VPlan-construction, so these
   /// caches are only used at that stage.
   using EdgeMaskCacheTy =
-      DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>;
-  using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>;
+      DenseMap<std::pair<VPBasicBlock *, VPBasicBlock *>, VPValue *>;
+  using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
   EdgeMaskCacheTy EdgeMaskCache;
+
+public:
   BlockMaskCacheTy BlockMaskCache;
 
+private:
   // VPlan construction support: Hold a mapping from ingredients to
   // their recipe.
   DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
@@ -114,11 +117,6 @@ class VPRecipeBuilder {
   tryToOptimizeInductionTruncate(TruncInst *I, ArrayRef<VPValue *> Operands,
                                  VFRange &Range);
 
-  /// Handle non-loop phi nodes. Return a new VPBlendRecipe otherwise. Currently
-  /// all such phi nodes are turned into a sequence of select instructions as
-  /// the vectorizer currently performs full if-conversion.
-  VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);
-
   /// Handle call instructions. If \p CI can be widened for \p Range.Start,
   /// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
   /// decreased to ensure same decision from \p Range.Start to \p Range.End.
@@ -188,27 +186,20 @@ class VPRecipeBuilder {
     Ingredient2Recipe[I] = R;
   }
 
-  /// Create the mask for the vector loop header block.
-  void createHeaderMask();
-
-  /// A helper function that computes the predicate of the block BB, assuming
-  /// that the header block of the loop is set to True or the loop mask when
-  /// tail folding.
-  void createBlockInMask(BasicBlock *BB);
-
+  void setBlockInMask(VPBasicBlock *BB, VPValue *Mask) {
+    assert(!BlockMaskCache.contains(BB) && "Mask already set");
+    BlockMaskCache[BB] = Mask;
+  }
   /// Returns the *entry* mask for the block \p BB.
-  VPValue *getBlockInMask(BasicBlock *BB) const;
-
-  /// Create an edge mask for every destination of cases and/or default.
-  void createSwitchEdgeMasks(SwitchInst *SI);
-
-  /// A helper function that computes the predicate of the edge between SRC
-  /// and DST.
-  VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
-
-  /// A helper that returns the previously computed predicate of the edge
-  /// between SRC and DST.
-  VPValue *getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const;
+  VPValue *getBlockInMask(VPBasicBlock *BB) const {
+    return BlockMaskCache.lookup(BB);
+  }
+  void setEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst, VPValue *Mask) {
+    EdgeMaskCache[{Src, Dst}] = Mask;
+  }
+  VPValue *getEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) const {
+    return EdgeMaskCache.lookup({Src, Dst});
+  }
 
   /// Return the recipe created for given ingredient.
   VPRecipeBase *getRecipe(Instruction *I) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index e65f25975f3a3..550ac198924dc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -65,8 +65,7 @@ class PlainCFGBuilder {
       : TheLoop(Lp), LI(LI), Plan(std::make_unique<VPlan>(Lp)) {}
 
   /// Build plain CFG for TheLoop  and connects it to Plan's entry.
-  std::unique_ptr<VPlan>
-  buildPlainCFG(DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
+  std::unique_ptr<VPlan> buildPlainCFG();
 };
 } // anonymous namespace
 
@@ -245,10 +244,16 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
       for (Value *Op : Inst->operands())
         VPOperands.push_back(getOrCreateVPOperand(Op));
 
-      // Build VPInstruction for any arbitrary Instruction without specific
-      // representation in VPlan.
-      NewR = cast<VPInstruction>(
-          VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+      if (auto *ICmp = dyn_cast<ICmpInst>(Inst)) {
+        NewR = cast<VPInstruction>(VPIRBuilder.createICmp(
+            ICmp->getPredicate(), VPOperands[0], VPOperands[1]));
+        NewR->setUnderlyingValue(ICmp);
+      } else {
+        // Build VPInstruction for any arbitrary Instruction without specific
+        // representation in VPlan.
+        NewR = cast<VPInstruction>(
+            VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst));
+      }
     }
 
     IRDef2VPValue[Inst] = NewR;
@@ -256,8 +261,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
 }
 
 // Main interface to build the plain CFG.
-std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
-    DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
   VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
   BB2VPBB[Entry->getIRBasicBlock()] = Entry;
 
@@ -346,18 +350,14 @@ std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG(
     }
   }
 
-  for (const auto &[IRBB, VPB] : BB2VPBB)
-    VPB2IRBB[VPB] = IRBB;
-
   LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
   return std::move(Plan);
 }
 
-std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(
-    Loop *TheLoop, LoopInfo &LI,
-    DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB) {
+std::unique_ptr<VPlan> VPlanTransforms::buildPlainCFG(Loop *TheLoop,
+                                                      LoopInfo &LI) {
   PlainCFGBuilder Builder(TheLoop, &LI);
-  return Builder.buildPlainCFG(VPB2IRBB);
+  return Builder.buildPlainCFG();
 }
 
 /// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
new file mode 100644
index 0000000000000..1b7118da3425f
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -0,0 +1,252 @@
+//===-- VPlanPredicator.cpp - VPlan predicator ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements predication for VPlans.
+///
+//===----------------------------------------------------------------------===//
+
+#include "VPRecipeBuilder.h"
+#include "VPlan.h"
+#include "VPlanCFG.h"
+#include "VPlanTransforms.h"
+#include "VPlanUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+
+using namespace llvm;
+
+struct VPPredicator {
+  /// When we if-convert we need to create edge masks. We have to cache values
+  /// so that we don't end up with exponential recursion/IR. The masks
+  /// themselves are stored in and queried through RecipeBuilder; the aliases
+  /// below are not referenced inside this struct.
+  using EdgeMaskCacheTy =
+      DenseMap<std::pair<VPBasicBlock *, VPBasicBlock *>, VPValue *>;
+  using BlockMaskCacheTy = DenseMap<VPBasicBlock *, VPValue *>;
+
+  VPPredicator(VPRecipeBuilder &RecipeBuilder) : RecipeBuilder(RecipeBuilder) {}
+
+  VPRecipeBuilder &RecipeBuilder;
+
+  VPBuilder Builder;
+  VPValue *createEdgeMask(VPBasicBlock *Src, VPBasicBlock *Dst) {
+    assert(is_contained(Dst->getPredecessors(), Src) && "Invalid edge");
+
+    // Look for cached value.
+    VPValue *EdgeMask = RecipeBuilder.getEdgeMask(Src, Dst);
+    if (EdgeMask)
+      return EdgeMask;
+
+    VPValue *SrcMask = RecipeBuilder.getBlockInMask(Src);
+
+    // Empty block or single successor: the edge inherits Src's block-in mask.
+    if (Src->empty() || Src->getNumSuccessors() == 1) {
+      RecipeBuilder.setEdgeMask(Src, Dst, SrcMask);
+      return SrcMask;
+    }
+
+    auto *Term = cast<VPInstruction>(Src->getTerminator());
+    if (Term->getOpcode() == Instruction::Switch) {
+      createSwitchEdgeMasks(Term);
+      return RecipeBuilder.getEdgeMask(Src, Dst);
+    }
+
+    auto *BI = cast<VPInstruction>(Src->getTerminator());
+    assert(BI->getOpcode() == VPInstruction::BranchOnCond);
+    if (Src->getSuccessors()[0] == Src->getSuccessors()[1]) {
+      RecipeBuilder.setEdgeMask(Src, Dst, SrcMask);
+      return SrcMask;
+    }
+
+    EdgeMask = BI->getOperand(0);
+    assert(EdgeMask && "No Edge Mask found for condition");
+
+    if (Src->getSuccessors()[0] != Dst)
+      EdgeMask = Builder.createNot(EdgeMask, BI->getDebugLoc());
+
+    if (SrcMask) { // Otherwise block in-mask is all-one, no need to AND.
+      // The bitwise 'And' of SrcMask and EdgeMask introduces new UB if SrcMask
+      // is false and EdgeMask is poison. Avoid that by using 'LogicalAnd'
+      // instead which generates 'select i1 SrcMask, i1 EdgeMask, i1 false'.
+      EdgeMask = Builder.createLogicalAnd(SrcMask, EdgeMask, BI->getDebugLoc());
+    }
+
+    RecipeBuilder.setEdgeMask(Src, Dst, EdgeMask);
+    return EdgeMask;
+  }
+
+  VPValue *createBlockInMask(VPBasicBlock *VPBB) {
+    Builder.setInsertPoint(VPBB, VPBB->begin());
+    // All-one mask is modelled as no-mask following the convention for masked
+    // load/store/gather/scatter. Initialize BlockMask to no-mask.
+    VPValue *BlockMask = nullptr;
+    // This is the block mask. We OR all unique incoming edges.
+    for (auto *Predecessor : SetVector<VPBlockBase *>(
+             VPBB->getPredecessors().begin(), VPBB->getPredecessors().end())) {
+      VPValue *EdgeMask = createEdgeMask(cast<VPBasicBlock>(Predecessor), VPBB);
+      if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is
+                       // too.
+        RecipeBuilder.setBlockInMask(VPBB, EdgeMask);
+        return EdgeMask;
+      }
+
+      if (!BlockMask) { // BlockMask has its initialized nullptr value.
+        BlockMask = EdgeMask;
+        continue;
+      }
+
+      BlockMask = Builder.createOr(BlockMask, EdgeMask, {});
+    }
+
+    RecipeBuilder.setBlockInMask(VPBB, BlockMask);
+    return BlockMask;
+  }
+
+  void createHeaderMask(VPBasicBlock *HeaderVPBB, bool FoldTail) {
+    if (!FoldTail) {
+      RecipeBuilder.setBlockInMask(HeaderVPBB, nullptr);
+      return;
+    }
+
+    // Introduce the early-exit compare IV <= BTC to form header block mask.
+    // This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
+    // constructing the desired canonical IV in the header block as its first
+    // non-phi instructions.
+
+    auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
+    auto &Plan = *HeaderVPBB->getPlan();
+    auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
+    HeaderVPBB->insert(IV, NewInsertionPoint);
+
+    VPBuilder::InsertPointGuard Guard(Builder);
+    Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
+    VPValue *BlockMask = nullptr;
+    VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
+    BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
+    RecipeBuilder.setBlockInMask(HeaderVPBB, BlockMask);
+  }
+
+  void createSwitchEdgeMasks(VPInstruction *SI) {
+    VPBasicBlock *Src = SI->getParent();
+
+    // Create masks where the terminator in Src is a switch. We create mask for
+    // all edges at the same time. This is more efficient, as we can create and
+    // collect compares for all cases once.
+    VPValue *Cond = SI->getOperand(0);
+    VPBasicBlock *DefaultDst = cast<VPBasicBlock>(Src->getSuccessors()[0]);
+    MapVector<VPBasicBlock *, SmallVector<VPValue *>> Dst2Compares;
+    for (const auto &[Idx, Succ] :
+         enumerate(ArrayRef(Src->getSuccessors()).drop_front())) {
+      VPBasicBlock *Dst = cast<VPBasicBlock>(Succ);
+      // NOTE(review): the former assert that no mask exists yet for
+      // {Src, Dst} is disabled in this WIP patch; consider restoring it.
+      // Cases whose destination is the same as default are redundant and can
+      // be ignored - they will get there anyhow.
+      if (Dst == DefaultDst)
+        continue;
+      auto &Compares = Dst2Compares[Dst];
+      VPValue *V = SI->getOperand(Idx + 1);
+      Compares.push_back(Builder.createICmp(CmpInst::ICMP_EQ, Cond, V));
+    }
+
+    // We need to handle 2 separate cases below for all entries in Dst2Compares,
+    // which excludes destinations matching the default destination.
+    VPValue *SrcMask = RecipeBuilder.getBlockInMask(Src);
+    VPValue *DefaultMask = nullptr;
+    for (const auto &[Dst, Conds] : Dst2Compares) {
+      // 1. Dst is not the default destination. Dst is reached if any of the
+      // cases with destination == Dst are taken. Join the conditions for each
+      // case whose destination == Dst using an OR.
+      VPValue *Mask = Conds[0];
+      for (VPValue *V : ArrayRef<VPValue *>(Conds).drop_front())
+        Mask = Builder.createOr(Mask, V);
+      if (SrcMask)
+        Mask = Builder.createLogicalAnd(SrcMask, Mask);
+      RecipeBuilder.setEdgeMask(Src, Dst, Mask);
+
+      // 2. Create the mask for the default destination, which is reached if
+      // none of the cases with destination != default destination are taken.
+      // Accumulate the OR of all non-default edge masks here; the result is
+      // negated (and AND-ed with SrcMask, if any) after the loop.
+      DefaultMask = DefaultMask ? Builder.createOr(DefaultMask, Mask) : Mask;
+    }
+
+    if (DefaultMask) {
+      DefaultMask = Builder.createNot(DefaultMask);
+      if (SrcMask)
+        DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
+    }
+    RecipeBuilder.setEdgeMask(Src, DefaultDst, DefaultMask);
+  }
+};
+
+void VPlanTransforms::predicateAndLinearize(VPlan &Plan, bool FoldTail,
+                                            VPRecipeBuilder &RecipeBuilder) {
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  // Visit the loop body in reverse post-order so each block is handled after
+  // its predecessors: create block-in masks, then turn phis into blends.
+  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
+  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+      Header);
+  VPPredicator Predicator(RecipeBuilder);
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
+    if (VPBB == Header) {
+      Predicator.createHeaderMask(Header, FoldTail);
+      continue;
+    }
+
+    SmallVector<VPWidenPHIRecipe *> Phis;
+    for (VPRecipeBase &R : VPBB->phis())
+      Phis.push_back(cast<VPWidenPHIRecipe>(&R));
+
+    Predicator.createBlockInMask(VPBB);
+
+    for (VPWidenPHIRecipe *Phi : Phis) {
+      PHINode *IRPhi = cast<PHINode>(Phi->getUnderlyingValue());
+
+      unsigned NumIncoming = IRPhi->getNumIncomingValues();
+
+      // All phis in non-header blocks are replaced by blends. The edge masks
+      // are only looked up here; they are expected to have been cached while
+      // computing the block-in mask above. The blend is inserted right before
+      // the phi it replaces. There may be duplications in the generated
+      // predication tree, but future optimizations will clean it up.
+
+      // Map incoming IR BasicBlocks to incoming VPValues, for lookup below.
+      // TODO: Add operands and masks in order from the VPlan predecessors.
+      DenseMap<BasicBlock *, VPValue *> VPIncomingValues;
+      DenseMap<BasicBlock *, VPBasicBlock *> VPIncomingBlocks;
+      for (const auto &[Idx, Pred] :
+           enumerate(predecessors(IRPhi->getParent()))) {
+        VPIncomingValues[Pred] = Phi->getOperand(Idx);
+        VPIncomingBlocks[Pred] =
+            cast<VPBasicBlock>(VPBB->getPredecessors()[Idx]);
+      }
+
+      SmallVector<VPValue *, 2> OperandsWithMask;
+      for (unsigned In = 0; In < NumIncoming; In++) {
+        BasicBlock *Pred = IRPhi->getIncomingBlock(In);
+        OperandsWithMask.push_back(VPIncomingValues.lookup(Pred));
+        VPValue *EdgeMask =
+            RecipeBuilder.getEdgeMask(VPIncomingBlocks.lookup(Pred), VPBB);
+        if (!EdgeMask) {
+          assert(In == 0 && "Both null and non-null edge masks found");
+          assert(all_equal(Phi->operands()) &&
+                 "Distinct incoming values with one having a full mask");
+          break;
+        }
+        OperandsWithMask.push_back(EdgeMask);
+      }
+      auto *Blend = new VPBlendRecipe(IRPhi, OperandsWithMask);
+      Blend->insertBefore(Phi);
+      Phi->replaceAllUsesWith(Blend);
+      Phi->eraseFromParent();
+      RecipeBuilder.setRecipe(IRPhi, Blend);
+    }
+  }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index d9f7b77a3ade6..237be9d900147 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -53,9 +53,7 @@ struct VPlanTransforms {
       verifyVPlanIsValid(Plan);
   }
 
-  static std::unique_ptr<VPlan>
-  buildPlainCFG(Loop *TheLoop, LoopInfo &LI,
-                DenseMap<VPBlockBase *, BasicBlock *> &VPB2IRBB);
+  static std::unique_ptr<VPlan> buildPlainCFG(Loop *TheLoop, LoopInfo &LI);
 
   /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turing \p Plan's
   /// flat CFG into a hierarchical CFG. It also creates a VPValue expression for
@@ -203,6 +201,9 @@ struct VPlanTransforms {
   /// candidates.
   static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
                                      unsigned VectorRegWidth);
+
+  static void predicateAndLinearize(VPlan &Plan, bool FoldTail,
+                                    VPRecipeBuilder &RecipeBuilder);
 };
 
 } // namespace llvm
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
index fe845ae74cbee..65c939cac8dc1 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll
@@ -24,13 +24,13 @@ define void @foo(i64 %n) {
 ; CHECK-NEXT:   EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv>
 ; CHECK-NEXT:   EMIT store ir<%add>, ir<%gep.2>
 ; CHECK-NEXT:   EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1>
-; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT ir<%inner.ec> = icmp eq ir<%inner.iv.next>, ir<8>
 ; CHECK-NEXT:   EMIT branch-on-cond ir<%inner.ec>
 ; CHECK-NEXT: Successor(s): outer.latch, inner
 ; CHECK-EMPTY:
 ; CHECK-NEXT: outer.latch:
 ; CHECK-NEXT:   EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1>
-; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8>
+; CHECK-NEXT:   EMIT ir<%outer.ec> = icmp eq ir<%outer.iv.next>, ir<8>
 ; CHECK-NEXT:   EMIT branch-on-cond ir<%outer.ec>
 ; CHECK-NEXT: Successor(s): ir-bb<exit>, outer.header
 ; CHECK-EMPTY:
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
index d49483f0ebf88..2771c5ef0fb5e 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h
@@ -70,8 +70,7 @@ class VPlanTestIRBase : public testing::Test {
 
     Loop *L = LI->getLoopFor(LoopHeader);
     PredicatedScalarEvolution PSE(*SE, *L);
-    DenseMap<VPBlockBase *, BasicBlock *> VPB2IRBB;
-    auto Plan = VPlanTransforms::buildPlainCFG(L, *LI, VPB2IRBB);
+    auto Plan = VPlanTransforms::buildPlainCFG(L, *LI);
     VPlanTransforms::createLoopRegions(*Plan, IntegerType::get(*Ctx, 64), PSE,
                                        true, false, L);
     return Plan;



More information about the llvm-commits mailing list