[llvm] [VPlan] Simplify branch to scalar ph in VPlan transform. (NFC) (PR #140409)

via llvm-commits llvm-commits at lists.llvm.org
Sat May 17 14:34:21 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-risc-v

Author: Florian Hahn (fhahn)

<details>
<summary>Changes</summary>

Simplify branch on false, starting with the branch from the middle block
to the scalar preheader. Initially this helps simplifying the initial VPlan construction.

Depends on https://github.com/llvm/llvm-project/pull/140405.

---

Patch is 101.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140409.diff


32 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+23-37) 
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+18-7) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (-1) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+21-23) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+11-26) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+23-12) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+5) 
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (+1-3) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+24-24) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll (+5-5) 
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll (+2-2) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+8-8) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll (+9-11) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+16-16) 
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+6-12) 
- (modified) llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+9-9) 
- (modified) llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll (+25-25) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll (+18-18) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+66-66) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll (+4-4) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+20-20) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/vplan-widen-struct-return.ll (+2-2) 


``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 148b187f4f25d..6c3a2ce941c54 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -490,7 +490,7 @@ class InnerLoopVectorizer {
         MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor),
         Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI),
         RTChecks(RTChecks), Plan(Plan),
-        VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {}
+        VectorPHVPB(Plan.getEntry()->getSuccessors()[1]) {}
 
   virtual ~InnerLoopVectorizer() = default;
 
@@ -2365,22 +2365,19 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
 void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
   VPBlockBase *ScalarPH = Plan.getScalarPreheader();
   VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor();
-  if (PreVectorPH->getNumSuccessors() != 1) {
-    assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
-    assert(PreVectorPH->getSuccessors()[0] == ScalarPH &&
-           "Unexpected successor");
-    VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
-    VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
-    PreVectorPH = CheckVPIRBB;
-  }
+  assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
+  assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor");
+  VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
+  VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
+  PreVectorPH = CheckVPIRBB;
   VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
   PreVectorPH->swapSuccessors();
 
   // We just connected a new block to the scalar preheader. Update all
   // ResumePhis by adding an incoming value for it, replicating the last value.
   for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
-    auto *ResumePhi = dyn_cast<VPInstruction>(&R);
-    if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
+    auto *ResumePhi = cast<VPPhi>(&R);
+    if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors())
       continue;
     ResumePhi->addOperand(
         ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
@@ -2463,9 +2460,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
     setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
   ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
   LoopBypassBlocks.push_back(TCCheckBlock);
-
-  // TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here.
-  introduceCheckBlockInVPlan(TCCheckBlock);
 }
 
 BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2530,7 +2524,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
 static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
   VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
   for (auto &R : make_early_inc_range(*VPBB)) {
-    assert(!R.isPhi() && "Tried to move phi recipe to end of block");
+    assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) &&
+           "Tried to move phi recipe to end of block");
     R.moveBefore(*IRVPBB, IRVPBB->end());
   }
 
@@ -7768,10 +7763,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
   // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
   // over the incoming values correctly.
   using namespace VPlanPatternMatch;
-  auto IsResumePhi = [](VPUser *U) {
-    auto *VPI = dyn_cast<VPInstruction>(U);
-    return VPI && VPI->getOpcode() == VPInstruction::ResumePhi;
-  };
+  auto IsResumePhi = [](VPUser *U) { return isa<VPPhi>(U); };
   assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
          "ResumePhi must have a single user");
   auto *EpiResumePhiVPI =
@@ -7837,7 +7829,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
 
   // 1. Set up the skeleton for vectorization, including vector pre-header and
   // middle block. The vector loop is created during VPlan execution.
-  VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
+  VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]);
   State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
   if (VectorizingEpilogue)
     VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -8070,7 +8062,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
     setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
   ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
 
-  introduceCheckBlockInVPlan(TCCheckBlock);
+  if (!ForEpilogue)
+    introduceCheckBlockInVPlan(TCCheckBlock);
   return TCCheckBlock;
 }
 
@@ -8200,7 +8193,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
   Plan.setEntry(NewEntry);
   // OldEntry is now dead and will be cleaned up when the plan gets destroyed.
 
-  introduceCheckBlockInVPlan(Insert);
   return Insert;
 }
 
@@ -9146,9 +9138,8 @@ static VPInstruction *addResumePhiRecipeForInduction(
                                                 WideIV->getDebugLoc());
   }
 
-  auto *ResumePhiRecipe =
-      ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
-                                   WideIV->getDebugLoc(), "bc.resume.val");
+  auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
+      {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
   return ResumePhiRecipe;
 }
 
@@ -9160,7 +9151,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
                                 DenseMap<VPValue *, VPValue *> &IVEndValues) {
   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
   auto *ScalarPH = Plan.getScalarPreheader();
-  auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
+  auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
   VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
   VPBuilder VectorPHBuilder(
       cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
@@ -9177,8 +9168,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
       if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
               WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
               &Plan.getVectorTripCount())) {
-        assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
-               "Expected a ResumePhi");
+        assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
         IVEndValues[WideIVR] = ResumePhi->getOperand(0);
         ScalarPhiIRI->addOperand(ResumePhi);
         continue;
@@ -9203,8 +9193,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
           VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
           "vector.recur.extract");
     StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
-    auto *ResumePhiR = ScalarPHBuilder.createNaryOp(
-        VPInstruction::ResumePhi,
+    auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
         {ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
     ScalarPhiIRI->addOperand(ResumePhiR);
   }
@@ -10406,9 +10395,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
       VPI->setOperand(1, Freeze);
       if (UpdateResumePhis)
         OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
-          return Freeze != &U && isa<VPInstruction>(&U) &&
-                 cast<VPInstruction>(&U)->getOpcode() ==
-                     VPInstruction::ResumePhi;
+          return Freeze != &U && isa<VPPhi>(&U);
         });
     }
   };
@@ -10421,13 +10408,12 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
   // scalar (which will become vector) epilogue loop we are done. Otherwise
   // create it below.
   if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) {
-        return match(&R, m_VPInstruction<VPInstruction::ResumePhi>(
-                             m_Specific(VectorTC), m_SpecificInt(0)));
+        return match(&R, m_VPInstruction<Instruction::PHI>(m_Specific(VectorTC),
+                                                           m_SpecificInt(0)));
       }))
     return;
   VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
-  ScalarPHBuilder.createNaryOp(
-      VPInstruction::ResumePhi,
+  ScalarPHBuilder.createScalarPhi(
       {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
       "vec.epilog.resume.val");
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e634de1e17c69..ebbffef2a4afe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -887,11 +887,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
     SLPStore,
     ActiveLaneMask,
     ExplicitVectorLength,
-    /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
-    /// The first operand is the incoming value from the predecessor in VPlan,
-    /// the second operand is the incoming value for all other predecessors
-    /// (which are currently not modeled in VPlan).
-    ResumePhi,
     CalculateTripCountMinusVF,
     // Increment the canonical IV separately for each unrolled part.
     CanonicalIVIncrementForPart,
@@ -1160,6 +1155,8 @@ class VPPhiAccessors {
     return getAsRecipe()->getNumOperands();
   }
 
+  void removeIncomingValue(VPBlockBase *VPB) const;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -1170,11 +1167,15 @@ struct VPPhi : public VPInstruction, public VPPhiAccessors {
   VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
       : VPInstruction(Instruction::PHI, Operands, DL, Name) {}
 
-  static inline bool classof(const VPRecipeBase *U) {
+  static inline bool classof(const VPUser *U) {
     auto *R = dyn_cast<VPInstruction>(U);
     return R && R->getOpcode() == Instruction::PHI;
   }
 
+  VPPhi *clone() override {
+    return new VPPhi(operands(), getDebugLoc(), getName());
+  }
+
   void execute(VPTransformState &State) override;
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3737,6 +3738,15 @@ VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
   return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
 }
 
+inline void VPPhiAccessors::removeIncomingValue(VPBlockBase *VPB) const {
+  VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
+  const VPBasicBlock *Parent = R->getParent();
+  assert(R->getNumOperands() == Parent->getNumPredecessors());
+  auto I = find(Parent->getPredecessors(), VPB);
+  R->getOperand(I - Parent->getPredecessors().begin())->removeUser(*R);
+  R->removeOperand(I - Parent->getPredecessors().begin());
+}
+
 /// A special type of VPBasicBlock that wraps an existing IR basic block.
 /// Recipes of the block get added before the first non-phi instruction in the
 /// wrapped block.
@@ -4246,7 +4256,8 @@ class VPlan {
   /// that this relies on unneeded branches to the scalar tail loop being
   /// removed.
   bool hasScalarTail() const {
-    return getScalarPreheader()->getNumPredecessors() != 0;
+    return getScalarPreheader()->getNumPredecessors() != 0 &&
+           getScalarPreheader()->getSinglePredecessor() != getEntry();
   }
 };
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ac0f30cb4693c..926490bfad7d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -101,7 +101,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
     return inferScalarType(R->getOperand(0));
   case VPInstruction::FirstOrderRecurrenceSplice:
   case VPInstruction::Not:
-  case VPInstruction::ResumePhi:
   case VPInstruction::CalculateTripCountMinusVF:
   case VPInstruction::CanonicalIVIncrementForPart:
   case VPInstruction::AnyOf:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 287bc93ce496a..204af8f12311b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -507,8 +507,12 @@ void VPlanTransforms::prepareForVectorization(
                                    cast<VPBasicBlock>(HeaderVPB),
                                    cast<VPBasicBlock>(LatchVPB), Range);
         HandledUncountableEarlyExit = true;
+      } else {
+        for (VPRecipeBase &R : cast<VPIRBasicBlock>(EB)->phis()) {
+          if (auto *PhiR = dyn_cast<VPIRPhi>(&R))
+            PhiR->removeIncomingValue(Pred);
+        }
       }
-
       cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
       VPBlockUtils::disconnectBlocks(Pred, EB);
     }
@@ -541,23 +545,13 @@ void VPlanTransforms::prepareForVectorization(
   //    Thus if tail is to be folded, we know we don't need to run the
   //    remainder and we can set the condition to true.
   // 3) Otherwise, construct a runtime check.
+  VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
+  // Also connect the entry block to the scalar preheader.
+  VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
+  Plan.getEntry()->swapSuccessors();
 
-  if (!RequiresScalarEpilogueCheck) {
-    if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
-      VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
-    VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
-    // The exit blocks are unreachable, remove their recipes to make sure no
-    // users remain that may pessimize transforms.
-    for (auto *EB : Plan.getExitBlocks()) {
-      for (VPRecipeBase &R : make_early_inc_range(*EB))
-        R.eraseFromParent();
-    }
+  if (MiddleVPBB->getNumSuccessors() != 2)
     return;
-  }
-
-  // The connection order corresponds to the operands of the conditional branch,
-  // with the middle block already connected to the exit block.
-  VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
 
   auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
   // Here we use the same DebugLoc as the scalar loop latch terminator instead
@@ -565,13 +559,17 @@ void VPlanTransforms::prepareForVectorization(
   // different line numbers and we want to avoid awkward line stepping while
   // debugging. Eg. if the compare has got a line number inside the loop.
   VPBuilder Builder(MiddleVPBB);
-  VPValue *Cmp =
-      TailFolded
-          ? Plan.getOrAddLiveIn(ConstantInt::getTrue(
-                IntegerType::getInt1Ty(TripCount->getType()->getContext())))
-          : Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
-                               &Plan.getVectorTripCount(),
-                               ScalarLatchTerm->getDebugLoc(), "cmp.n");
+  VPValue *Cmp;
+  if (TailFolded)
+    Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
+        IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+  else if (!RequiresScalarEpilogueCheck)
+    Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
+        IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+  else
+    Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
+                             &Plan.getVectorTripCount(),
+                             ScalarLatchTerm->getDebugLoc(), "cmp.n");
   Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
                        ScalarLatchTerm->getDebugLoc());
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3fa6a21b80b17..f4beb5b6d11e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -754,17 +754,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
     Value *Addend = State.get(getOperand(1), VPLane(0));
     return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
   }
-  case VPInstruction::ResumePhi: {
-    auto *NewPhi =
-        Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
-    for (const auto &[IncVPV, PredVPBB] :
-         zip(operands(), getParent()->getPredecessors())) {
-      Value *IncV = State.get(IncVPV, /* IsScalar */ true);
-      BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(cast<VPBasicBlock>(PredVPBB));
-      NewPhi->addIncoming(IncV, PredBB);
-    }
-    return NewPhi;
-  }
   case VPInstruction::AnyOf: {
     Value *A = State.get(getOperand(0));
     return Builder.CreateOrReduce(A);
@@ -863,8 +852,7 @@ bool VPInstruction::isVectorToScalar() const {
 }
 
 bool VPInstruction::isSingleScalar() const {
-  return getOpcode() == VPInstruction::ResumePhi ||
-         getOpcode() == Instruction::PHI;
+  return getOpcode() == Instruction::PHI;
 }
 
 #if !defined(NDEBUG)
@@ -963,7 +951,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
   case VPInstruction::CanonicalIVIncrementForPart:
   case VPInstruction::BranchOnCount:
   case VPInstruction::BranchOnCond:
-  case VPInstruction::ResumePhi:
     return true;
   case VPInstruction::PtrAdd:
     return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -1020,9 +1007,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
   case VPInstruction::ActiveLaneMask:
     O << "active lane mask";
     break;
-  case VPInstruction::ResumePhi:
-    O << "resume-phi";
-    break;
   case VPInstruction::ExplicitVectorLength:
     O << "EXPLICIT-VECTOR-LENGTH";
     break;
@@ -1130,15 +1114,16 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
 
 void VPPhi::execute(VPTransformState &State) {
   State.setDebugLocFrom(getDebugLoc());
-  assert(getParent() ==
-             getParent()->getPlan()->getVectorLoopRegion()->getEntry() &&
-         "VPInstructions with PHI opcodes must be used for header phis only "
-         "at the moment");
-  BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0));
-  Value *Start = State.get(getIncomingValue(0), VPLane(0));
-  PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName());
-  Phi->addIncoming(Start, VectorPH);
-  State.set(this, Phi, VPLane(0));
+  PHINode *NewPhi = State.Builder.CreatePHI(
+      State.TypeAnalysis.inferScalarType(this), 2, getName());
+  unsigned NumIncoming =
+      getParent()->getNumPredecessors() == 0 ? 1 : getNumIncoming();
+  for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+    Value *IncV = State.get(getIncomingValue(Idx), /* IsScalar */ true);
+    BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
+    NewPhi->addIncoming(IncV, PredBB);
+  }
+  State.set(this, NewPhi, VPLane(0));
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f1c466e3208be..e1c0e505d7c90 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1732,11 +1732,21 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
   using namespace llvm::VPlanPatternMatch;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_shallow(Plan.getEntry()))) {
-    if (VPBB->getNumSuccessors() != 2 ||
-        !match(&VPBB->back(), m_BranchOnCond(m_True())))
+    VPValue *Cond;
+    if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
+        !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
       continue;
 
-    VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+    unsigned RemovedIdx;
+    if (match(Cond, m_True()))
+      RemovedIdx = 1;
+    else if (match(Cond, m_False()))
+      RemovedIdx = 0;
+    else
+      continue;
+
+    VPBasicBlock *RemovedSucc =
+        cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
     const auto &Preds = RemovedSucc->getPredecessors();
     assert(count(Preds, VPBB) == 1 &&
            "There must be a single edge between VPBB and its successor");
@@ -1745,12 +1755,14 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
     // Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
     // from these recipes.
     for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
-...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/140409


More information about the llvm-commits mailing list