[llvm] [VPlan] Implement cloning of VPlans. (PR #73158)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 22 10:45:52 PST 2023


https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/73158

This patch implements cloning for VPlans and recipes. Cloning is used in the epilogue vectorization path, to clone the VPlan for the main vector loop. This means we won't re-use a VPlan when executing the VPlan for the epilogue vector loop, which in turn will enable us to perform optimizations based on UF & VF.

>From f227f0f16c4eb18f0de57ab5f3e5fe418c2746c9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 6 Jul 2023 08:07:45 +0100
Subject: [PATCH] [VPlan] Implement cloning of VPlans.

This patch implements cloning for VPlans and recipes. Cloning is used in
the epilogue vectorization path, to clone the VPlan for the main vector
loop. This means we won't re-use a VPlan when executing the VPlan for
the epilogue vector loop, which in turn will enable us to perform
optimizations based on UF & VF.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   2 +-
 llvm/lib/Transforms/Vectorize/VPlan.cpp       | 121 +++++++++++++
 llvm/lib/Transforms/Vectorize/VPlan.h         | 171 ++++++++++++++++++
 3 files changed, 293 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index df71593543051f6..469e77d72b6b998 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10293,7 +10293,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
                                            EPI, &LVL, &CM, BFI, PSI, Checks);
 
-        VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
+        VPlan &BestMainPlan = *LVP.getBestPlanFor(EPI.MainLoopVF).clone();
         auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
                                              BestMainPlan, MainILV, DT, true);
         ++LoopsVectorized;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 9ffce475f022eb3..e7130a889e8b307 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -614,6 +614,18 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+VPBlockBase *VPRegionBlock::clone() {
+  DenseMap<VPBlockBase *, VPBlockBase *> Old2New;
+  DenseMap<VPValue *, VPValue *> Old2NewVPValues;
+  VPBlockBase *NewEntry =
+      VPBlockUtils::cloneCFG(Entry, Old2New, Old2NewVPValues);
+  auto *NewR =
+      new VPRegionBlock(NewEntry, Old2New[Exiting], getName(), isReplicator());
+  for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
+    Block->setParent(NewR);
+  return NewR;
+}
+
 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
   for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
     // Drop all references in VPBasicBlocks and replace all uses with
@@ -964,6 +976,65 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
 }
 
+static void remapVPValues(VPBasicBlock *OldBB, VPBasicBlock *NewBB,
+                          DenseMap<VPValue *, VPValue *> &Old2NewVPValues,
+                          bool Full = false) {
+  for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
+    for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
+      VPValue *NewOp = Old2NewVPValues.lookup(OldR.getOperand(I));
+      if (!Full)
+        continue;
+      NewR.setOperand(I, NewOp);
+    }
+    for (const auto &[OldV, NewV] :
+         zip(OldR.definedValues(), NewR.definedValues()))
+      Old2NewVPValues[OldV] = NewV;
+  }
+}
+
+VPlan *VPlan::clone() {
+  DenseMap<VPBlockBase *, VPBlockBase *> Old2New;
+  DenseMap<VPValue *, VPValue *> Old2NewVPValues;
+
+  auto *P = new VPlan();
+  SmallVector<VPValue *, 16> NewLiveIns;
+  for (VPValue *LI : VPLiveInsToFree) {
+    VPValue *NewLI = new VPValue(LI->getLiveInIRValue());
+    P->VPLiveInsToFree.push_back(NewLI);
+    Old2NewVPValues[LI] = NewLI;
+  }
+
+  Old2NewVPValues[&VectorTripCount] = &P->VectorTripCount;
+  if (BackedgeTakenCount) {
+    Old2NewVPValues[BackedgeTakenCount] = new VPValue();
+    P->BackedgeTakenCount = Old2NewVPValues[BackedgeTakenCount];
+  }
+
+  auto NewPH = cast<VPBasicBlock>(Preheader->clone());
+  remapVPValues(cast<VPBasicBlock>(Preheader), cast<VPBasicBlock>(NewPH),
+                Old2NewVPValues, /*Full*/ true);
+  VPValue *NewTC = Old2NewVPValues.lookup(TripCount);
+  if (!NewTC)
+    Old2NewVPValues[TripCount] = new VPValue(TripCount->getLiveInIRValue());
+  P->TripCount = Old2NewVPValues[TripCount];
+
+  auto *NewEntry = cast<VPBasicBlock>(VPBlockUtils::cloneCFG(
+      getEntry(), Old2New, Old2NewVPValues, /*FullRemapping*/ true));
+
+  P->Entry = NewEntry;
+  P->Preheader = NewPH;
+  NewEntry->setPlan(P);
+  NewPH->setPlan(P);
+  P->VFs = VFs;
+  P->UFs = UFs;
+  P->Name = Name;
+
+  for (const auto &[_, LO] : LiveOuts) {
+    P->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
+  }
+  return P;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 
 Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
@@ -1168,6 +1239,56 @@ void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
 }
 #endif
 
+VPBlockBase *VPBlockUtils::cloneCFG(
+    VPBlockBase *Entry, DenseMap<VPBlockBase *, VPBlockBase *> &Old2New,
+    DenseMap<VPValue *, VPValue *> &Old2NewVPValues, bool FullRemapping) {
+  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+      Entry);
+  VPBlockBase *NewEntry = nullptr;
+  for (VPBlockBase *BB : RPOT) {
+    VPBlockBase *NewBB = BB->clone();
+    if (!NewEntry)
+      NewEntry = NewBB;
+
+    for (VPBlockBase *Pred : BB->getPredecessors())
+      connectBlocks(Old2New[Pred], NewBB);
+
+    Old2New[BB] = NewBB;
+
+    if (!isa<VPBasicBlock>(BB))
+      continue;
+  }
+
+  // Update the operands of all cloned recipes starting at NewEntry. This traverses all reachable blocks. This is done in two steps, to handle cycles in PHI recipes.
+  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
+      OldDeepRPOT(Entry);
+  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
+      NewDeepRPOT(NewEntry);
+  // First, collect all mappings from old to new VPValues defined by cloned recipes.
+  for (const auto &[OldBB, NewBB] :
+       zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
+           VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
+    for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB))
+      for (const auto &[OldV, NewV] :
+           zip(OldR.definedValues(), NewR.definedValues()))
+        Old2NewVPValues[OldV] = NewV;
+  }
+
+  // Update all operands to use cloned VPValues.
+  for (VPBasicBlock *NewBB :
+           VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
+    for (VPRecipeBase &NewR : *NewBB)
+      for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
+        VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
+        if (!FullRemapping)
+          continue;
+        NewR.setOperand(I, NewOp);
+      }
+  }
+
+  return NewEntry;
+}
+
 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
                                           Old2NewTy &Old2New,
                                           InterleavedAccessInfo &IAI) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index a26308a212bbd3c..83e689e5cb3a06e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -482,6 +482,8 @@ class VPBlockBase {
 
   using VPBlocksTy = SmallVectorImpl<VPBlockBase *>;
 
+  virtual VPBlockBase *clone() = 0;
+
   virtual ~VPBlockBase() = default;
 
   const std::string &getName() const { return Name; }
@@ -729,6 +731,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
       : VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
   virtual ~VPRecipeBase() = default;
 
+  /// Clone the current recipe.
+  virtual VPRecipeBase *clone() { return nullptr; }
+
   /// \return the VPBasicBlock which this VPRecipe belongs to.
   VPBasicBlock *getParent() { return Parent; }
   const VPBasicBlock *getParent() const { return Parent; }
@@ -874,6 +879,12 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
     unsigned AllFlags;
   };
 
+protected:
+  void transferFlags(VPRecipeWithIRFlags &Other) {
+    OpType = Other.OpType;
+    AllFlags = Other.AllFlags;
+  }
+
 public:
   template <typename IterT>
   VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
@@ -1086,6 +1097,13 @@ class VPInstruction : public VPRecipeWithIRFlags, public VPValue {
 
   VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
 
+  virtual VPRecipeBase *clone() override {
+    SmallVector<VPValue *, 2> Operands(operands());
+    auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
+    New->transferFlags(*this);
+    return New;
+  }
+
   unsigned getOpcode() const { return Opcode; }
 
   /// Generate the instruction.
@@ -1166,6 +1184,12 @@ class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {
 
   ~VPWidenRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
+    R->transferFlags(*this);
+    return R;
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenSC)
 
   /// Produce widened copies of all Ingredients.
@@ -1201,6 +1225,12 @@ class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
 
   ~VPWidenCastRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPWidenCastRecipe(
+        Opcode, getOperand(0), ResultTy,
+        cast_if_present<CastInst>(getUnderlyingValue()));
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
 
   /// Produce widened copies of the cast.
@@ -1239,6 +1269,11 @@ class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
 
   ~VPWidenCallRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPWidenCallRecipe(*cast<CallInst>(getUnderlyingInstr()),
+                                 operands(), VectorIntrinsicID, Variant);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
 
   /// Produce a widened version of the call instruction.
@@ -1260,6 +1295,11 @@ struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
 
   ~VPWidenSelectRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
+                                   operands());
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
 
   /// Produce a widened version of the select instruction.
@@ -1304,6 +1344,11 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
 
   ~VPWidenGEPRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
+                                operands());
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
 
   /// Generate the gep nodes.
@@ -1419,6 +1464,11 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
 
   ~VPWidenIntOrFpInductionRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
+                                             getStepValue(), IndDesc, Trunc);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
 
   /// Generate the vectorized and scalarized versions of the phi node as
@@ -1489,6 +1539,12 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
 
   ~VPWidenPointerInductionRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPWidenPointerInductionRecipe(
+        cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
+        IndDesc, IsScalarAfterVectorization);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
 
   /// Generate vector values for the pointer induction.
@@ -1522,6 +1578,13 @@ class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
       addOperand(Start);
   }
 
+  VPRecipeBase *clone() override {
+    auto *Res = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
+                                     getOperand(0));
+    Res->IncomingBlocks = IncomingBlocks;
+    return Res;
+  }
+
   ~VPWidenPHIRecipe() override = default;
 
   VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
@@ -1561,6 +1624,11 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
     return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
   }
 
+  VPRecipeBase *clone() override {
+    return new VPFirstOrderRecurrencePHIRecipe(
+        cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
+  }
+
   void execute(VPTransformState &State) override;
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1596,6 +1664,14 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
 
   ~VPReductionPHIRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    auto *R =
+        new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
+                                 *getOperand(0), IsInLoop, IsOrdered);
+    R->addOperand(getBackedgeValue());
+    return R;
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
 
   static inline bool classof(const VPHeaderPHIRecipe *R) {
@@ -1638,6 +1714,11 @@ class VPBlendRecipe : public VPRecipeBase, public VPValue {
            "of operands");
   }
 
+  VPRecipeBase *clone() override {
+    SmallVector<VPValue *> Ops(operands());
+    return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPBlendSC)
 
   /// Return the number of incoming values, taking into account that a single
@@ -1707,6 +1788,11 @@ class VPInterleaveRecipe : public VPRecipeBase {
   }
   ~VPInterleaveRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
+                                  NeedsMaskForGaps);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
 
   /// Return the address accessed by this recipe.
@@ -1773,6 +1859,11 @@ class VPReductionRecipe : public VPRecipeBase, public VPValue {
 
   ~VPReductionRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
+                                 getVecOp(), getCondOp());
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPReductionSC)
 
   /// Generate the reduction in the loop
@@ -1817,6 +1908,11 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue {
 
   ~VPReplicateRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
+                                 isPredicated() ? getMask() : nullptr);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
 
   /// Generate replicas of the desired Ingredient. Replicas will be generated
@@ -1869,6 +1965,10 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
       addOperand(BlockInMask);
   }
 
+  VPRecipeBase *clone() override {
+    return new VPBranchOnMaskRecipe(getOperand(0));
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
 
   /// Generate the extraction of the appropriate bit from the block mask and the
@@ -1916,6 +2016,10 @@ class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue {
       : VPRecipeBase(VPDef::VPPredInstPHISC, PredV), VPValue(this) {}
   ~VPPredInstPHIRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPPredInstPHIRecipe(getOperand(0));
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
 
   /// Generates phi nodes for live-outs as needed to retain SSA form.
@@ -1979,6 +2083,16 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
     setMask(Mask);
   }
 
+  VPRecipeBase *clone() override {
+    if (isStore())
+      return new VPWidenMemoryInstructionRecipe(
+          cast<StoreInst>(Ingredient), getAddr(), getStoredValue(), getMask(),
+          Consecutive, Reverse);
+
+    return new VPWidenMemoryInstructionRecipe(
+        cast<LoadInst>(Ingredient), getAddr(), getMask(), Consecutive, Reverse);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
 
   /// Return the address accessed by this recipe.
@@ -2045,6 +2159,8 @@ class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue {
 
   ~VPExpandSCEVRecipe() override = default;
 
+  VPRecipeBase *clone() override { return new VPExpandSCEVRecipe(Expr, SE); }
+
   VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
 
   /// Generate a canonical vector induction variable of the vector loop, with
@@ -2070,6 +2186,12 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
 
   ~VPCanonicalIVPHIRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
+    R->addOperand(getBackedgeValue());
+    return R;
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
 
   static inline bool classof(const VPHeaderPHIRecipe *D) {
@@ -2116,6 +2238,10 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
 
   ~VPActiveLaneMaskPHIRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
 
   static inline bool classof(const VPHeaderPHIRecipe *D) {
@@ -2141,6 +2267,11 @@ class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
 
   ~VPWidenCanonicalIVRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPWidenCanonicalIVRecipe(
+        cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
 
   /// Generate a canonical vector induction variable of the vector loop, with
@@ -2183,9 +2314,22 @@ class VPDerivedIVRecipe : public VPRecipeBase, public VPValue {
         VPValue(this), TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
         FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
   }
+  VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
+                    const FPMathOperator *FPBinOp, VPValue *Start,
+                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
+                    Type *TruncResultTy)
+      : VPRecipeBase(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
+        VPValue(this), TruncResultTy(TruncResultTy), Kind(Kind),
+        FPBinOp(FPBinOp) {}
 
   ~VPDerivedIVRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPDerivedIVRecipe(Kind, FPBinOp, getOperand(0),
+                                 cast<VPCanonicalIVPHIRecipe>(getOperand(1)),
+                                 getOperand(2), TruncResultTy);
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
 
   /// Generate the transformed value of the induction at offset StartValue (1.
@@ -2237,6 +2381,12 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, public VPValue {
 
   ~VPScalarIVStepsRecipe() override = default;
 
+  VPRecipeBase *clone() override {
+    return new VPScalarIVStepsRecipe(
+        getOperand(0), getOperand(1), InductionOpcode,
+        hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
+  }
+
   VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
 
   /// Generate the scalarized versions of the phi node as needed by their users.
@@ -2281,6 +2431,13 @@ class VPBasicBlock : public VPBlockBase {
       Recipes.pop_back();
   }
 
+  VPBlockBase *clone() override {
+    auto *NewBlock = new VPBasicBlock(getName());
+    for (VPRecipeBase &R : *this)
+      NewBlock->appendRecipe(R.clone());
+    return NewBlock;
+  }
+
   /// Instruction iterators...
   using iterator = RecipeListTy::iterator;
   using const_iterator = RecipeListTy::const_iterator;
@@ -2419,6 +2576,8 @@ class VPRegionBlock : public VPBlockBase {
     }
   }
 
+  VPBlockBase *clone() override;
+
   /// Method to support type inquiry through isa, cast, and dyn_cast.
   static inline bool classof(const VPBlockBase *V) {
     return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
@@ -2536,6 +2695,9 @@ class VPlan {
   /// been modeled in VPlan directly.
   DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
 
+  /// Construct an uninitialized VPlan, should be used for cloning only.
+  VPlan() {}
+
 public:
   /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
   /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
@@ -2720,6 +2882,8 @@ class VPlan {
   VPBasicBlock *getPreheader() { return Preheader; }
   const VPBasicBlock *getPreheader() const { return Preheader; }
 
+  VPlan *clone();
+
 private:
   /// Add to the given dominator tree the header block and every new basic block
   /// that was created between it and the latch block, inclusive.
@@ -2882,6 +3046,13 @@ class VPBlockUtils {
       return cast<BlockTy>(&Block);
     });
   }
+
+  /// Clone the CFG for all nodes reachable from \p Entry, this includes cloning the blocks and their recipes. Operands of cloned recipes will be updated to use new VPValues from \p Old2NewValues. If \p FullRemapping is set to true, then all old VPValues from outside the cloned nodes must be mapped in \p Old2NewValues.
+  static VPBlockBase *
+  cloneCFG(VPBlockBase *Entry,
+           DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewBBs,
+           DenseMap<VPValue *, VPValue *> &Old2NewValues,
+           bool FullRemapping = false);
 };
 
 class VPInterleavedAccessInfo {



More information about the llvm-commits mailing list