[llvm] ec402a2 - [VPlan] Implement cloning of VPlans. (#73158)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 27 05:30:55 PST 2024
Author: Florian Hahn
Date: 2024-01-27T13:30:52Z
New Revision: ec402a2e532db7ec38e6aef0ed1b1c04c6769d9b
URL: https://github.com/llvm/llvm-project/commit/ec402a2e532db7ec38e6aef0ed1b1c04c6769d9b
DIFF: https://github.com/llvm/llvm-project/commit/ec402a2e532db7ec38e6aef0ed1b1c04c6769d9b.diff
LOG: [VPlan] Implement cloning of VPlans. (#73158)
This patch implements cloning for VPlans and recipes. Cloning is used in
the epilogue vectorization path, to clone the VPlan for the main vector
loop. This means we won't re-use a VPlan when executing the VPlan for
the epilogue vector loop, which in turn will enable us to perform
optimizations based on UF & VF.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c9c74edccf913b..7721075c31353d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10141,7 +10141,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);
- VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
+ VPlan &BestMainPlan = *LVP.getBestPlanFor(EPI.MainLoopVF).duplicate();
const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV, DT, true);
++LoopsVectorized;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 72709ac033a94c..b7ff1e1af65ac1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -615,6 +615,54 @@ void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry);
+
+// Clone the CFG for all nodes in the single-entry-single-exit region reachable
+// from \p Entry; this includes cloning the blocks and their recipes. Operands
+// of cloned recipes will NOT be updated. Remapping of operands must be done
+// separately. Returns a pair with the new entry and exiting blocks of the
+// cloned region.
+static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry) {
+ DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks;
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
+ for (VPBlockBase *BB : RPOT) {
+ VPBlockBase *NewBB = BB->clone();
+ for (VPBlockBase *Pred : BB->getPredecessors())
+ VPBlockUtils::connectBlocks(Old2NewVPBlocks[Pred], NewBB);
+
+ Old2NewVPBlocks[BB] = NewBB;
+ }
+
+#if !defined(NDEBUG)
+ // Verify that the order of predecessors and successors matches in the cloned
+ // version.
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
+ NewRPOT(Old2NewVPBlocks[Entry]);
+ for (const auto &[OldBB, NewBB] : zip(RPOT, NewRPOT)) {
+ for (const auto &[OldPred, NewPred] :
+ zip(OldBB->getPredecessors(), NewBB->getPredecessors()))
+ assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors");
+
+ for (const auto &[OldSucc, NewSucc] :
+ zip(OldBB->successors(), NewBB->successors()))
+ assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors");
+ }
+#endif
+
+ return std::make_pair(Old2NewVPBlocks[Entry],
+ Old2NewVPBlocks[*reverse(RPOT).begin()]);
+}
+
+VPRegionBlock *VPRegionBlock::clone() {
+ const auto &[NewEntry, NewExiting] = cloneSESE(getEntry());
+ auto *NewRegion =
+ new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator());
+ for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
+ Block->setParent(NewRegion);
+ return NewRegion;
+}
+
void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
// Drop all references in VPBasicBlocks and replace all uses with
@@ -983,6 +1031,87 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}
+static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,
+ DenseMap<VPValue *, VPValue *> &Old2NewVPValues) {
+ // Update the operands of all cloned recipes starting at NewEntry. This
+ // traverses all reachable blocks. This is done in two steps, to handle cycles
+ // in PHI recipes.
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
+ OldDeepRPOT(Entry);
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>>
+ NewDeepRPOT(NewEntry);
+ // First, collect all mappings from old to new VPValues defined by cloned
+ // recipes.
+ for (const auto &[OldBB, NewBB] :
+ zip(VPBlockUtils::blocksOnly<VPBasicBlock>(OldDeepRPOT),
+ VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT))) {
+ assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() &&
+ "blocks must have the same number of recipes");
+ for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) {
+ assert(OldR.getNumOperands() == NewR.getNumOperands() &&
+ "recipes must have the same number of operands");
+ assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() &&
+ "recipes must define the same number of operands");
+ for (const auto &[OldV, NewV] :
+ zip(OldR.definedValues(), NewR.definedValues()))
+ Old2NewVPValues[OldV] = NewV;
+ }
+ }
+
+ // Update all operands to use cloned VPValues.
+ for (VPBasicBlock *NewBB :
+ VPBlockUtils::blocksOnly<VPBasicBlock>(NewDeepRPOT)) {
+ for (VPRecipeBase &NewR : *NewBB)
+ for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) {
+ VPValue *NewOp = Old2NewVPValues.lookup(NewR.getOperand(I));
+ NewR.setOperand(I, NewOp);
+ }
+ }
+}
+
+VPlan *VPlan::duplicate() {
+ // Clone blocks.
+ VPBasicBlock *NewPreheader = Preheader->clone();
+ const auto &[NewEntry, __] = cloneSESE(Entry);
+
+ // Create VPlan, clone live-ins and remap operands in the cloned blocks.
+ auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry));
+ DenseMap<VPValue *, VPValue *> Old2NewVPValues;
+ for (VPValue *OldLiveIn : VPLiveInsToFree) {
+ VPValue *NewLiveIn = new VPValue(OldLiveIn->getLiveInIRValue());
+ NewPlan->VPLiveInsToFree.push_back(NewLiveIn);
+ Old2NewVPValues[OldLiveIn] = NewLiveIn;
+ }
+ Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
+ Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
+ if (BackedgeTakenCount) {
+ NewPlan->BackedgeTakenCount = new VPValue();
+ Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
+ }
+ assert(TripCount && "trip count must be set");
+ if (TripCount->isLiveIn())
+ Old2NewVPValues[TripCount] = new VPValue(TripCount->getLiveInIRValue());
+ // else NewTripCount will be created and inserted into Old2NewVPValues when
+ // TripCount is cloned. In any case NewPlan->TripCount is updated below.
+
+ remapOperands(Preheader, NewPreheader, Old2NewVPValues);
+ remapOperands(Entry, NewEntry, Old2NewVPValues);
+
+ // Clone live-outs.
+ for (const auto &[_, LO] : LiveOuts)
+ NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]);
+
+ // Initialize remaining fields of cloned VPlan.
+ NewPlan->VFs = VFs;
+ NewPlan->UFs = UFs;
+ // TODO: Adjust names.
+ NewPlan->Name = Name;
+ assert(Old2NewVPValues.contains(TripCount) &&
+ "TripCount must have been added to Old2NewVPValues");
+ NewPlan->TripCount = Old2NewVPValues[TripCount];
+ return NewPlan;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6715f73e3fa20d..20792cb9ac7c1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -662,6 +662,11 @@ class VPBlockBase {
/// Dump this VPBlockBase to dbgs().
LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
+
+ /// Clone the current block and its recipes without updating the operands of
+ /// the cloned recipes, including all blocks in the single-entry single-exit
+ /// region for VPRegionBlocks.
+ virtual VPBlockBase *clone() = 0;
};
/// A value that is used outside the VPlan. The operand of the user needs to be
@@ -727,6 +732,9 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
: VPDef(SC), VPUser(Operands, VPUser::VPUserID::Recipe), DL(DL) {}
virtual ~VPRecipeBase() = default;
+ /// Clone the current recipe.
+ virtual VPRecipeBase *clone() = 0;
+
/// \return the VPBasicBlock which this VPRecipe belongs to.
VPBasicBlock *getParent() { return Parent; }
const VPBasicBlock *getParent() const { return Parent; }
@@ -947,6 +955,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
unsigned AllFlags;
};
+protected:
+ void transferFlags(VPRecipeWithIRFlags &Other) {
+ OpType = Other.OpType;
+ AllFlags = Other.AllFlags;
+ }
+
public:
template <typename IterT>
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
@@ -1189,6 +1203,13 @@ class VPInstruction : public VPRecipeWithIRFlags {
VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
+ VPRecipeBase *clone() override {
+ SmallVector<VPValue *, 2> Operands(operands());
+ auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
+ New->transferFlags(*this);
+ return New;
+ }
+
unsigned getOpcode() const { return Opcode; }
/// Generate the instruction.
@@ -1283,6 +1304,12 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
~VPWidenRecipe() override = default;
+ VPRecipeBase *clone() override {
+ auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
+ R->transferFlags(*this);
+ return R;
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenSC)
/// Produce widened copies of all Ingredients.
@@ -1322,6 +1349,14 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
~VPWidenCastRecipe() override = default;
+ VPRecipeBase *clone() override {
+ if (auto *UV = getUnderlyingValue())
+ return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
+ *cast<CastInst>(UV));
+
+ return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
/// Produce widened copies of the cast.
@@ -1354,6 +1389,10 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
~VPScalarCastRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
void execute(VPTransformState &State) override;
@@ -1388,6 +1427,12 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
~VPWidenCallRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPWidenCallRecipe(*cast<CallInst>(getUnderlyingInstr()),
+ operands(), VectorIntrinsicID, getDebugLoc(),
+ Variant);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
/// Produce a widened version of the call instruction.
@@ -1409,6 +1454,11 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
~VPWidenSelectRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
+ operands());
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
/// Produce a widened version of the select instruction.
@@ -1452,6 +1502,11 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
~VPWidenGEPRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
+ operands());
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
/// Generate the gep nodes.
@@ -1488,6 +1543,11 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
return true;
}
+ VPRecipeBase *clone() override {
+ return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
+ isInBounds(), getDebugLoc());
+ }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1598,6 +1658,11 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
~VPWidenIntOrFpInductionRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
+ getStepValue(), IndDesc, Trunc);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
/// Generate the vectorized and scalarized versions of the phi node as
@@ -1668,6 +1733,12 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
~VPWidenPointerInductionRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPWidenPointerInductionRecipe(
+ cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
+ IndDesc, IsScalarAfterVectorization);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
/// Generate vector values for the pointer induction.
@@ -1701,6 +1772,10 @@ class VPWidenPHIRecipe : public VPHeaderPHIRecipe {
addOperand(Start);
}
+ VPRecipeBase *clone() override {
+ llvm_unreachable("cloning not implemented yet");
+ }
+
~VPWidenPHIRecipe() override = default;
VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
@@ -1740,6 +1815,11 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
}
+ VPRecipeBase *clone() override {
+ return new VPFirstOrderRecurrencePHIRecipe(
+ cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
+ }
+
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1775,6 +1855,14 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
~VPReductionPHIRecipe() override = default;
+ VPRecipeBase *clone() override {
+ auto *R =
+ new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
+ *getOperand(0), IsInLoop, IsOrdered);
+ R->addOperand(getBackedgeValue());
+ return R;
+ }
+
VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
static inline bool classof(const VPHeaderPHIRecipe *R) {
@@ -1816,6 +1904,11 @@ class VPBlendRecipe : public VPSingleDefRecipe {
"of operands");
}
+ VPRecipeBase *clone() override {
+ SmallVector<VPValue *> Ops(operands());
+ return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPBlendSC)
/// Return the number of incoming values, taking into account that a single
@@ -1885,6 +1978,11 @@ class VPInterleaveRecipe : public VPRecipeBase {
}
~VPInterleaveRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
+ NeedsMaskForGaps);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
/// Return the address accessed by this recipe.
@@ -1952,6 +2050,11 @@ class VPReductionRecipe : public VPSingleDefRecipe {
~VPReductionRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
+ getVecOp(), getCondOp());
+ }
+
VP_CLASSOF_IMPL(VPDef::VPReductionSC)
/// Generate the reduction in the loop
@@ -1996,6 +2099,11 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags {
~VPReplicateRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
+ isPredicated() ? getMask() : nullptr);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
/// Generate replicas of the desired Ingredient. Replicas will be generated
@@ -2048,6 +2156,10 @@ class VPBranchOnMaskRecipe : public VPRecipeBase {
addOperand(BlockInMask);
}
+ VPRecipeBase *clone() override {
+ return new VPBranchOnMaskRecipe(getOperand(0));
+ }
+
VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
/// Generate the extraction of the appropriate bit from the block mask and the
@@ -2095,6 +2207,10 @@ class VPPredInstPHIRecipe : public VPSingleDefRecipe {
: VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
~VPPredInstPHIRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPPredInstPHIRecipe(getOperand(0));
+ }
+
VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
/// Generates phi nodes for live-outs as needed to retain SSA form.
@@ -2158,6 +2274,16 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
setMask(Mask);
}
+ VPRecipeBase *clone() override {
+ if (isStore())
+ return new VPWidenMemoryInstructionRecipe(
+ cast<StoreInst>(Ingredient), getAddr(), getStoredValue(), getMask(),
+ Consecutive, Reverse);
+
+ return new VPWidenMemoryInstructionRecipe(
+ cast<LoadInst>(Ingredient), getAddr(), getMask(), Consecutive, Reverse);
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
/// Return the address accessed by this recipe.
@@ -2223,6 +2349,8 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
~VPExpandSCEVRecipe() override = default;
+ VPRecipeBase *clone() override { return new VPExpandSCEVRecipe(Expr, SE); }
+
VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
/// Generate a canonical vector induction variable of the vector loop, with
@@ -2248,6 +2376,12 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
~VPCanonicalIVPHIRecipe() override = default;
+ VPRecipeBase *clone() override {
+ auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
+ R->addOperand(getBackedgeValue());
+ return R;
+ }
+
VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
static inline bool classof(const VPHeaderPHIRecipe *D) {
@@ -2300,6 +2434,10 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
~VPActiveLaneMaskPHIRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc());
+ }
+
VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
static inline bool classof(const VPHeaderPHIRecipe *D) {
@@ -2324,6 +2462,11 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
~VPWidenCanonicalIVRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPWidenCanonicalIVRecipe(
+ cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
+ }
+
VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
/// Generate a canonical vector induction variable of the vector loop, with
@@ -2354,16 +2497,27 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// for floating point inductions.
const FPMathOperator *FPBinOp;
+ VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
+ const FPMathOperator *FPBinOp, VPValue *Start,
+ VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
+ : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
+ Kind(Kind), FPBinOp(FPBinOp) {}
+
public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
- : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
- Kind(IndDesc.getKind()),
- FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
- }
+ : VPDerivedIVRecipe(
+ IndDesc.getKind(),
+ dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
+ Start, CanonicalIV, Step) {}
~VPDerivedIVRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(),
+ getCanonicalIV(), getStepValue());
+ }
+
VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
/// Generate the transformed value of the induction at offset StartValue (1.
@@ -2381,7 +2535,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
}
VPValue *getStartValue() const { return getOperand(0); }
- VPValue *getCanonicalIV() const { return getOperand(1); }
+ VPCanonicalIVPHIRecipe *getCanonicalIV() const {
+ return cast<VPCanonicalIVPHIRecipe>(getOperand(1));
+ }
VPValue *getStepValue() const { return getOperand(2); }
/// Returns true if the recipe only uses the first lane of operand \p Op.
@@ -2414,6 +2570,12 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
~VPScalarIVStepsRecipe() override = default;
+ VPRecipeBase *clone() override {
+ return new VPScalarIVStepsRecipe(
+ getOperand(0), getOperand(1), InductionOpcode,
+ hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
+ }
+
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
/// Generate the scalarized versions of the phi node as needed by their users.
@@ -2548,6 +2710,15 @@ class VPBasicBlock : public VPBlockBase {
/// Returns true if the block is exiting it's parent region.
bool isExiting() const;
+ /// Clone the current block and its recipes, without updating the operands of
+ /// the cloned recipes.
+ VPBasicBlock *clone() override {
+ auto *NewBlock = new VPBasicBlock(getName());
+ for (VPRecipeBase &R : *this)
+ NewBlock->appendRecipe(R.clone());
+ return NewBlock;
+ }
+
private:
/// Create an IR BasicBlock to hold the output instructions generated by this
/// VPBasicBlock, and return it. Update the CFGState accordingly.
@@ -2652,6 +2823,10 @@ class VPRegionBlock : public VPBlockBase {
VPSlotTracker &SlotTracker) const override;
using VPBlockBase::print; // Get the print(raw_stream &O) version.
#endif
+
+ /// Clone all blocks in the single-entry single-exit region of the block and
+ /// their recipes without updating the operands of the cloned recipes.
+ VPRegionBlock *clone() override;
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -2905,6 +3080,10 @@ class VPlan {
VPBasicBlock *getPreheader() { return Preheader; }
const VPBasicBlock *getPreheader() const { return Preheader; }
+ /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
+ /// recipes to refer to the clones, and return it.
+ VPlan *duplicate();
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 65d241feeab2fc..33d5e2759af590 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1299,6 +1299,8 @@ struct VPDoubleValueDef : public VPRecipeBase {
new VPValue(nullptr, this);
}
+ VPRecipeBase *clone() override { return nullptr; }
+
void execute(struct VPTransformState &State) override {}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &O, const Twine &Indent,
More information about the llvm-commits
mailing list