[llvm] [VPlan] Move auxiliary declarations out of VPlan.h (NFC). (PR #124104)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 12:46:47 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/124104
>From 96ddaa27df13c419e362b5c841698a135f2d3f90 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 22 Jan 2025 14:11:21 +0000
Subject: [PATCH 1/2] [VPlan] Move auxiliary declarations out of VPlan.h (NFC).
Nothing in VPlan.h directly depends on VPTransformState, VPCostContext,
VFRange, VPlanPrinter or VPSlotTracker. Move them out to a separate
header to reduce the size of the widely used VPlan.h.
This is a first step towards more cleanly separating declarations in
VPlan.
Besides reducing VPlan.h's size, this also allows including additional
VPlan-related headers in VPlanHelpers.h for use there. An example is
using VPDominatorTree in VPTransformState
(https://github.com/llvm/llvm-project/pull/117138).
---
.../Vectorize/LoopVectorizationPlanner.h | 1 +
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
.../Transforms/Vectorize/VPRecipeBuilder.h | 1 +
llvm/lib/Transforms/Vectorize/VPlan.cpp | 71 +--
llvm/lib/Transforms/Vectorize/VPlan.h | 537 +-----------------
llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 465 +++++++++++++++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +
llvm/lib/Transforms/Vectorize/VPlanSLP.cpp | 55 ++
llvm/lib/Transforms/Vectorize/VPlanSLP.h | 166 ++++++
.../Transforms/Vectorize/VPlanTransforms.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlanValue.h | 35 --
.../Transforms/Vectorize/VPlanSlpTest.cpp | 1 +
.../Transforms/Vectorize/VPlanTest.cpp | 1 +
13 files changed, 712 insertions(+), 625 deletions(-)
create mode 100644 llvm/lib/Transforms/Vectorize/VPlanHelpers.h
create mode 100644 llvm/lib/Transforms/Vectorize/VPlanSLP.h
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index bc44ec11edb7b0..fc10a518d39ef8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -40,6 +40,7 @@ class OptimizationRemarkEmitter;
class TargetTransformInfo;
class TargetLibraryInfo;
class VPRecipeBuilder;
+struct VFRange;
/// VPlan-based builder utility analogous to IRBuilder.
class VPBuilder {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dec7a87ba9c50b..d48c7e7f3993e1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -59,6 +59,7 @@
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanHCFGBuilder.h"
+#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 9b1f40d0560bc2..43d5f35650578c 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -23,6 +23,7 @@ class LoopVectorizationCostModel;
class TargetLibraryInfo;
class TargetTransformInfo;
struct HistogramInfo;
+struct VFRange;
/// A chain of instructions that form a partial reduction.
/// Designed to match: reduction_bin_op (bin_op (extend (A), (extend (B))),
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f1228368804beb..6cd88897accd43 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -19,6 +19,7 @@
#include "VPlan.h"
#include "LoopVectorizationPlanner.h"
#include "VPlanCFG.h"
+#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanUtils.h"
@@ -400,8 +401,8 @@ void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
set(Def, VectorValue);
}
-BasicBlock *
-VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
+BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
+ auto &CFG = State.CFG;
// BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
// Pred stands for Predessor. Prev stands for Previous - last visited/created.
BasicBlock *PrevBB = CFG.PrevBB;
@@ -412,7 +413,8 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
return NewBB;
}
-void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
+void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
+ auto &CFG = State.CFG;
BasicBlock *NewBB = CFG.VPBB2IRBB[this];
// Hook up the new basic block to its predecessors.
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
@@ -467,7 +469,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
"other blocks must be terminated by a branch");
}
- connectToPredecessors(State->CFG);
+ connectToPredecessors(*State);
}
VPIRBasicBlock *VPIRBasicBlock::clone() {
@@ -494,7 +496,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
// * the exit of a replicate region.
State->CFG.VPBB2IRBB[this] = NewBB;
} else {
- NewBB = createEmptyBasicBlock(State->CFG);
+ NewBB = createEmptyBasicBlock(*State);
State->Builder.SetInsertPoint(NewBB);
// Temporarily terminate with unreachable until CFG is rewired.
@@ -507,7 +509,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
State->CFG.PrevBB = NewBB;
State->CFG.VPBB2IRBB[this] = NewBB;
- connectToPredecessors(State->CFG);
+ connectToPredecessors(*State);
}
// 2. Fill the IR basic block with IR instructions.
@@ -616,6 +618,11 @@ bool VPBasicBlock::isExiting() const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPBlockBase::print(raw_ostream &O) const {
+ VPSlotTracker SlotTracker(getPlan());
+ print(O, "", SlotTracker);
+}
+
void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const {
if (getSuccessors().empty()) {
O << Indent << "No successors\n";
@@ -1460,58 +1467,6 @@ void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
}
#endif
-void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
- Old2NewTy &Old2New,
- InterleavedAccessInfo &IAI) {
- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
- RPOT(Region->getEntry());
- for (VPBlockBase *Base : RPOT) {
- visitBlock(Base, Old2New, IAI);
- }
-}
-
-void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
- InterleavedAccessInfo &IAI) {
- if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
- for (VPRecipeBase &VPI : *VPBB) {
- if (isa<VPWidenPHIRecipe>(&VPI))
- continue;
- assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
- auto *VPInst = cast<VPInstruction>(&VPI);
-
- auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
- if (!Inst)
- continue;
- auto *IG = IAI.getInterleaveGroup(Inst);
- if (!IG)
- continue;
-
- auto NewIGIter = Old2New.find(IG);
- if (NewIGIter == Old2New.end())
- Old2New[IG] = new InterleaveGroup<VPInstruction>(
- IG->getFactor(), IG->isReverse(), IG->getAlign());
-
- if (Inst == IG->getInsertPos())
- Old2New[IG]->setInsertPos(VPInst);
-
- InterleaveGroupMap[VPInst] = Old2New[IG];
- InterleaveGroupMap[VPInst]->insertMember(
- VPInst, IG->getIndex(Inst),
- Align(IG->isReverse() ? (-1) * int(IG->getFactor())
- : IG->getFactor()));
- }
- } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
- visitRegion(Region, Old2New, IAI);
- else
- llvm_unreachable("Unsupported kind of VPBlock.");
-}
-
-VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
- InterleavedAccessInfo &IAI) {
- Old2NewTy Old2New;
- visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
-}
-
void VPSlotTracker::assignName(const VPValue *V) {
assert(!VPValue2Name.contains(V) && "VPValue already has a name!");
auto *UV = V->getUnderlyingValue();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b52ee3c2428f3f..db68c6a7148a7b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -17,7 +17,6 @@
/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
/// instruction;
/// 5. The VPlan class holding a candidate for vectorization;
-/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
/// These are documented in docs/VectorizationPlan.rst.
//
//===----------------------------------------------------------------------===//
@@ -34,10 +33,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
@@ -54,7 +50,7 @@ class BasicBlock;
class DominatorTree;
class InnerLoopVectorizer;
class IRBuilderBase;
-class LoopInfo;
+struct VPTransformState;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
@@ -62,11 +58,11 @@ class Type;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
+class VPLane;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
class LoopVectorizationCostModel;
-class LoopVersioning;
struct VPCostContext;
@@ -74,318 +70,8 @@ namespace Intrinsic {
typedef unsigned ID;
}
-/// Returns a calculation for the total number of elements for a given \p VF.
-/// For fixed width vectors this value is a constant, whereas for scalable
-/// vectors it is an expression determined at runtime.
-Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
-
-/// Return a value for Step multiplied by VF.
-Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
- int64_t Step);
-
-/// A helper function that returns the reciprocal of the block probability of
-/// predicated blocks. If we return X, we are assuming the predicated block
-/// will execute once for every X iterations of the loop header.
-///
-/// TODO: We should use actual block probability here, if available. Currently,
-/// we always assume predicated blocks have a 50% chance of executing.
-inline unsigned getReciprocalPredBlockProb() { return 2; }
-
-/// A range of powers-of-2 vectorization factors with fixed start and
-/// adjustable end. The range includes start and excludes end, e.g.,:
-/// [1, 16) = {1, 2, 4, 8}
-struct VFRange {
- // A power of 2.
- const ElementCount Start;
-
- // A power of 2. If End <= Start range is empty.
- ElementCount End;
-
- bool isEmpty() const {
- return End.getKnownMinValue() <= Start.getKnownMinValue();
- }
-
- VFRange(const ElementCount &Start, const ElementCount &End)
- : Start(Start), End(End) {
- assert(Start.isScalable() == End.isScalable() &&
- "Both Start and End should have the same scalable flag");
- assert(isPowerOf2_32(Start.getKnownMinValue()) &&
- "Expected Start to be a power of 2");
- assert(isPowerOf2_32(End.getKnownMinValue()) &&
- "Expected End to be a power of 2");
- }
-
- /// Iterator to iterate over vectorization factors in a VFRange.
- class iterator
- : public iterator_facade_base<iterator, std::forward_iterator_tag,
- ElementCount> {
- ElementCount VF;
-
- public:
- iterator(ElementCount VF) : VF(VF) {}
-
- bool operator==(const iterator &Other) const { return VF == Other.VF; }
-
- ElementCount operator*() const { return VF; }
-
- iterator &operator++() {
- VF *= 2;
- return *this;
- }
- };
-
- iterator begin() { return iterator(Start); }
- iterator end() {
- assert(isPowerOf2_32(End.getKnownMinValue()));
- return iterator(End);
- }
-};
-
using VPlanPtr = std::unique_ptr<VPlan>;
-/// In what follows, the term "input IR" refers to code that is fed into the
-/// vectorizer whereas the term "output IR" refers to code that is generated by
-/// the vectorizer.
-
-/// VPLane provides a way to access lanes in both fixed width and scalable
-/// vectors, where for the latter the lane index sometimes needs calculating
-/// as a runtime expression.
-class VPLane {
-public:
- /// Kind describes how to interpret Lane.
- enum class Kind : uint8_t {
- /// For First, Lane is the index into the first N elements of a
- /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
- First,
- /// For ScalableLast, Lane is the offset from the start of the last
- /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
- /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
- /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
- ScalableLast
- };
-
-private:
- /// in [0..VF)
- unsigned Lane;
-
- /// Indicates how the Lane should be interpreted, as described above.
- Kind LaneKind;
-
-public:
- VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
- VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
-
- static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
-
- static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
- assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
- "trying to extract with invalid offset");
- unsigned LaneOffset = VF.getKnownMinValue() - Offset;
- Kind LaneKind;
- if (VF.isScalable())
- // In this case 'LaneOffset' refers to the offset from the start of the
- // last subvector with VF.getKnownMinValue() elements.
- LaneKind = VPLane::Kind::ScalableLast;
- else
- LaneKind = VPLane::Kind::First;
- return VPLane(LaneOffset, LaneKind);
- }
-
- static VPLane getLastLaneForVF(const ElementCount &VF) {
- return getLaneFromEnd(VF, 1);
- }
-
- /// Returns a compile-time known value for the lane index and asserts if the
- /// lane can only be calculated at runtime.
- unsigned getKnownLane() const {
- assert(LaneKind == Kind::First);
- return Lane;
- }
-
- /// Returns an expression describing the lane index that can be used at
- /// runtime.
- Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
-
- /// Returns the Kind of lane offset.
- Kind getKind() const { return LaneKind; }
-
- /// Returns true if this is the first lane of the whole vector.
- bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
-
- /// Maps the lane to a cache index based on \p VF.
- unsigned mapToCacheIndex(const ElementCount &VF) const {
- switch (LaneKind) {
- case VPLane::Kind::ScalableLast:
- assert(VF.isScalable() && Lane < VF.getKnownMinValue());
- return VF.getKnownMinValue() + Lane;
- default:
- assert(Lane < VF.getKnownMinValue());
- return Lane;
- }
- }
-};
-
-/// VPTransformState holds information passed down when "executing" a VPlan,
-/// needed for generating the output IR.
-struct VPTransformState {
- VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
- LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
- InnerLoopVectorizer *ILV, VPlan *Plan,
- Loop *CurrentParentLoop, Type *CanonicalIVTy);
- /// Target Transform Info.
- const TargetTransformInfo *TTI;
-
- /// The chosen Vectorization Factor of the loop being vectorized.
- ElementCount VF;
-
- /// Hold the index to generate specific scalar instructions. Null indicates
- /// that all instances are to be generated, using either scalar or vector
- /// instructions.
- std::optional<VPLane> Lane;
-
- struct DataState {
- // Each value from the original loop, when vectorized, is represented by a
- // vector value in the map.
- DenseMap<VPValue *, Value *> VPV2Vector;
-
- DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
- } Data;
-
- /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
- /// is false, otherwise return the generated scalar. \See set.
- Value *get(VPValue *Def, bool IsScalar = false);
-
- /// Get the generated Value for a given VPValue and given Part and Lane.
- Value *get(VPValue *Def, const VPLane &Lane);
-
- bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
-
- bool hasScalarValue(VPValue *Def, VPLane Lane) {
- auto I = Data.VPV2Scalars.find(Def);
- if (I == Data.VPV2Scalars.end())
- return false;
- unsigned CacheIdx = Lane.mapToCacheIndex(VF);
- return CacheIdx < I->second.size() && I->second[CacheIdx];
- }
-
- /// Set the generated vector Value for a given VPValue, if \p
- /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
- void set(VPValue *Def, Value *V, bool IsScalar = false) {
- if (IsScalar) {
- set(Def, V, VPLane(0));
- return;
- }
- assert((VF.isScalar() || V->getType()->isVectorTy()) &&
- "scalar values must be stored as (0, 0)");
- Data.VPV2Vector[Def] = V;
- }
-
- /// Reset an existing vector value for \p Def and a given \p Part.
- void reset(VPValue *Def, Value *V) {
- assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
- Data.VPV2Vector[Def] = V;
- }
-
- /// Set the generated scalar \p V for \p Def and the given \p Lane.
- void set(VPValue *Def, Value *V, const VPLane &Lane) {
- auto &Scalars = Data.VPV2Scalars[Def];
- unsigned CacheIdx = Lane.mapToCacheIndex(VF);
- if (Scalars.size() <= CacheIdx)
- Scalars.resize(CacheIdx + 1);
- assert(!Scalars[CacheIdx] && "should overwrite existing value");
- Scalars[CacheIdx] = V;
- }
-
- /// Reset an existing scalar value for \p Def and a given \p Lane.
- void reset(VPValue *Def, Value *V, const VPLane &Lane) {
- auto Iter = Data.VPV2Scalars.find(Def);
- assert(Iter != Data.VPV2Scalars.end() &&
- "need to overwrite existing value");
- unsigned CacheIdx = Lane.mapToCacheIndex(VF);
- assert(CacheIdx < Iter->second.size() &&
- "need to overwrite existing value");
- Iter->second[CacheIdx] = V;
- }
-
- /// Add additional metadata to \p To that was not present on \p Orig.
- ///
- /// Currently this is used to add the noalias annotations based on the
- /// inserted memchecks. Use this for instructions that are *cloned* into the
- /// vector loop.
- void addNewMetadata(Instruction *To, const Instruction *Orig);
-
- /// Add metadata from one instruction to another.
- ///
- /// This includes both the original MDs from \p From and additional ones (\see
- /// addNewMetadata). Use this for *newly created* instructions in the vector
- /// loop.
- void addMetadata(Value *To, Instruction *From);
-
- /// Set the debug location in the builder using the debug location \p DL.
- void setDebugLocFrom(DebugLoc DL);
-
- /// Construct the vector value of a scalarized value \p V one lane at a time.
- void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
-
- /// Hold state information used when constructing the CFG of the output IR,
- /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
- struct CFGState {
- /// The previous VPBasicBlock visited. Initially set to null.
- VPBasicBlock *PrevVPBB = nullptr;
-
- /// The previous IR BasicBlock created or used. Initially set to the new
- /// header BasicBlock.
- BasicBlock *PrevBB = nullptr;
-
- /// The last IR BasicBlock in the output IR. Set to the exit block of the
- /// vector loop.
- BasicBlock *ExitBB = nullptr;
-
- /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
- /// of replication, maps the BasicBlock of the last replica created.
- SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
-
- /// Updater for the DominatorTree.
- DomTreeUpdater DTU;
-
- CFGState(DominatorTree *DT)
- : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
-
- /// Returns the BasicBlock* mapped to the pre-header of the loop region
- /// containing \p R.
- BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
- } CFG;
-
- /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
- LoopInfo *LI;
-
- /// Hold a reference to the IRBuilder used to generate output IR code.
- IRBuilderBase &Builder;
-
- /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
- InnerLoopVectorizer *ILV;
-
- /// Pointer to the VPlan code is generated for.
- VPlan *Plan;
-
- /// The parent loop object for the current scope, or nullptr.
- Loop *CurrentParentLoop = nullptr;
-
- /// LoopVersioning. It's only set up (non-null) if memchecks were
- /// used.
- ///
- /// This is currently only used to add no-alias metadata based on the
- /// memchecks. The actually versioning is performed manually.
- LoopVersioning *LVer = nullptr;
-
- /// Map SCEVs to their expanded values. Populated when executing
- /// VPExpandSCEVRecipes.
- DenseMap<const SCEV *, Value *> ExpandedSCEVs;
-
- /// VPlan-based type analysis.
- VPTypeAnalysis TypeAnalysis;
-};
-
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
class VPBlockBase {
@@ -653,10 +339,7 @@ class VPBlockBase {
VPSlotTracker &SlotTracker) const = 0;
/// Print plain-text dump of this VPlan to \p O.
- void print(raw_ostream &O) const {
- VPSlotTracker SlotTracker(getPlan());
- print(O, "", SlotTracker);
- }
+ void print(raw_ostream &O) const;
/// Print the successors of this block to \p O, prefixing all lines with \p
/// Indent.
@@ -672,34 +355,6 @@ class VPBlockBase {
virtual VPBlockBase *clone() = 0;
};
-/// Struct to hold various analysis needed for cost computations.
-struct VPCostContext {
- const TargetTransformInfo &TTI;
- const TargetLibraryInfo &TLI;
- VPTypeAnalysis Types;
- LLVMContext &LLVMCtx;
- LoopVectorizationCostModel &CM;
- SmallPtrSet<Instruction *, 8> SkipCostComputation;
- TargetTransformInfo::TargetCostKind CostKind;
-
- VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
- Type *CanIVTy, LoopVectorizationCostModel &CM,
- TargetTransformInfo::TargetCostKind CostKind)
- : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
- CM(CM), CostKind(CostKind) {}
-
- /// Return the cost for \p UI with \p VF using the legacy cost model as
- /// fallback until computing the cost of all recipes migrates to VPlan.
- InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
-
- /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
- /// has already been pre-computed.
- bool skipCostComputation(Instruction *UI, bool IsVector) const;
-
- /// Returns the OperandInfo for \p V, if it is a live-in.
- TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
-};
-
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
@@ -3660,12 +3315,12 @@ class VPBasicBlock : public VPBlockBase {
/// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
/// generated for this VPBB.
- void connectToPredecessors(VPTransformState::CFGState &CFG);
+ void connectToPredecessors(VPTransformState &State);
private:
/// Create an IR BasicBlock to hold the output instructions generated by this
/// VPBasicBlock, and return it. Update the CFGState accordingly.
- BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
+ BasicBlock *createEmptyBasicBlock(VPTransformState &State);
};
/// A special type of VPBasicBlock that wraps an existing IR basic block.
@@ -4132,55 +3787,6 @@ class VPlan {
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
-/// indented and follows the dot format.
-class VPlanPrinter {
- raw_ostream &OS;
- const VPlan &Plan;
- unsigned Depth = 0;
- unsigned TabWidth = 2;
- std::string Indent;
- unsigned BID = 0;
- SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
-
- VPSlotTracker SlotTracker;
-
- /// Handle indentation.
- void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
-
- /// Print a given \p Block of the Plan.
- void dumpBlock(const VPBlockBase *Block);
-
- /// Print the information related to the CFG edges going out of a given
- /// \p Block, followed by printing the successor blocks themselves.
- void dumpEdges(const VPBlockBase *Block);
-
- /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
- /// its successor blocks.
- void dumpBasicBlock(const VPBasicBlock *BasicBlock);
-
- /// Print a given \p Region of the Plan.
- void dumpRegion(const VPRegionBlock *Region);
-
- unsigned getOrCreateBID(const VPBlockBase *Block) {
- return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
- }
-
- Twine getOrCreateName(const VPBlockBase *Block);
-
- Twine getUID(const VPBlockBase *Block);
-
- /// Print the information related to a CFG edge between two VPBlockBases.
- void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
- const Twine &Label);
-
-public:
- VPlanPrinter(raw_ostream &O, const VPlan &P)
- : OS(O), Plan(P), SlotTracker(&P) {}
-
- LLVM_DUMP_METHOD void dump();
-};
-
struct VPlanIngredient {
const Value *V;
@@ -4200,139 +3806,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
}
#endif
-class VPInterleavedAccessInfo {
- DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
- InterleaveGroupMap;
-
- /// Type for mapping of instruction based interleave groups to VPInstruction
- /// interleave groups
- using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
- InterleaveGroup<VPInstruction> *>;
-
- /// Recursively \p Region and populate VPlan based interleave groups based on
- /// \p IAI.
- void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
- InterleavedAccessInfo &IAI);
- /// Recursively traverse \p Block and populate VPlan based interleave groups
- /// based on \p IAI.
- void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
- InterleavedAccessInfo &IAI);
-
-public:
- VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
-
- ~VPInterleavedAccessInfo() {
- SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
- // Avoid releasing a pointer twice.
- for (auto &I : InterleaveGroupMap)
- DelSet.insert(I.second);
- for (auto *Ptr : DelSet)
- delete Ptr;
- }
-
- /// Get the interleave group that \p Instr belongs to.
- ///
- /// \returns nullptr if doesn't have such group.
- InterleaveGroup<VPInstruction> *
- getInterleaveGroup(VPInstruction *Instr) const {
- return InterleaveGroupMap.lookup(Instr);
- }
-};
-
-/// Class that maps (parts of) an existing VPlan to trees of combined
-/// VPInstructions.
-class VPlanSlp {
- enum class OpMode { Failed, Load, Opcode };
-
- /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
- /// DenseMap keys.
- struct BundleDenseMapInfo {
- static SmallVector<VPValue *, 4> getEmptyKey() {
- return {reinterpret_cast<VPValue *>(-1)};
- }
-
- static SmallVector<VPValue *, 4> getTombstoneKey() {
- return {reinterpret_cast<VPValue *>(-2)};
- }
-
- static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
- return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
- }
-
- static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
- const SmallVector<VPValue *, 4> &RHS) {
- return LHS == RHS;
- }
- };
-
- /// Mapping of values in the original VPlan to a combined VPInstruction.
- DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
- BundleToCombined;
-
- VPInterleavedAccessInfo &IAI;
-
- /// Basic block to operate on. For now, only instructions in a single BB are
- /// considered.
- const VPBasicBlock &BB;
-
- /// Indicates whether we managed to combine all visited instructions or not.
- bool CompletelySLP = true;
-
- /// Width of the widest combined bundle in bits.
- unsigned WidestBundleBits = 0;
-
- using MultiNodeOpTy =
- typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
-
- // Input operand bundles for the current multi node. Each multi node operand
- // bundle contains values not matching the multi node's opcode. They will
- // be reordered in reorderMultiNodeOps, once we completed building a
- // multi node.
- SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
-
- /// Indicates whether we are building a multi node currently.
- bool MultiNodeActive = false;
-
- /// Check if we can vectorize Operands together.
- bool areVectorizable(ArrayRef<VPValue *> Operands) const;
-
- /// Add combined instruction \p New for the bundle \p Operands.
- void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
-
- /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
- VPInstruction *markFailed();
-
- /// Reorder operands in the multi node to maximize sequential memory access
- /// and commutative operations.
- SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
-
- /// Choose the best candidate to use for the lane after \p Last. The set of
- /// candidates to choose from are values with an opcode matching \p Last's
- /// or loads consecutive to \p Last.
- std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
- SmallPtrSetImpl<VPValue *> &Candidates,
- VPInterleavedAccessInfo &IAI);
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print bundle \p Values to dbgs().
- void dumpBundle(ArrayRef<VPValue *> Values);
-#endif
-
-public:
- VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
-
- ~VPlanSlp() = default;
-
- /// Tries to build an SLP tree rooted at \p Operands and returns a
- /// VPInstruction combining \p Operands, if they can be combined.
- VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
-
- /// Return the width of the widest combined bundle in bits.
- unsigned getWidestBundleBits() const { return WidestBundleBits; }
-
- /// Return true if all visited instruction can be combined.
- bool isCompletelySLP() const { return CompletelySLP; }
-};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
new file mode 100644
index 00000000000000..e5f66ca8060cba
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -0,0 +1,465 @@
+//===- VPlanHelpers.h - VPlan-related auxiliary helpers -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains the declarations of different VPlan-related auxiliary
+/// helpers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
+
+#include "VPlanAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/InstructionCost.h"
+
+namespace llvm {
+
+class BasicBlock;
+class DominatorTree;
+class InnerLoopVectorizer;
+class IRBuilderBase;
+class LoopInfo;
+class SCEV;
+class Type;
+class VPBasicBlock;
+class VPRegionBlock;
+class VPlan;
+class Value;
+class LoopVersioning;
+
+/// Returns a calculation for the total number of elements for a given \p VF.
+/// For fixed width vectors this value is a constant, whereas for scalable
+/// vectors it is an expression determined at runtime.
+Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
+
+/// Return a value for Step multiplied by VF.
+Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
+ int64_t Step);
+
+/// A helper function that returns the reciprocal of the block probability of
+/// predicated blocks. If we return X, we are assuming the predicated block
+/// will execute once for every X iterations of the loop header.
+///
+/// TODO: We should use actual block probability here, if available. Currently,
+/// we always assume predicated blocks have a 50% chance of executing.
+inline unsigned getReciprocalPredBlockProb() { return 2; }
+
+/// A range of powers-of-2 vectorization factors with fixed start and
+/// adjustable end. The range includes start and excludes end, e.g.,:
+/// [1, 16) = {1, 2, 4, 8}
+struct VFRange {
+ // A power of 2.
+ const ElementCount Start;
+
+ // A power of 2. If End <= Start range is empty.
+ ElementCount End;
+
+ bool isEmpty() const {
+ return End.getKnownMinValue() <= Start.getKnownMinValue();
+ }
+
+ VFRange(const ElementCount &Start, const ElementCount &End)
+ : Start(Start), End(End) {
+ assert(Start.isScalable() == End.isScalable() &&
+ "Both Start and End should have the same scalable flag");
+ assert(isPowerOf2_32(Start.getKnownMinValue()) &&
+ "Expected Start to be a power of 2");
+ assert(isPowerOf2_32(End.getKnownMinValue()) &&
+ "Expected End to be a power of 2");
+ }
+
+ /// Iterator to iterate over vectorization factors in a VFRange.
+ class iterator
+ : public iterator_facade_base<iterator, std::forward_iterator_tag,
+ ElementCount> {
+ ElementCount VF;
+
+ public:
+ iterator(ElementCount VF) : VF(VF) {}
+
+ bool operator==(const iterator &Other) const { return VF == Other.VF; }
+
+ ElementCount operator*() const { return VF; }
+
+ iterator &operator++() {
+ VF *= 2;
+ return *this;
+ }
+ };
+
+ iterator begin() { return iterator(Start); }
+ iterator end() {
+ assert(isPowerOf2_32(End.getKnownMinValue()));
+ return iterator(End);
+ }
+};
+
+/// In what follows, the term "input IR" refers to code that is fed into the
+/// vectorizer whereas the term "output IR" refers to code that is generated by
+/// the vectorizer.
+
+/// VPLane provides a way to access lanes in both fixed width and scalable
+/// vectors, where for the latter the lane index sometimes needs calculating
+/// as a runtime expression.
+class VPLane {
+public:
+ /// Kind describes how to interpret Lane.
+ enum class Kind : uint8_t {
+ /// For First, Lane is the index into the first N elements of a
+ /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
+ First,
+ /// For ScalableLast, Lane is the offset from the start of the last
+ /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
+ /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
+ /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
+ ScalableLast
+ };
+
+private:
+ /// in [0..VF)
+ unsigned Lane;
+
+ /// Indicates how the Lane should be interpreted, as described above.
+ Kind LaneKind;
+
+public:
+ VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
+ VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
+
+ static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
+
+ static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
+ assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
+ "trying to extract with invalid offset");
+ unsigned LaneOffset = VF.getKnownMinValue() - Offset;
+ Kind LaneKind;
+ if (VF.isScalable())
+ // In this case 'LaneOffset' refers to the offset from the start of the
+ // last subvector with VF.getKnownMinValue() elements.
+ LaneKind = VPLane::Kind::ScalableLast;
+ else
+ LaneKind = VPLane::Kind::First;
+ return VPLane(LaneOffset, LaneKind);
+ }
+
+ static VPLane getLastLaneForVF(const ElementCount &VF) {
+ return getLaneFromEnd(VF, 1);
+ }
+
+ /// Returns a compile-time known value for the lane index and asserts if the
+ /// lane can only be calculated at runtime.
+ unsigned getKnownLane() const {
+ assert(LaneKind == Kind::First);
+ return Lane;
+ }
+
+ /// Returns an expression describing the lane index that can be used at
+ /// runtime.
+ Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
+
+ /// Returns the Kind of lane offset.
+ Kind getKind() const { return LaneKind; }
+
+ /// Returns true if this is the first lane of the whole vector.
+ bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
+
+ /// Maps the lane to a cache index based on \p VF.
+ unsigned mapToCacheIndex(const ElementCount &VF) const {
+ switch (LaneKind) {
+ case VPLane::Kind::ScalableLast:
+ assert(VF.isScalable() && Lane < VF.getKnownMinValue());
+ return VF.getKnownMinValue() + Lane;
+ default:
+ assert(Lane < VF.getKnownMinValue());
+ return Lane;
+ }
+ }
+};
+
+/// VPTransformState holds information passed down when "executing" a VPlan,
+/// needed for generating the output IR.
+struct VPTransformState {
+ VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
+ LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
+ InnerLoopVectorizer *ILV, VPlan *Plan,
+ Loop *CurrentParentLoop, Type *CanonicalIVTy);
+ /// Target Transform Info.
+ const TargetTransformInfo *TTI;
+
+ /// The chosen Vectorization Factor of the loop being vectorized.
+ ElementCount VF;
+
+ /// Hold the index to generate specific scalar instructions. Null indicates
+ /// that all instances are to be generated, using either scalar or vector
+ /// instructions.
+ std::optional<VPLane> Lane;
+
+ struct DataState {
+ // Each value from the original loop, when vectorized, is represented by a
+ // vector value in the map.
+ DenseMap<VPValue *, Value *> VPV2Vector;
+
+ DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
+ } Data;
+
+ /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
+ /// is false, otherwise return the generated scalar. \see set.
+ Value *get(VPValue *Def, bool IsScalar = false);
+
+ /// Get the generated Value for a given VPValue \p Def and the given \p Lane.
+ Value *get(VPValue *Def, const VPLane &Lane);
+
+ bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
+
+ bool hasScalarValue(VPValue *Def, VPLane Lane) {
+ auto I = Data.VPV2Scalars.find(Def);
+ if (I == Data.VPV2Scalars.end())
+ return false;
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ return CacheIdx < I->second.size() && I->second[CacheIdx];
+ }
+
+ /// Set the generated vector Value for a given VPValue, if \p
+ /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
+ void set(VPValue *Def, Value *V, bool IsScalar = false) {
+ if (IsScalar) {
+ set(Def, V, VPLane(0));
+ return;
+ }
+ assert((VF.isScalar() || V->getType()->isVectorTy()) &&
+ "scalar values must be stored as (0, 0)");
+ Data.VPV2Vector[Def] = V;
+ }
+
+ /// Reset an existing vector value for \p Def to \p V.
+ void reset(VPValue *Def, Value *V) {
+ assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
+ Data.VPV2Vector[Def] = V;
+ }
+
+ /// Set the generated scalar \p V for \p Def and the given \p Lane.
+ void set(VPValue *Def, Value *V, const VPLane &Lane) {
+ auto &Scalars = Data.VPV2Scalars[Def];
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ if (Scalars.size() <= CacheIdx)
+ Scalars.resize(CacheIdx + 1);
+ assert(!Scalars[CacheIdx] && "should overwrite existing value");
+ Scalars[CacheIdx] = V;
+ }
+
+ /// Reset an existing scalar value for \p Def and a given \p Lane.
+ void reset(VPValue *Def, Value *V, const VPLane &Lane) {
+ auto Iter = Data.VPV2Scalars.find(Def);
+ assert(Iter != Data.VPV2Scalars.end() &&
+ "need to overwrite existing value");
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ assert(CacheIdx < Iter->second.size() &&
+ "need to overwrite existing value");
+ Iter->second[CacheIdx] = V;
+ }
+
+ /// Add additional metadata to \p To that was not present on \p Orig.
+ ///
+ /// Currently this is used to add the noalias annotations based on the
+ /// inserted memchecks. Use this for instructions that are *cloned* into the
+ /// vector loop.
+ void addNewMetadata(Instruction *To, const Instruction *Orig);
+
+ /// Add metadata from one instruction to another.
+ ///
+ /// This includes both the original MDs from \p From and additional ones (\see
+ /// addNewMetadata). Use this for *newly created* instructions in the vector
+ /// loop.
+ void addMetadata(Value *To, Instruction *From);
+
+ /// Set the debug location in the builder using the debug location \p DL.
+ void setDebugLocFrom(DebugLoc DL);
+
+ /// Construct the vector value of a scalarized value \p V one lane at a time.
+ void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
+
+ /// Hold state information used when constructing the CFG of the output IR,
+ /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
+ struct CFGState {
+ /// The previous VPBasicBlock visited. Initially set to null.
+ VPBasicBlock *PrevVPBB = nullptr;
+
+ /// The previous IR BasicBlock created or used. Initially set to the new
+ /// header BasicBlock.
+ BasicBlock *PrevBB = nullptr;
+
+ /// The last IR BasicBlock in the output IR. Set to the exit block of the
+ /// vector loop.
+ BasicBlock *ExitBB = nullptr;
+
+ /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
+ /// of replication, maps the BasicBlock of the last replica created.
+ SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
+
+ /// Updater for the DominatorTree.
+ DomTreeUpdater DTU;
+
+ CFGState(DominatorTree *DT)
+ : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
+
+ /// Returns the BasicBlock* mapped to the pre-header of the loop region
+ /// containing \p R.
+ BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
+ } CFG;
+
+ /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
+ LoopInfo *LI;
+
+ /// Hold a reference to the IRBuilder used to generate output IR code.
+ IRBuilderBase &Builder;
+
+ /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
+ InnerLoopVectorizer *ILV;
+
+ /// Pointer to the VPlan code is generated for.
+ VPlan *Plan;
+
+ /// The parent loop object for the current scope, or nullptr.
+ Loop *CurrentParentLoop = nullptr;
+
+ /// LoopVersioning. It's only set up (non-null) if memchecks were
+ /// used.
+ ///
+ /// This is currently only used to add no-alias metadata based on the
+ /// memchecks. The actual versioning is performed manually.
+ LoopVersioning *LVer = nullptr;
+
+ /// Map SCEVs to their expanded values. Populated when executing
+ /// VPExpandSCEVRecipes.
+ DenseMap<const SCEV *, Value *> ExpandedSCEVs;
+
+ /// VPlan-based type analysis.
+ VPTypeAnalysis TypeAnalysis;
+};
+
+/// Struct to hold various analyses needed for cost computations.
+struct VPCostContext {
+ const TargetTransformInfo &TTI;
+ const TargetLibraryInfo &TLI;
+ VPTypeAnalysis Types;
+ LLVMContext &LLVMCtx;
+ LoopVectorizationCostModel &CM;
+ SmallPtrSet<Instruction *, 8> SkipCostComputation;
+ TargetTransformInfo::TargetCostKind CostKind;
+
+ VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI,
+ Type *CanIVTy, LoopVectorizationCostModel &CM,
+ TargetTransformInfo::TargetCostKind CostKind)
+ : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
+ CM(CM), CostKind(CostKind) {}
+
+ /// Return the cost for \p UI with \p VF using the legacy cost model as
+ /// fallback until computing the cost of all recipes migrates to VPlan.
+ InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const;
+
+ /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
+ /// has already been pre-computed.
+ bool skipCostComputation(Instruction *UI, bool IsVector) const;
+
+ /// Returns the OperandInfo for \p V, if it is a live-in.
+ TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const;
+};
+
+/// This class can be used to assign names to VPValues. For VPValues without
+/// underlying value, assign consecutive numbers and use those as names (wrapped
+/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
+/// ir<>), appending a .V version number if there are multiple uses of the same
+/// name. Allows querying names for VPValues for printing, similar to the
+/// ModuleSlotTracker for IR values.
+class VPSlotTracker {
+ /// Keep track of versioned names assigned to VPValues with underlying IR
+ /// values.
+ DenseMap<const VPValue *, std::string> VPValue2Name;
+ /// Keep track of the next number to use to version the base name.
+ StringMap<unsigned> BaseName2Version;
+
+ /// Number to assign to the next VPValue without underlying value.
+ unsigned NextSlot = 0;
+
+ void assignName(const VPValue *V);
+ void assignNames(const VPlan &Plan);
+ void assignNames(const VPBasicBlock *VPBB);
+
+public:
+ VPSlotTracker(const VPlan *Plan = nullptr) {
+ if (Plan)
+ assignNames(*Plan);
+ }
+
+ /// Returns the name assigned to \p V, if there is one, otherwise try to
+ /// construct one from the underlying value, if there's one; else return
+ /// <badref>.
+ std::string getOrCreateName(const VPValue *V) const;
+};
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
+/// indented and follows the dot format.
+class VPlanPrinter {
+ raw_ostream &OS;
+ const VPlan &Plan;
+ unsigned Depth = 0;
+ unsigned TabWidth = 2;
+ std::string Indent;
+ unsigned BID = 0;
+ SmallDenseMap<const VPBlockBase *, unsigned> BlockID;
+
+ VPSlotTracker SlotTracker;
+
+ /// Handle indentation.
+ void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
+
+ /// Print a given \p Block of the Plan.
+ void dumpBlock(const VPBlockBase *Block);
+
+ /// Print the information related to the CFG edges going out of a given
+ /// \p Block, followed by printing the successor blocks themselves.
+ void dumpEdges(const VPBlockBase *Block);
+
+ /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
+ /// its successor blocks.
+ void dumpBasicBlock(const VPBasicBlock *BasicBlock);
+
+ /// Print a given \p Region of the Plan.
+ void dumpRegion(const VPRegionBlock *Region);
+
+ unsigned getOrCreateBID(const VPBlockBase *Block) {
+ return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
+ }
+
+ Twine getOrCreateName(const VPBlockBase *Block);
+
+ Twine getUID(const VPBlockBase *Block);
+
+ /// Print the information related to a CFG edge between two VPBlockBases.
+ void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
+ const Twine &Label);
+
+public:
+ VPlanPrinter(raw_ostream &O, const VPlan &P)
+ : OS(O), Plan(P), SlotTracker(&P) {}
+
+ LLVM_DUMP_METHOD void dump();
+};
+#endif
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7b5d0d70933fd0..85fc6d3fdd6b36 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -13,12 +13,14 @@
#include "VPlan.h"
#include "VPlanAnalysis.h"
+#include "VPlanHelpers.h"
#include "VPlanPatternMatch.h"
#include "VPlanUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
index 98ccf216946357..455a3c521ba746 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -14,10 +14,13 @@
///
//===----------------------------------------------------------------------===//
+#include "VPlanSLP.h"
#include "VPlan.h"
+#include "VPlanCFG.h"
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -39,6 +42,58 @@ using namespace llvm;
// Number of levels to look ahead when re-ordering multi node operands.
static unsigned LookaheadMaxDepth = 5;
+void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
+ Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Region->getEntry());
+ for (VPBlockBase *Base : RPOT) {
+ visitBlock(Base, Old2New, IAI);
+ }
+}
+
+void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI) {
+ if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
+ for (VPRecipeBase &VPI : *VPBB) {
+ if (isa<VPWidenPHIRecipe>(&VPI))
+ continue;
+ assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
+ auto *VPInst = cast<VPInstruction>(&VPI);
+
+ auto *Inst = dyn_cast_or_null<Instruction>(VPInst->getUnderlyingValue());
+ if (!Inst)
+ continue;
+ auto *IG = IAI.getInterleaveGroup(Inst);
+ if (!IG)
+ continue;
+
+ auto NewIGIter = Old2New.find(IG);
+ if (NewIGIter == Old2New.end())
+ Old2New[IG] = new InterleaveGroup<VPInstruction>(
+ IG->getFactor(), IG->isReverse(), IG->getAlign());
+
+ if (Inst == IG->getInsertPos())
+ Old2New[IG]->setInsertPos(VPInst);
+
+ InterleaveGroupMap[VPInst] = Old2New[IG];
+ InterleaveGroupMap[VPInst]->insertMember(
+ VPInst, IG->getIndex(Inst),
+ Align(IG->isReverse() ? (-1) * int(IG->getFactor())
+ : IG->getFactor()));
+ }
+ } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
+ visitRegion(Region, Old2New, IAI);
+ else
+ llvm_unreachable("Unsupported kind of VPBlock.");
+}
+
+VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
+ InterleavedAccessInfo &IAI) {
+ Old2NewTy Old2New;
+ visitRegion(Plan.getVectorLoopRegion(), Old2New, IAI);
+}
+
VPInstruction *VPlanSlp::markFailed() {
// FIXME: Currently this is used to signal we hit instructions we cannot
// trivially SLP'ize.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.h b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
new file mode 100644
index 00000000000000..a40ebd28deea23
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.h
@@ -0,0 +1,166 @@
+//===- VPlanSLP.h - VPlan-based SLP ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains the declarations for VPlan-based SLP.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/VectorUtils.h"
+
+namespace llvm {
+
+class VPBasicBlock;
+class VPBlockBase;
+class VPRegionBlock;
+class VPlan;
+class VPValue;
+class VPInstruction;
+
+class VPInterleavedAccessInfo {
+ DenseMap<VPInstruction *, InterleaveGroup<VPInstruction> *>
+ InterleaveGroupMap;
+
+ /// Type for mapping of instruction based interleave groups to VPInstruction
+ /// interleave groups
+ using Old2NewTy = DenseMap<InterleaveGroup<Instruction> *,
+ InterleaveGroup<VPInstruction> *>;
+
+ /// Recursively traverse \p Region and populate VPlan based interleave groups
+ /// based on \p IAI.
+ void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI);
+ /// Recursively traverse \p Block and populate VPlan based interleave groups
+ /// based on \p IAI.
+ void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
+ InterleavedAccessInfo &IAI);
+
+public:
+ VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI);
+
+ ~VPInterleavedAccessInfo() {
+ SmallPtrSet<InterleaveGroup<VPInstruction> *, 4> DelSet;
+ // Avoid releasing a pointer twice.
+ for (auto &I : InterleaveGroupMap)
+ DelSet.insert(I.second);
+ for (auto *Ptr : DelSet)
+ delete Ptr;
+ }
+
+ /// Get the interleave group that \p Instr belongs to.
+ ///
+ /// \returns nullptr if \p Instr does not belong to any interleave group.
+ InterleaveGroup<VPInstruction> *
+ getInterleaveGroup(VPInstruction *Instr) const {
+ return InterleaveGroupMap.lookup(Instr);
+ }
+};
+
+/// Class that maps (parts of) an existing VPlan to trees of combined
+/// VPInstructions.
+class VPlanSlp {
+ enum class OpMode { Failed, Load, Opcode };
+
+ /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
+ /// DenseMap keys.
+ struct BundleDenseMapInfo {
+ static SmallVector<VPValue *, 4> getEmptyKey() {
+ return {reinterpret_cast<VPValue *>(-1)};
+ }
+
+ static SmallVector<VPValue *, 4> getTombstoneKey() {
+ return {reinterpret_cast<VPValue *>(-2)};
+ }
+
+ static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+ }
+
+ static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
+ const SmallVector<VPValue *, 4> &RHS) {
+ return LHS == RHS;
+ }
+ };
+
+ /// Mapping of values in the original VPlan to a combined VPInstruction.
+ DenseMap<SmallVector<VPValue *, 4>, VPInstruction *, BundleDenseMapInfo>
+ BundleToCombined;
+
+ VPInterleavedAccessInfo &IAI;
+
+ /// Basic block to operate on. For now, only instructions in a single BB are
+ /// considered.
+ const VPBasicBlock &BB;
+
+ /// Indicates whether we managed to combine all visited instructions or not.
+ bool CompletelySLP = true;
+
+ /// Width of the widest combined bundle in bits.
+ unsigned WidestBundleBits = 0;
+
+ using MultiNodeOpTy =
+ typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
+
+ // Input operand bundles for the current multi node. Each multi node operand
+ // bundle contains values not matching the multi node's opcode. They will
+ // be reordered in reorderMultiNodeOps, once we completed building a
+ // multi node.
+ SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
+
+ /// Indicates whether we are building a multi node currently.
+ bool MultiNodeActive = false;
+
+ /// Check if we can vectorize Operands together.
+ bool areVectorizable(ArrayRef<VPValue *> Operands) const;
+
+ /// Add combined instruction \p New for the bundle \p Operands.
+ void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
+
+ /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
+ VPInstruction *markFailed();
+
+ /// Reorder operands in the multi node to maximize sequential memory access
+ /// and commutative operations.
+ SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
+
+ /// Choose the best candidate to use for the lane after \p Last. The set of
+ /// candidates to choose from are values with an opcode matching \p Last's
+ /// or loads consecutive to \p Last.
+ std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
+ SmallPtrSetImpl<VPValue *> &Candidates,
+ VPInterleavedAccessInfo &IAI);
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print bundle \p Values to dbgs().
+ void dumpBundle(ArrayRef<VPValue *> Values);
+#endif
+
+public:
+ VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
+
+ ~VPlanSlp() = default;
+
+ /// Tries to build an SLP tree rooted at \p Operands and returns a
+ /// VPInstruction combining \p Operands, if they can be combined.
+ VPInstruction *buildGraph(ArrayRef<VPValue *> Operands);
+
+ /// Return the width of the widest combined bundle in bits.
+ unsigned getWidestBundleBits() const { return WidestBundleBits; }
+
+ /// Return true if all visited instructions can be combined.
+ bool isCompletelySLP() const { return CompletelySLP; }
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VPLANSLP_H
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9febd612c644e1..3d5b6d619b44bf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 23e39ce89a3a42..aabc4ab571e7a9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -435,41 +435,6 @@ class VPDef {
#endif
};
-class VPlan;
-class VPBasicBlock;
-
-/// This class can be used to assign names to VPValues. For VPValues without
-/// underlying value, assign consecutive numbers and use those as names (wrapped
-/// in vp<>). Otherwise, use the name from the underlying value (wrapped in
-/// ir<>), appending a .V version number if there are multiple uses of the same
-/// name. Allows querying names for VPValues for printing, similar to the
-/// ModuleSlotTracker for IR values.
-class VPSlotTracker {
- /// Keep track of versioned names assigned to VPValues with underlying IR
- /// values.
- DenseMap<const VPValue *, std::string> VPValue2Name;
- /// Keep track of the next number to use to version the base name.
- StringMap<unsigned> BaseName2Version;
-
- /// Number to assign to the next VPValue without underlying value.
- unsigned NextSlot = 0;
-
- void assignName(const VPValue *V);
- void assignNames(const VPlan &Plan);
- void assignNames(const VPBasicBlock *VPBB);
-
-public:
- VPSlotTracker(const VPlan *Plan = nullptr) {
- if (Plan)
- assignNames(*Plan);
- }
-
- /// Returns the name assigned to \p V, if there is one, otherwise try to
- /// construct one from the underlying value, if there's one; else return
- /// <badref>.
- std::string getOrCreateName(const VPValue *V) const;
-};
-
} // namespace llvm
#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_VALUE_H
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
index e3c542ec5cac85..3a2658ea1e8dc8 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "../lib/Transforms/Vectorize/VPlanSLP.h"
#include "../lib/Transforms/Vectorize/VPlan.h"
#include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h"
#include "VPlanTestBase.h"
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index e7987a95f1ca26..23ecffa2db3b73 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -9,6 +9,7 @@
#include "../lib/Transforms/Vectorize/VPlan.h"
#include "../lib/Transforms/Vectorize/VPlanCFG.h"
+#include "../lib/Transforms/Vectorize/VPlanHelpers.h"
#include "VPlanTestBase.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
>From fda19f926a119ae9ec7fbcddb22d2b8a8eae626c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 23 Jan 2025 20:35:12 +0000
Subject: [PATCH 2/2] !fixup add assert messages, default value
---
llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 13 ++++++++-----
llvm/lib/Transforms/Vectorize/VPlanSLP.cpp | 5 +++--
2 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
index e5f66ca8060cba..74713daf904f04 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h
@@ -132,10 +132,10 @@ class VPLane {
unsigned Lane;
/// Indicates how the Lane should be interpreted, as described above.
- Kind LaneKind;
+ Kind LaneKind = Kind::First;
public:
- VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
+ VPLane(unsigned Lane) : Lane(Lane) {}
VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
@@ -161,7 +161,8 @@ class VPLane {
/// Returns a compile-time known value for the lane index and asserts if the
/// lane can only be calculated at runtime.
unsigned getKnownLane() const {
- assert(LaneKind == Kind::First);
+ assert(LaneKind == Kind::First &&
+ "can only get known lane from the beginning");
return Lane;
}
@@ -179,10 +180,12 @@ class VPLane {
unsigned mapToCacheIndex(const ElementCount &VF) const {
switch (LaneKind) {
case VPLane::Kind::ScalableLast:
- assert(VF.isScalable() && Lane < VF.getKnownMinValue());
+ assert(VF.isScalable() && Lane < VF.getKnownMinValue() &&
+ "ScalableLast can only be used with scalable VFs");
return VF.getKnownMinValue() + Lane;
default:
- assert(Lane < VF.getKnownMinValue());
+ assert(Lane < VF.getKnownMinValue() &&
+ "Cannot extract lane larger than VF");
return Lane;
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
index 455a3c521ba746..e41f7ca575d924 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanSLP.cpp
@@ -82,10 +82,11 @@ void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
Align(IG->isReverse() ? (-1) * int(IG->getFactor())
: IG->getFactor()));
}
- } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
+ } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) {
visitRegion(Region, Old2New, IAI);
- else
+ } else {
llvm_unreachable("Unsupported kind of VPBlock.");
+ }
}
VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
More information about the llvm-commits
mailing list