[llvm] [VPlan] Move auxiliary declarations out of VPlan.h (NFC). (PR #124104)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 12:50:52 PST 2025
================
@@ -0,0 +1,471 @@
+//===- VPlanHelpers.h - VPlan-related auxiliary helpers -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains the declarations of different VPlan-related auxiliary
+/// helpers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VPLANHELPERS_H
+
+#include "VPlanAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/InstructionCost.h"
+
+namespace llvm {
+
+class BasicBlock;
+class DominatorTree;
+class InnerLoopVectorizer;
+class IRBuilderBase;
+class LoopInfo;
+class SCEV;
+class Type;
+class VPBasicBlock;
+class VPRegionBlock;
+class VPlan;
+class Value;
+class LoopVersioning;
+
+/// Returns a calculation for the total number of elements for a given \p VF.
+/// For fixed width vectors this value is a constant, whereas for scalable
+/// vectors it is an expression determined at runtime.
+Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
+
+/// Return a value for Step multiplied by VF.
+Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
+ int64_t Step);
+
+/// A helper function that returns the reciprocal of the block probability of
+/// predicated blocks. If we return X, we are assuming the predicated block
+/// will execute once for every X iterations of the loop header.
+///
+/// TODO: We should use actual block probability here, if available. Currently,
+/// we always assume predicated blocks have a 50% chance of executing.
+inline unsigned getReciprocalPredBlockProb() { return 2; } // 2 == 1 / 0.5, i.e. the assumed 50% chance.
+
+/// A range of powers-of-2 vectorization factors with fixed start and
+/// adjustable end. The range includes start and excludes end, e.g.:
+/// [1, 16) = {1, 2, 4, 8}
+struct VFRange {
+ // A power of 2. Const, so the start of the range is fixed at construction.
+ const ElementCount Start;
+
+ // A power of 2. If End <= Start, the range is empty.
+ ElementCount End;
+
+ bool isEmpty() const {
+ return End.getKnownMinValue() <= Start.getKnownMinValue(); // Only the known minimum values are compared.
+ }
+
+ VFRange(const ElementCount &Start, const ElementCount &End)
+ : Start(Start), End(End) {
+ assert(Start.isScalable() == End.isScalable() &&
+ "Both Start and End should have the same scalable flag");
+ assert(isPowerOf2_32(Start.getKnownMinValue()) &&
+ "Expected Start to be a power of 2");
+ assert(isPowerOf2_32(End.getKnownMinValue()) &&
+ "Expected End to be a power of 2");
+ }
+
+ /// Forward iterator over the VFs in a VFRange; incrementing doubles the VF.
+ class iterator
+ : public iterator_facade_base<iterator, std::forward_iterator_tag,
+ ElementCount> {
+ ElementCount VF;
+
+ public:
+ iterator(ElementCount VF) : VF(VF) {}
+
+ bool operator==(const iterator &Other) const { return VF == Other.VF; }
+
+ ElementCount operator*() const { return VF; }
+
+ iterator &operator++() {
+ VF *= 2; // Step to the next power-of-2 vectorization factor.
+ return *this;
+ }
+ };
+
+ iterator begin() { return iterator(Start); }
+ iterator end() {
+ assert(isPowerOf2_32(End.getKnownMinValue())); // End is mutable; re-check the constructor's invariant.
+ return iterator(End);
+ }
+};
+
+/// In what follows, the term "input IR" refers to code that is fed into the
+/// vectorizer whereas the term "output IR" refers to code that is generated by
+/// the vectorizer.
+
+/// VPLane provides a way to access lanes in both fixed width and scalable
+/// vectors, where for the latter the lane index sometimes needs calculating
+/// as a runtime expression.
+class VPLane {
+public:
+ /// Kind describes how to interpret Lane.
+ enum class Kind : uint8_t {
+ /// For First, Lane is the index into the first N elements of a
+ /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
+ First,
+ /// For ScalableLast, Lane is the offset from the start of the last
+ /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
+ /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
+ /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
+ ScalableLast
+ };
+
+private:
+ /// The lane index or offset, in [0..VF); interpreted according to LaneKind.
+ unsigned Lane;
+
+ /// Indicates how the Lane should be interpreted, as described above.
+ Kind LaneKind;
+
+public:
+ VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {} // Implicit from unsigned; kind is First.
+ VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
+
+ static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
+
+ static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
+ assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
+ "trying to extract with invalid offset");
+ unsigned LaneOffset = VF.getKnownMinValue() - Offset; // Offset 1 selects the last lane.
+ Kind LaneKind;
+ if (VF.isScalable())
+ // In this case 'LaneOffset' refers to the offset from the start of the
+ // last subvector with VF.getKnownMinValue() elements.
+ LaneKind = VPLane::Kind::ScalableLast;
+ else
+ LaneKind = VPLane::Kind::First;
+ return VPLane(LaneOffset, LaneKind);
+ }
+
+ static VPLane getLastLaneForVF(const ElementCount &VF) {
+ return getLaneFromEnd(VF, 1);
+ }
+
+ /// Returns a compile-time known value for the lane index and asserts if the
+ /// lane can only be calculated at runtime.
+ unsigned getKnownLane() const {
+ assert(LaneKind == Kind::First); // Only Kind::First lanes are accepted here.
+ return Lane;
+ }
+
+ /// Returns an expression describing the lane index that can be used at
+ /// runtime.
+ Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
+
+ /// Returns the Kind of lane offset.
+ Kind getKind() const { return LaneKind; }
+
+ /// Returns true if this is the first lane of the whole vector.
+ bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
+
+ /// Maps the lane to a cache index based on \p VF.
+ unsigned mapToCacheIndex(const ElementCount &VF) const {
+ switch (LaneKind) {
+ case VPLane::Kind::ScalableLast:
+ assert(VF.isScalable() && Lane < VF.getKnownMinValue());
+ return VF.getKnownMinValue() + Lane; // ScalableLast slots follow the First slots.
+ default:
+ assert(Lane < VF.getKnownMinValue());
+ return Lane; // First lanes map directly to slots [0, VF.getKnownMinValue()).
+ }
+ }
+
+ /// Returns the maximum number of lanes that we are able to consider
+ /// caching for \p VF.
+ static unsigned getNumCachedLanes(const ElementCount &VF) {
+ return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1); // Scalable VFs also need the ScalableLast slots.
+ }
+};
+
+/// VPTransformState holds information passed down when "executing" a VPlan,
+/// needed for generating the output IR.
+struct VPTransformState {
+ VPTransformState(const TargetTransformInfo *TTI, ElementCount VF, unsigned UF,
+ LoopInfo *LI, DominatorTree *DT, IRBuilderBase &Builder,
+ InnerLoopVectorizer *ILV, VPlan *Plan,
+ Loop *CurrentParentLoop, Type *CanonicalIVTy);
+ /// Target Transform Info.
+ const TargetTransformInfo *TTI;
+
+ /// The chosen Vectorization Factor of the loop being vectorized.
+ ElementCount VF;
+
+ /// Hold the index to generate specific scalar instructions. Null indicates
+ /// that all instances are to be generated, using either scalar or vector
+ /// instructions.
+ std::optional<VPLane> Lane;
+
+ struct DataState {
+ // Each value from the original loop, when vectorized, is represented by a
+ // vector value in the map.
+ DenseMap<VPValue *, Value *> VPV2Vector;
+
+ DenseMap<VPValue *, SmallVector<Value *, 4>> VPV2Scalars;
+ } Data;
+
+ /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
+ /// is false, otherwise return the generated scalar. \see set.
+ Value *get(VPValue *Def, bool IsScalar = false);
+
+ /// Get the generated Value for a given VPValue \p Def and a given \p Lane.
+ Value *get(VPValue *Def, const VPLane &Lane);
+
+ bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
+
+ bool hasScalarValue(VPValue *Def, VPLane Lane) {
+ auto I = Data.VPV2Scalars.find(Def);
+ if (I == Data.VPV2Scalars.end())
+ return false;
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ return CacheIdx < I->second.size() && I->second[CacheIdx];
+ }
+
+ /// Set the generated vector Value for a given VPValue, if \p
+ /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
+ void set(VPValue *Def, Value *V, bool IsScalar = false) {
+ if (IsScalar) {
+ set(Def, V, VPLane(0));
+ return;
+ }
+ assert((VF.isScalar() || V->getType()->isVectorTy()) &&
+ "scalar values must be stored as (0, 0)");
+ Data.VPV2Vector[Def] = V;
+ }
+
+ /// Reset an existing vector value for \p Def.
+ void reset(VPValue *Def, Value *V) {
+ assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
+ Data.VPV2Vector[Def] = V;
+ }
+
+ /// Set the generated scalar \p V for \p Def and the given \p Lane.
+ void set(VPValue *Def, Value *V, const VPLane &Lane) {
+ auto &Scalars = Data.VPV2Scalars[Def];
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ if (Scalars.size() <= CacheIdx)
+ Scalars.resize(CacheIdx + 1);
+ assert(!Scalars[CacheIdx] && "should overwrite existing value");
+ Scalars[CacheIdx] = V;
+ }
+
+ /// Reset an existing scalar value for \p Def and a given \p Lane.
+ void reset(VPValue *Def, Value *V, const VPLane &Lane) {
+ auto Iter = Data.VPV2Scalars.find(Def);
+ assert(Iter != Data.VPV2Scalars.end() &&
+ "need to overwrite existing value");
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
+ assert(CacheIdx < Iter->second.size() &&
+ "need to overwrite existing value");
+ Iter->second[CacheIdx] = V;
+ }
+
+ /// Add additional metadata to \p To that was not present on \p Orig.
+ ///
+ /// Currently this is used to add the noalias annotations based on the
+ /// inserted memchecks. Use this for instructions that are *cloned* into the
+ /// vector loop.
+ void addNewMetadata(Instruction *To, const Instruction *Orig);
+
+ /// Add metadata from one instruction to another.
+ ///
+ /// This includes both the original MDs from \p From and additional ones (\see
+ /// addNewMetadata). Use this for *newly created* instructions in the vector
+ /// loop.
+ void addMetadata(Value *To, Instruction *From);
+
+ /// Set the debug location in the builder using the debug location \p DL.
+ void setDebugLocFrom(DebugLoc DL);
+
+ /// Construct the vector value of a scalarized value \p V one lane at a time.
+ void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
+
+ /// Hold state information used when constructing the CFG of the output IR,
+ /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
+ struct CFGState {
+ /// The previous VPBasicBlock visited. Initially set to null.
+ VPBasicBlock *PrevVPBB = nullptr;
+
+ /// The previous IR BasicBlock created or used. Initially set to the new
+ /// header BasicBlock.
+ BasicBlock *PrevBB = nullptr;
+
+ /// The last IR BasicBlock in the output IR. Set to the exit block of the
+ /// vector loop.
+ BasicBlock *ExitBB = nullptr;
+
+ /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
+ /// of replication, maps the BasicBlock of the last replica created.
+ SmallDenseMap<VPBasicBlock *, BasicBlock *> VPBB2IRBB;
+
+ /// Updater for the DominatorTree.
+ DomTreeUpdater DTU;
+
+ CFGState(DominatorTree *DT)
+ : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
+
+ /// Returns the BasicBlock* mapped to the pre-header of the loop region
+ /// containing \p R.
+ BasicBlock *getPreheaderBBFor(VPRecipeBase *R);
+ } CFG;
+
+ /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
+ LoopInfo *LI;
+
+ /// Hold a reference to the IRBuilder used to generate output IR code.
+ IRBuilderBase &Builder;
+
+ /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
+ InnerLoopVectorizer *ILV;
+
+ /// Pointer to the VPlan code is generated for.
+ VPlan *Plan;
+
+ /// The parent loop object for the current scope, or nullptr.
+ Loop *CurrentParentLoop = nullptr;
+
+ /// LoopVersioning. It's only set up (non-null) if memchecks were
+ /// used.
+ ///
+ /// This is currently only used to add no-alias metadata based on the
+ /// memchecks. The actual versioning is performed manually.
+ LoopVersioning *LVer = nullptr;
----------------
fhahn wrote:
At the moment it is only set if memory checks are needed, hence the pointer. Others like `VPlan`, `ILV` & co. could be references, but I'd prefer to update that separately and keep this PR mostly a plain move.
https://github.com/llvm/llvm-project/pull/124104
More information about the llvm-commits
mailing list