[llvm] aae7ac6 - [VPlan] Remove VPIteration, update to use directly VPLane instead (NFC)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 08:45:46 PDT 2024
Author: Florian Hahn
Date: 2024-09-25T16:44:42+01:00
New Revision: aae7ac668588192e21a2435da0229fa0f49c231f
URL: https://github.com/llvm/llvm-project/commit/aae7ac668588192e21a2435da0229fa0f49c231f
DIFF: https://github.com/llvm/llvm-project/commit/aae7ac668588192e21a2435da0229fa0f49c231f.diff
LOG: [VPlan] Remove VPIteration, update to use directly VPLane instead (NFC)
After 8ec406757cb92 (https://github.com/llvm/llvm-project/pull/95842),
only the lane part of VPIteration is used.
Simplify the code by replacing remaining uses of VPIteration with VPLane directly.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cac0b57fc69649..db4631e19c11d3 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -505,8 +505,7 @@ class InnerLoopVectorizer {
/// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p
/// Instr's operands.
void scalarizeInstruction(const Instruction *Instr,
- VPReplicateRecipe *RepRecipe,
- const VPIteration &Instance,
+ VPReplicateRecipe *RepRecipe, const VPLane &Lane,
VPTransformState &State);
/// Fix the non-induction PHIs in \p Plan.
@@ -2322,14 +2321,14 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
VPReplicateRecipe *RepRecipe,
- const VPIteration &Instance,
+ const VPLane &Lane,
VPTransformState &State) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for
// the first lane and part.
if (isa<NoAliasScopeDeclInst>(Instr))
- if (!Instance.isFirstIteration())
+ if (!Lane.isFirstLane())
return;
// Does this instruction return a value ?
@@ -2354,18 +2353,18 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
for (const auto &I : enumerate(RepRecipe->operands())) {
- auto InputInstance = Instance;
+ auto InputLane = Lane;
VPValue *Operand = I.value();
if (vputils::isUniformAfterVectorization(Operand))
- InputInstance.Lane = VPLane::getFirstLane();
- Cloned->setOperand(I.index(), State.get(Operand, InputInstance));
+ InputLane = VPLane::getFirstLane();
+ Cloned->setOperand(I.index(), State.get(Operand, InputLane));
}
State.addNewMetadata(Cloned, Instr);
// Place the cloned scalar in the new loop.
State.Builder.Insert(Cloned);
- State.set(RepRecipe, Cloned, Instance);
+ State.set(RepRecipe, Cloned, Lane);
// If we just cloned a new assumption, add it to the assumption cache.
if (auto *II = dyn_cast<AssumeInst>(Cloned))
@@ -2784,7 +2783,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
assert(StepVPV && "step must have been expanded during VPlan execution");
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
- : State.get(StepVPV, {0, 0});
+ : State.get(StepVPV, VPLane(0));
Value *Escape =
emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
II.getKind(), II.getInductionBinOp());
@@ -7435,8 +7434,7 @@ static void createAndCollectMergePhiForReduction(
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- Value *FinalValue =
- State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
+ Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
auto *ResumePhi =
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
@@ -7525,7 +7523,7 @@ LoopVectorizationPlanner::executePlan(
BestVPlan.getPreheader()->execute(&State);
}
if (!ILV.getTripCount())
- ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0}));
+ ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
else
assert(IsEpilogueVectorization && "should only re-use the existing trip "
"count during epilogue vectorization");
@@ -9409,48 +9407,48 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
void VPDerivedIVRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "VPDerivedIVRecipe being replicated.");
+ assert(!State.Lane && "VPDerivedIVRecipe being replicated.");
// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
if (FPBinOp)
State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());
- Value *Step = State.get(getStepValue(), VPIteration(0, 0));
- Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0));
+ Value *Step = State.get(getStepValue(), VPLane(0));
+ Value *CanonicalIV = State.get(getOperand(1), VPLane(0));
Value *DerivedIV = emitTransformedIndex(
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
Kind, cast_if_present<BinaryOperator>(FPBinOp));
DerivedIV->setName("offset.idx");
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
- State.set(this, DerivedIV, VPIteration(0, 0));
+ State.set(this, DerivedIV, VPLane(0));
}
void VPReplicateRecipe::execute(VPTransformState &State) {
Instruction *UI = getUnderlyingInstr();
- if (State.Instance) { // Generate a single instance.
+ if (State.Lane) { // Generate a single instance.
assert((State.VF.isScalar() || !isUniform()) &&
"uniform recipe shouldn't be predicated");
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
- State.ILV->scalarizeInstruction(UI, this, *State.Instance, State);
+ State.ILV->scalarizeInstruction(UI, this, *State.Lane, State);
// Insert scalar instance packing it into a vector.
if (State.VF.isVector() && shouldPack()) {
// If we're constructing lane 0, initialize to start from poison.
- if (State.Instance->Lane.isFirstLane()) {
+ if (State.Lane->isFirstLane()) {
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
Value *Poison = PoisonValue::get(
VectorType::get(UI->getType(), State.VF));
State.set(this, Poison);
}
- State.packScalarIntoVectorValue(this, *State.Instance);
+ State.packScalarIntoVectorValue(this, *State.Lane);
}
return;
}
if (IsUniform) {
// Uniform within VL means we need to generate lane 0.
- State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
+ State.ILV->scalarizeInstruction(UI, this, VPLane(0), State);
return;
}
@@ -9459,7 +9457,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
if (isa<StoreInst>(UI) &&
vputils::isUniformAfterVectorization(getOperand(1))) {
auto Lane = VPLane::getLastLaneForVF(State.VF);
- State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
+ State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
return;
}
@@ -9467,7 +9465,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
const unsigned EndLane = State.VF.getKnownMinValue();
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
- State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
+ State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
}
// Determine how to lower the scalar epilogue, which depends on 1) optimising
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5e4d487261c6f0..6ddbfcf0ecfe58 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -228,28 +228,27 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
-Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
+Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
if (Def->isLiveIn())
return Def->getLiveInIRValue();
- if (hasScalarValue(Def, Instance)) {
- return Data.VPV2Scalars[Def][Instance.Lane.mapToCacheIndex(VF)];
- }
- if (!Instance.Lane.isFirstLane() &&
- vputils::isUniformAfterVectorization(Def) &&
- hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
+ if (hasScalarValue(Def, Lane))
+ return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
+
+ if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) &&
+ hasScalarValue(Def, VPLane::getFirstLane())) {
return Data.VPV2Scalars[Def][0];
}
assert(hasVectorValue(Def));
auto *VecPart = Data.VPV2Vector[Def];
if (!VecPart->getType()->isVectorTy()) {
- assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
+ assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
return VecPart;
}
// TODO: Cache created scalar values.
- Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
- auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
+ Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
+ auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
// set(Def, Extract, Instance);
return Extract;
}
@@ -258,11 +257,11 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
if (NeedsScalar) {
assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
!vputils::onlyFirstLaneUsed(Def) ||
- (hasScalarValue(Def, VPIteration(0, 0)) &&
+ (hasScalarValue(Def, VPLane(0)) &&
Data.VPV2Scalars[Def].size() == 1)) &&
"Trying to access a single scalar per part but has multiple scalars "
"per part.");
- return get(Def, VPIteration(0, 0));
+ return get(Def, VPLane(0));
}
// If Values have been set for this Def return the one relevant for \p Part.
@@ -289,7 +288,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Shuf;
};
- if (!hasScalarValue(Def, {0, 0})) {
+ if (!hasScalarValue(Def, {0})) {
assert(Def->isLiveIn() && "expected a live-in");
Value *IRV = Def->getLiveInIRValue();
Value *B = GetBroadcastInstrs(IRV);
@@ -297,7 +296,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return B;
}
- Value *ScalarValue = get(Def, {0, 0});
+ Value *ScalarValue = get(Def, VPLane(0));
// If we aren't vectorizing, we can just copy the scalar map values over
// to the vector map.
if (VF.isScalar()) {
@@ -307,9 +306,9 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
bool IsUniform = vputils::isUniformAfterVectorization(Def);
- unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
+ VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1);
// Check if there is a scalar value for the selected lane.
- if (!hasScalarValue(Def, {0, LastLane})) {
+ if (!hasScalarValue(Def, LastLane)) {
// At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
// VPExpandSCEVRecipes can also be uniform.
assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
@@ -320,7 +319,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}
- auto *LastInst = cast<Instruction>(get(Def, {0, LastLane}));
+ auto *LastInst = cast<Instruction>(get(Def, LastLane));
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
@@ -347,7 +346,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
- packScalarIntoVectorValue(Def, {0, Lane});
+ packScalarIntoVectorValue(Def, Lane);
VectorValue = get(Def);
}
Builder.restoreIP(OldIP);
@@ -401,11 +400,11 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) {
}
void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
- const VPIteration &Instance) {
- Value *ScalarInst = get(Def, Instance);
+ const VPLane &Lane) {
+ Value *ScalarInst = get(Def, Lane);
Value *VectorValue = get(Def);
- VectorValue = Builder.CreateInsertElement(
- VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF));
+ VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
+ Lane.getAsRuntimeExpr(Builder, VF));
set(Def, VectorValue);
}
@@ -483,7 +482,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
}
void VPBasicBlock::execute(VPTransformState *State) {
- bool Replica = State->Instance && !State->Instance->isFirstIteration();
+ bool Replica = bool(State->Lane);
VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
VPBlockBase *SingleHPred = nullptr;
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
@@ -765,14 +764,14 @@ void VPRegionBlock::execute(VPTransformState *State) {
return;
}
- assert(!State->Instance && "Replicating a Region with non-null instance.");
+ assert(!State->Lane && "Replicating a Region with non-null instance.");
// Enter replicating mode.
- State->Instance = VPIteration(0, 0);
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+ State->Lane = VPLane(0);
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
- State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
+ State->Lane = VPLane(Lane, VPLane::Kind::First);
// Visit the VPBlocks connected to \p this, starting from it.
for (VPBlockBase *Block : RPOT) {
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
@@ -781,7 +780,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
}
// Exit replicating mode.
- State->Instance.reset();
+ State->Lane.reset();
}
InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c886a39aec76e5..bbcfaf9e19cd0c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -172,6 +172,7 @@ class VPLane {
Kind LaneKind;
public:
+ VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
static VPLane getFirstLane() { return VPLane(0, VPLane::Kind::First); }
@@ -230,23 +231,6 @@ class VPLane {
}
};
-/// VPIteration represents a single point in the iteration space of the output
-/// (vectorized and/or unrolled) IR loop.
-struct VPIteration {
- /// in [0..UF)
- unsigned Part;
-
- VPLane Lane;
-
- VPIteration(unsigned Part, unsigned Lane,
- VPLane::Kind Kind = VPLane::Kind::First)
- : Part(Part), Lane(Lane, Kind) {}
-
- VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
-
- bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
-};
-
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
@@ -257,10 +241,10 @@ struct VPTransformState {
/// The chosen Vectorization Factor of the loop being vectorized.
ElementCount VF;
- /// Hold the indices to generate specific scalar instructions. Null indicates
+ /// Hold the index to generate specific scalar instructions. Null indicates
/// that all instances are to be generated, using either scalar or vector
/// instructions.
- std::optional<VPIteration> Instance;
+ std::optional<VPLane> Lane;
struct DataState {
// Each value from the original loop, when vectorized, is represented by a
@@ -275,15 +259,15 @@ struct VPTransformState {
Value *get(VPValue *Def, bool IsScalar = false);
/// Get the generated Value for a given VPValue and given Part and Lane.
- Value *get(VPValue *Def, const VPIteration &Instance);
+ Value *get(VPValue *Def, const VPLane &Lane);
bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
- bool hasScalarValue(VPValue *Def, VPIteration Instance) {
+ bool hasScalarValue(VPValue *Def, VPLane Lane) {
auto I = Data.VPV2Scalars.find(Def);
if (I == Data.VPV2Scalars.end())
return false;
- unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
return CacheIdx < I->second.size() && I->second[CacheIdx];
}
@@ -291,7 +275,7 @@ struct VPTransformState {
/// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
void set(VPValue *Def, Value *V, bool IsScalar = false) {
if (IsScalar) {
- set(Def, V, VPIteration(0, 0));
+ set(Def, V, VPLane(0));
return;
}
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
@@ -305,23 +289,23 @@ struct VPTransformState {
Data.VPV2Vector[Def] = V;
}
- /// Set the generated scalar \p V for \p Def and the given \p Instance.
- void set(VPValue *Def, Value *V, const VPIteration &Instance) {
+ /// Set the generated scalar \p V for \p Def and the given \p Lane.
+ void set(VPValue *Def, Value *V, const VPLane &Lane) {
auto Iter = Data.VPV2Scalars.insert({Def, {}});
auto &Scalars = Iter.first->second;
- unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
if (Scalars.size() <= CacheIdx)
Scalars.resize(CacheIdx + 1);
assert(!Scalars[CacheIdx] && "should overwrite existing value");
Scalars[CacheIdx] = V;
}
- /// Reset an existing scalar value for \p Def and a given \p Instance.
- void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
+ /// Reset an existing scalar value for \p Def and a given \p Lane.
+ void reset(VPValue *Def, Value *V, const VPLane &Lane) {
auto Iter = Data.VPV2Scalars.find(Def);
assert(Iter != Data.VPV2Scalars.end() &&
"need to overwrite existing value");
- unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
+ unsigned CacheIdx = Lane.mapToCacheIndex(VF);
assert(CacheIdx < Iter->second.size() &&
"need to overwrite existing value");
Iter->second[CacheIdx] = V;
@@ -345,7 +329,7 @@ struct VPTransformState {
void setDebugLocFrom(DebugLoc DL);
/// Construct the vector value of a scalarized value \p V one lane at a time.
- void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance);
+ void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
/// Hold state information used when constructing the CFG of the output IR,
/// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
@@ -1289,7 +1273,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
/// value for lane \p Lane.
- Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
+ Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
#if !defined(NDEBUG)
/// Return true if the VPInstruction is a floating point math operation, i.e.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5d1a13086e9f95..dacba152611c19 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -209,7 +209,7 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
? MiddleVPBB
: ExitingVPBB;
BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];
- Value *V = State.get(ExitValue, VPIteration(0, 0));
+ Value *V = State.get(ExitValue, VPLane(0));
if (Phi->getBasicBlockIndex(PredBB) != -1)
Phi->setIncomingValueForBlock(PredBB, V);
else
@@ -390,7 +390,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
}
Value *VPInstruction::generatePerLane(VPTransformState &State,
- const VPIteration &Lane) {
+ const VPLane &Lane) {
IRBuilderBase &Builder = State.Builder;
assert(getOpcode() == VPInstruction::PtrAdd &&
@@ -432,9 +432,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case VPInstruction::ActiveLaneMask: {
// Get first lane of vector induction variable.
- Value *VIVElem0 = State.get(getOperand(0), VPIteration(0, 0));
+ Value *VIVElem0 = State.get(getOperand(0), VPLane(0));
// Get the original loop tripcount.
- Value *ScalarTC = State.get(getOperand(1), VPIteration(0, 0));
+ Value *ScalarTC = State.get(getOperand(1), VPLane(0));
// If this part of the active lane mask is scalar, generate the CMP directly
// to avoid unnecessary extracts.
@@ -469,7 +469,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case VPInstruction::CalculateTripCountMinusVF: {
unsigned UF = getParent()->getPlan()->getUF();
- Value *ScalarTC = State.get(getOperand(0), {0, 0});
+ Value *ScalarTC = State.get(getOperand(0), VPLane(0));
Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, UF);
Value *Sub = Builder.CreateSub(ScalarTC, Step);
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
@@ -479,7 +479,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
case VPInstruction::ExplicitVectorLength: {
// TODO: Restructure this code with an explicit remainder loop, vsetvli can
// be outside of the main loop.
- Value *AVL = State.get(getOperand(0), VPIteration(0, 0));
+ Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);
// Compute EVL
assert(AVL->getType()->isIntegerTy() &&
"Requested vector length should be an integer.");
@@ -494,7 +494,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case VPInstruction::CanonicalIVIncrementForPart: {
unsigned Part = getUnrollPart(*this);
- auto *IV = State.get(getOperand(0), VPIteration(0, 0));
+ auto *IV = State.get(getOperand(0), VPLane(0));
assert(Part != 0 && "Must have a positive part");
// The canonical IV is incremented by the vectorization factor (num of
// SIMD elements) times the unroll part.
@@ -503,7 +503,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
hasNoSignedWrap());
}
case VPInstruction::BranchOnCond: {
- Value *Cond = State.get(getOperand(0), VPIteration(0, 0));
+ Value *Cond = State.get(getOperand(0), VPLane(0));
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination for exiting blocks now and
// to forward destination(s) later when they are created.
@@ -625,8 +625,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
assert(Offset <= State.VF.getKnownMinValue() &&
"invalid offset to extract from");
// Extract lane VF - Offset from the operand.
- Res = State.get(getOperand(0),
- VPIteration(0, VPLane::getLaneFromEnd(State.VF, Offset)));
+ Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));
} else {
assert(Offset <= 1 && "invalid offset to extract from");
Res = State.get(getOperand(0));
@@ -692,7 +691,7 @@ bool VPInstruction::isFPMathOp() const {
#endif
void VPInstruction::execute(VPTransformState &State) {
- assert(!State.Instance && "VPInstruction executing an Instance");
+ assert(!State.Lane && "VPInstruction executing an Lane");
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
assert((hasFastMathFlags() == isFPMathOp() ||
getOpcode() == Instruction::Select) &&
@@ -707,9 +706,9 @@ void VPInstruction::execute(VPTransformState &State) {
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
Lane != NumLanes; ++Lane) {
- Value *GeneratedValue = generatePerLane(State, VPIteration(0, Lane));
+ Value *GeneratedValue = generatePerLane(State, VPLane(Lane));
assert(GeneratedValue && "generatePerLane must produce a value");
- State.set(this, GeneratedValue, VPIteration(0, Lane));
+ State.set(this, GeneratedValue, VPLane(Lane));
}
return;
}
@@ -857,7 +856,7 @@ void VPIRInstruction::execute(VPTransformState &State) {
// Set insertion point in PredBB in case an extract needs to be generated.
// TODO: Model extracts explicitly.
State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt());
- Value *V = State.get(ExitValue, VPIteration(0, Lane));
+ Value *V = State.get(ExitValue, VPLane(Lane));
auto *Phi = cast<PHINode>(&I);
Phi->addIncoming(V, PredBB);
}
@@ -905,12 +904,12 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
Value *Arg;
if (UseIntrinsic &&
isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
- Arg = State.get(I.value(), VPIteration(0, 0));
+ Arg = State.get(I.value(), VPLane(0));
// Some vectorized function variants may also take a scalar argument,
// e.g. linear parameters for pointers. This needs to be the scalar value
// from the start of the respective part when interleaving.
else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
- Arg = State.get(I.value(), VPIteration(0, 0));
+ Arg = State.get(I.value(), VPLane(0));
else
Arg = State.get(I.value());
if (UseIntrinsic &&
@@ -1045,7 +1044,7 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
auto *InvarCond =
- isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
+ isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr;
Value *Cond = InvarCond ? InvarCond : State.get(getCond());
Value *Op0 = State.get(getOperand(1));
@@ -1410,7 +1409,7 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
}
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Int or FP induction being replicated.");
+ assert(!State.Lane && "Int or FP induction being replicated.");
Value *Start = getStartValue()->getLiveInIRValue();
const InductionDescriptor &ID = getInductionDescriptor();
@@ -1429,7 +1428,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
// Now do the actual transformations, and start with fetching the step value.
- Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+ Value *Step = State.get(getStepValue(), VPLane(0));
assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
"Expected either an induction phi-node or a truncate of it!");
@@ -1472,7 +1471,7 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
// Multiply the vectorization factor by the step using integer or
// floating-point arithmetic as appropriate.
Type *StepType = Step->getType();
- Value *RuntimeVF = State.get(getVFValue(), {0, 0});
+ Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
if (Step->getType()->isFloatingPointTy())
RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
else
@@ -1569,8 +1568,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
/// variable on which to base the steps, \p Step is the size of the step.
- Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
- Value *Step = State.get(getStepValue(), VPIteration(0, 0));
+ Value *BaseIV = State.get(getOperand(0), VPLane(0));
+ Value *Step = State.get(getStepValue(), VPLane(0));
IRBuilderBase &Builder = State.Builder;
// Ensure step has the same type as that of scalar IV.
@@ -1607,8 +1606,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
- if (State.Instance) {
- StartLane = State.Instance->Lane.getKnownLane();
+ if (State.Lane) {
+ StartLane = State.Lane->getKnownLane();
EndLane = StartLane + 1;
}
Value *StartIdx0 =
@@ -1640,7 +1639,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
"scalable");
auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
- State.set(this, Add, VPIteration(0, Lane));
+ State.set(this, Add, VPLane(Lane));
}
}
@@ -1678,7 +1677,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// the lane-zero scalar value.
SmallVector<Value *> Ops;
for (unsigned I = 0, E = getNumOperands(); I != E; I++)
- Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
+ Ops.push_back(State.get(getOperand(I), VPLane(0)));
auto *NewGEP =
State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
@@ -1691,9 +1690,8 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// produce a vector of pointers unless VF is scalar.
// The pointer operand of the new GEP. If it's loop-invariant, we
// won't broadcast it.
- auto *Ptr = isPointerLoopInvariant()
- ? State.get(getOperand(0), VPIteration(0, 0))
- : State.get(getOperand(0));
+ auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPLane(0))
+ : State.get(getOperand(0));
// Collect all the indices for the new GEP. If any index is
// loop-invariant, we won't broadcast it.
@@ -1701,7 +1699,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
VPValue *Operand = getOperand(I);
if (isIndexLoopInvariant(I - 1))
- Indices.push_back(State.get(Operand, VPIteration(0, 0)));
+ Indices.push_back(State.get(Operand, VPLane(0)));
else
Indices.push_back(State.get(Operand));
}
@@ -1743,7 +1741,7 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
? DL.getIndexType(IndexedTy->getPointerTo())
: Builder.getInt32Ty();
- Value *Ptr = State.get(getOperand(0), VPIteration(0, 0));
+ Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
Value *ResultPtr = nullptr;
@@ -1844,7 +1842,7 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPReductionRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Reduction being replicated.");
+ assert(!State.Lane && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
RecurKind Kind = RdxDesc.getRecurrenceKind();
// Propagate the fast-math flags carried by the underlying instruction.
@@ -1894,7 +1892,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
}
void VPReductionEVLRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Reduction being replicated.");
+ assert(!State.Lane && "Reduction being replicated.");
auto &Builder = State.Builder;
// Propagate the fast-math flags carried by the underlying instruction.
@@ -1905,7 +1903,7 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
RecurKind Kind = RdxDesc.getRecurrenceKind();
Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
Value *VecOp = State.get(getVecOp());
- Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+ Value *EVL = State.get(getEVL(), VPLane(0));
VectorBuilder VBuilder(Builder);
VBuilder.setEVL(EVL);
@@ -2027,7 +2025,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
case Instruction::ZExt:
case Instruction::Trunc: {
// Note: SExt/ZExt not used yet.
- Value *Op = State.get(getOperand(0), VPIteration(0, 0));
+ Value *Op = State.get(getOperand(0), VPLane(0));
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
}
default:
@@ -2036,7 +2034,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
}
void VPScalarCastRecipe ::execute(VPTransformState &State) {
- State.set(this, generate(State), VPIteration(0, 0));
+ State.set(this, generate(State), VPLane(0));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2051,9 +2049,9 @@ void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
#endif
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
- assert(State.Instance && "Branch on Mask works only on single instance.");
+ assert(State.Lane && "Branch on Mask works only on single instance.");
- unsigned Lane = State.Instance->Lane.getKnownLane();
+ unsigned Lane = State.Lane->getKnownLane();
Value *ConditionBit = nullptr;
VPValue *BlockInMask = getMask();
@@ -2076,9 +2074,9 @@ void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
}
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
- assert(State.Instance && "Predicated instruction PHI works per instance.");
+ assert(State.Lane && "Predicated instruction PHI works per instance.");
Instruction *ScalarPredInst =
- cast<Instruction>(State.get(getOperand(0), *State.Instance));
+ cast<Instruction>(State.get(getOperand(0), *State.Lane));
BasicBlock *PredicatedBB = ScalarPredInst->getParent();
BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
assert(PredicatingBB && "Predicated block has no single predecessor.");
@@ -2110,13 +2108,13 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
PredicatingBB);
Phi->addIncoming(ScalarPredInst, PredicatedBB);
- if (State.hasScalarValue(this, *State.Instance))
- State.reset(this, Phi, *State.Instance);
+ if (State.hasScalarValue(this, *State.Lane))
+ State.reset(this, Phi, *State.Lane);
else
- State.set(this, Phi, *State.Instance);
+ State.set(this, Phi, *State.Lane);
// NOTE: Currently we need to update the value of the operand, so the next
// predicated iteration inserts its generated value in the correct vector.
- State.reset(getOperand(0), Phi, *State.Instance);
+ State.reset(getOperand(0), Phi, *State.Lane);
}
}
@@ -2239,7 +2237,7 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
CallInst *NewLI;
- Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+ Value *EVL = State.get(getEVL(), VPLane(0));
Value *Addr = State.get(getAddr(), !CreateGather);
Value *Mask = nullptr;
if (VPValue *VPMask = getMask()) {
@@ -2337,7 +2335,7 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
CallInst *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue);
- Value *EVL = State.get(getEVL(), VPIteration(0, 0));
+ Value *EVL = State.get(getEVL(), VPLane(0));
if (isReverse())
StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
Value *Mask = nullptr;
@@ -2463,7 +2461,7 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements
// store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B
void VPInterleaveRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "Interleave group being replicated.");
+ assert(!State.Lane && "Interleave group being replicated.");
const InterleaveGroup<Instruction> *Group = IG;
Instruction *Instr = Group->getInsertPos();
@@ -2497,7 +2495,7 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
Idx = State.Builder.getInt32(-Index);
VPValue *Addr = getAddr();
- Value *ResAddr = State.get(Addr, VPIteration(0, 0));
+ Value *ResAddr = State.get(Addr, VPLane(0));
if (auto *I = dyn_cast<Instruction>(ResAddr))
State.setDebugLocFrom(I->getDebugLoc());
@@ -2797,7 +2795,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
// A pointer induction, performed by using a gep
BasicBlock::iterator InductionLoc = State.Builder.GetInsertPoint();
- Value *ScalarStepValue = State.get(getOperand(1), VPIteration(0, 0));
+ Value *ScalarStepValue = State.get(getOperand(1), VPLane(0));
Type *PhiType = IndDesc.getStep()->getType();
Value *RuntimeVF = getRuntimeVF(State.Builder, PhiType, State.VF);
// Add induction update using an incorrect block temporarily. The phi node
@@ -2831,7 +2829,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
StartOffset = State.Builder.CreateAdd(
StartOffset, State.Builder.CreateStepVector(VecPhiType));
- assert(ScalarStepValue == State.get(getOperand(1), VPIteration(0, 0)) &&
+ assert(ScalarStepValue == State.get(getOperand(1), VPLane(0)) &&
"scalar step must be the same across all parts");
Value *GEP = State.Builder.CreateGEP(
State.Builder.getInt8Ty(), NewPointerPhi,
@@ -2861,7 +2859,7 @@ void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
- assert(!State.Instance && "cannot be used in per-lane");
+ assert(!State.Lane && "cannot be used in per-lane");
const DataLayout &DL = State.CFG.PrevBB->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
@@ -2870,7 +2868,7 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.ExpandedSCEVs.contains(Expr) &&
"Same SCEV expanded multiple times");
State.ExpandedSCEVs[Expr] = Res;
- State.set(this, Res, {0, 0});
+ State.set(this, Res, VPLane(0));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3079,7 +3077,7 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) {
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
- Value *Start = State.get(getOperand(0), VPIteration(0, 0));
+ Value *Start = State.get(getOperand(0), VPLane(0));
PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv");
Phi->addIncoming(Start, VectorPH);
Phi->setDebugLoc(getDebugLoc());
More information about the llvm-commits mailing list