[llvm] edc92a1 - [LV] Remove VPCallback.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 19 04:53:32 PST 2021
Author: Florian Hahn
Date: 2021-02-19T12:50:41Z
New Revision: edc92a1c42590a1fb5e852cea6ffbc253e5e0a7f
URL: https://github.com/llvm/llvm-project/commit/edc92a1c42590a1fb5e852cea6ffbc253e5e0a7f
DIFF: https://github.com/llvm/llvm-project/commit/edc92a1c42590a1fb5e852cea6ffbc253e5e0a7f.diff
LOG: [LV] Remove VPCallback.
Now that all state for generated instructions is managed directly in
VPTransformState, VPCallback is no longer needed. This patch updates the
last use of `getOrCreateScalarValue` to instead manage the value
directly in VPTransformState and removes VPCallback.
Reviewed By: gilr
Differential Revision: https://reviews.llvm.org/D95383
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 19797e6f7858..1f8d5c8aa195 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -222,18 +222,6 @@ class LoopVectorizationPlanner {
SmallVector<VPlanPtr, 4> VPlans;
- /// This class is used to enable the VPlan to invoke a method of ILV. This is
- /// needed until the method is refactored out of ILV and becomes reusable.
- struct VPCallbackILV : public VPCallback {
- InnerLoopVectorizer &ILV;
-
- VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {}
-
- Value *getOrCreateVectorValues(Value *V, unsigned Part) override;
- Value *getOrCreateScalarValue(Value *V,
- const VPIteration &Instance) override;
- };
-
/// A builder used to construct the current plan.
VPBuilder Builder;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 109686c7742c..f89a04172b64 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -461,9 +461,8 @@ class InnerLoopVectorizer {
ProfileSummaryInfo *PSI)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
- Builder(PSE.getSE()->getContext()),
- VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM),
- BFI(BFI), PSI(PSI) {
+ Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI),
+ PSI(PSI) {
// Query this against the original loop and save it here because the profile
// of the original loop header may change as the transformation happens.
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
@@ -533,50 +532,7 @@ class InnerLoopVectorizer {
VPValue *Def, VPValue *CastDef,
VPTransformState &State);
- /// getOrCreateVectorValue and getOrCreateScalarValue coordinate to generate a
- /// vector or scalar value on-demand if one is not yet available. When
- /// vectorizing a loop, we visit the definition of an instruction before its
- /// uses. When visiting the definition, we either vectorize or scalarize the
- /// instruction, creating an entry for it in the corresponding map. (In some
- /// cases, such as induction variables, we will create both vector and scalar
- /// entries.) Then, as we encounter uses of the definition, we derive values
- /// for each scalar or vector use unless such a value is already available.
- /// For example, if we scalarize a definition and one of its uses is vector,
- /// we build the required vector on-demand with an insertelement sequence
- /// when visiting the use. Otherwise, if the use is scalar, we can use the
- /// existing scalar definition.
- ///
- /// Return a value in the new loop corresponding to \p V from the original
- /// loop at unroll index \p Part. If the value has already been vectorized,
- /// the corresponding vector entry in VectorLoopValueMap is returned. If,
- /// however, the value has a scalar entry in VectorLoopValueMap, we construct
- /// a new vector value on-demand by inserting the scalar values into a vector
- /// with an insertelement sequence. If the value has been neither vectorized
- /// nor scalarized, it must be loop invariant, so we simply broadcast the
- /// value into a vector.
- Value *getOrCreateVectorValue(Value *V, unsigned Part);
-
- void setVectorValue(Value *Scalar, unsigned Part, Value *Vector) {
- VectorLoopValueMap.setVectorValue(Scalar, Part, Vector);
- }
-
- void resetVectorValue(Value *Scalar, unsigned Part, Value *Vector) {
- VectorLoopValueMap.resetVectorValue(Scalar, Part, Vector);
- }
-
- void setScalarValue(Value *Scalar, const VPIteration &Instance, Value *V) {
- VectorLoopValueMap.setScalarValue(Scalar, Instance, V);
- }
-
- /// Return a value in the new loop corresponding to \p V from the original
- /// loop at unroll and vector indices \p Instance. If the value has been
- /// vectorized but not scalarized, the necessary extractelement instruction
- /// will be generated.
- Value *getOrCreateScalarValue(Value *V, const VPIteration &Instance);
-
/// Construct the vector value of a scalarized value \p V one lane at a time.
- void packScalarIntoVectorValue(Value *V, const VPIteration &Instance);
-
void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance,
VPTransformState &State);
@@ -645,7 +601,8 @@ class InnerLoopVectorizer {
void fixReduction(PHINode *Phi, VPTransformState &State);
/// Clear NSW/NUW flags from reduction instructions if necessary.
- void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc);
+ void clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc,
+ VPTransformState &State);
/// Fixup the LCSSA phi nodes in the unique exit block. This simply
/// means we need to add the appropriate incoming value from the middle
@@ -660,7 +617,7 @@ class InnerLoopVectorizer {
/// Shrinks vector element sizes to the smallest bitwidth they can be legally
/// represented as.
- void truncateToMinimalBitwidths();
+ void truncateToMinimalBitwidths(VPTransformState &State);
/// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
@@ -876,12 +833,6 @@ class InnerLoopVectorizer {
/// The induction variable of the old basic block.
PHINode *OldInduction = nullptr;
- /// Maps values from the original loop to their corresponding values in the
- /// vectorized loop. A key value can map to either vector values, scalar
- /// values or both kinds of values, depending on whether the key was
- /// vectorized and scalarized.
- VectorizerValueMap VectorLoopValueMap;
-
/// Store instructions that were predicated.
SmallVector<Instruction *, 4> PredicatedInstructions;
@@ -2104,7 +2055,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
VecInd->setDebugLoc(EntryVal->getDebugLoc());
Instruction *LastInduction = VecInd;
for (unsigned Part = 0; Part < UF; ++Part) {
- State.set(Def, EntryVal, LastInduction, Part);
+ State.set(Def, LastInduction, Part);
if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
@@ -2236,7 +2187,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
Value *EntryPart =
getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step,
ID.getInductionOpcode());
- State.set(Def, EntryVal, EntryPart, Part);
+ State.set(Def, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
recordVectorLoopValueForInductionCast(ID, EntryVal, EntryPart, CastDef,
@@ -2375,7 +2326,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
: VF.getKnownMinValue();
assert((!VF.isScalable() || Lanes == 1) &&
"Should never scalarize a scalable vector");
- // Compute the scalar steps and save the results in VectorLoopValueMap.
+ // Compute the scalar steps and save the results in State.
for (unsigned Part = 0; Part < UF; ++Part) {
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
auto *IntStepTy = IntegerType::get(ScalarIVTy->getContext(),
@@ -2400,132 +2351,6 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
}
-Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
- assert(V != Induction && "The new induction variable should not be used.");
- assert(!V->getType()->isVectorTy() && "Can't widen a vector");
- assert(!V->getType()->isVoidTy() && "Type does not produce a value");
-
- // If we have a stride that is replaced by one, do it here. Defer this for
- // the VPlan-native path until we start running Legal checks in that path.
- if (!EnableVPlanNativePath && Legal->hasStride(V))
- V = ConstantInt::get(V->getType(), 1);
-
- // If we have a vector mapped to this value, return it.
- if (VectorLoopValueMap.hasVectorValue(V, Part))
- return VectorLoopValueMap.getVectorValue(V, Part);
-
- // If the value has not been vectorized, check if it has been scalarized
- // instead. If it has been scalarized, and we actually need the value in
- // vector form, we will construct the vector values on demand.
- if (VectorLoopValueMap.hasAnyScalarValue(V)) {
- Value *ScalarValue =
- VectorLoopValueMap.getScalarValue(V, VPIteration(Part, 0));
-
- // If we've scalarized a value, that value should be an instruction.
- auto *I = cast<Instruction>(V);
-
- // If we aren't vectorizing, we can just copy the scalar map values over to
- // the vector map.
- if (VF.isScalar()) {
- VectorLoopValueMap.setVectorValue(V, Part, ScalarValue);
- return ScalarValue;
- }
-
- // Get the last scalar instruction we generated for V and Part. If the value
- // is known to be uniform after vectorization, this corresponds to lane zero
- // of the Part unroll iteration. Otherwise, the last instruction is the one
- // we created for the last vector lane of the Part unroll iteration.
- unsigned LastLane = Cost->isUniformAfterVectorization(I, VF)
- ? 0
- : VF.getKnownMinValue() - 1;
- assert((!VF.isScalable() || LastLane == 0) &&
- "Scalable vectorization can't lead to any scalarized values.");
- auto *LastInst = cast<Instruction>(
- VectorLoopValueMap.getScalarValue(V, VPIteration(Part, LastLane)));
-
- // Set the insert point after the last scalarized instruction. This ensures
- // the insertelement sequence will directly follow the scalar definitions.
- auto OldIP = Builder.saveIP();
- auto NewIP = std::next(BasicBlock::iterator(LastInst));
- Builder.SetInsertPoint(&*NewIP);
-
- // However, if we are vectorizing, we need to construct the vector values.
- // If the value is known to be uniform after vectorization, we can just
- // broadcast the scalar value corresponding to lane zero for each unroll
- // iteration. Otherwise, we construct the vector values using insertelement
- // instructions. Since the resulting vectors are stored in
- // VectorLoopValueMap, we will only generate the insertelements once.
- Value *VectorValue = nullptr;
- if (Cost->isUniformAfterVectorization(I, VF)) {
- VectorValue = getBroadcastInstrs(ScalarValue);
- VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
- } else {
- // Initialize packing with insertelements to start from poison.
- assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- Value *Poison = PoisonValue::get(VectorType::get(V->getType(), VF));
- VectorLoopValueMap.setVectorValue(V, Part, Poison);
- for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
- packScalarIntoVectorValue(V, VPIteration(Part, Lane));
- VectorValue = VectorLoopValueMap.getVectorValue(V, Part);
- }
- Builder.restoreIP(OldIP);
- return VectorValue;
- }
-
- // If this scalar is unknown, assume that it is a constant or that it is
- // loop invariant. Broadcast V and save the value for future uses.
- Value *B = getBroadcastInstrs(V);
- VectorLoopValueMap.setVectorValue(V, Part, B);
- return B;
-}
-
-Value *
-InnerLoopVectorizer::getOrCreateScalarValue(Value *V,
- const VPIteration &Instance) {
- // If the value is not an instruction contained in the loop, it should
- // already be scalar.
- if (OrigLoop->isLoopInvariant(V))
- return V;
-
- assert(Instance.Lane > 0
- ? !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
- : true && "Uniform values only have lane zero");
-
- // If the value from the original loop has not been vectorized, it is
- // represented by UF x VF scalar values in the new loop. Return the requested
- // scalar value.
- if (VectorLoopValueMap.hasScalarValue(V, Instance))
- return VectorLoopValueMap.getScalarValue(V, Instance);
-
- // If the value has not been scalarized, get its entry in VectorLoopValueMap
- // for the given unroll part. If this entry is not a vector type (i.e., the
- // vectorization factor is one), there is no need to generate an
- // extractelement instruction.
- auto *U = getOrCreateVectorValue(V, Instance.Part);
- if (!U->getType()->isVectorTy()) {
- assert(VF.isScalar() && "Value not scalarized has non-vector type");
- return U;
- }
-
- // Otherwise, the value from the original loop has been vectorized and is
- // represented by UF vector values. Extract and return the requested scalar
- // value from the appropriate vector lane.
- return Builder.CreateExtractElement(U, Builder.getInt32(Instance.Lane));
-}
-
-void InnerLoopVectorizer::packScalarIntoVectorValue(
- Value *V, const VPIteration &Instance) {
- assert(V != Induction && "The new induction variable should not be used.");
- assert(!V->getType()->isVectorTy() && "Can't pack a vector");
- assert(!V->getType()->isVoidTy() && "Type does not produce a value");
-
- Value *ScalarInst = VectorLoopValueMap.getScalarValue(V, Instance);
- Value *VectorValue = VectorLoopValueMap.getVectorValue(V, Instance.Part);
- VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
- Builder.getInt32(Instance.Lane));
- VectorLoopValueMap.resetVectorValue(V, Instance.Part, VectorValue);
-}
-
void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def,
const VPIteration &Instance,
VPTransformState &State) {
@@ -2715,7 +2540,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
if (Group->isReverse())
StridedVec = reverseVector(StridedVec);
- State.set(VPDefs[J], Member, StridedVec, Part);
+ State.set(VPDefs[J], StridedVec, Part);
}
++J;
}
@@ -2909,7 +2734,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(
NewLI = reverseVector(NewLI);
}
- State.set(Def, Instr, NewLI, Part);
+ State.set(Def, NewLI, Part);
}
}
@@ -2953,7 +2778,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def,
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
- State.set(Def, Instr, Cloned, Instance);
+ State.set(Def, Cloned, Instance);
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
@@ -3832,19 +3657,20 @@ static Type *largestIntegerVectorType(Type *T1, Type *T2) {
return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2;
}
-void InnerLoopVectorizer::truncateToMinimalBitwidths() {
+void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
// For every instruction `I` in MinBWs, truncate the operands, create a
// truncated version of `I` and reextend its result. InstCombine runs
// later and will remove any ext/trunc pairs.
SmallPtrSet<Value *, 4> Erased;
for (const auto &KV : Cost->getMinimalBitwidths()) {
// If the value wasn't vectorized, we must maintain the original scalar
- // type. The absence of the value from VectorLoopValueMap indicates that it
+ // type. The absence of the value from State indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
+ VPValue *Def = State.Plan->getVPValue(KV.first);
+ if (!State.hasAnyVectorValue(Def))
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *I = getOrCreateVectorValue(KV.first, Part);
+ Value *I = State.get(Def, Part);
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
continue;
Type *OriginalTy = I->getType();
@@ -3943,24 +3769,25 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
I->replaceAllUsesWith(Res);
cast<Instruction>(I)->eraseFromParent();
Erased.insert(I);
- VectorLoopValueMap.resetVectorValue(KV.first, Part, Res);
+ State.reset(Def, Res, Part);
}
}
// We'll have created a bunch of ZExts that are now parentless. Clean up.
for (const auto &KV : Cost->getMinimalBitwidths()) {
// If the value wasn't vectorized, we must maintain the original scalar
- // type. The absence of the value from VectorLoopValueMap indicates that it
+ // type. The absence of the value from State indicates that it
// wasn't vectorized.
- if (!VectorLoopValueMap.hasAnyVectorValue(KV.first))
+ VPValue *Def = State.Plan->getVPValue(KV.first);
+ if (!State.hasAnyVectorValue(Def))
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *I = getOrCreateVectorValue(KV.first, Part);
+ Value *I = State.get(Def, Part);
ZExtInst *Inst = dyn_cast<ZExtInst>(I);
if (Inst && Inst->use_empty()) {
Value *NewI = Inst->getOperand(0);
Inst->eraseFromParent();
- VectorLoopValueMap.resetVectorValue(KV.first, Part, NewI);
+ State.reset(Def, NewI, Part);
}
}
}
@@ -3970,7 +3797,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// Insert truncates and extends for any truncated instructions as hints to
// InstCombine.
if (VF.isVector())
- truncateToMinimalBitwidths();
+ truncateToMinimalBitwidths(State);
// Fix widened non-induction PHIs by setting up the PHI operands.
if (OrigPHIsToFix.size()) {
@@ -4163,7 +3990,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi,
: Incoming;
PhiPart->replaceAllUsesWith(Shuffle);
cast<Instruction>(PhiPart)->eraseFromParent();
- State.reset(PhiDef, Phi, Shuffle, Part);
+ State.reset(PhiDef, Shuffle, Part);
Incoming = PreviousPart;
}
@@ -4239,7 +4066,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
// Wrap flags are in general invalid after vectorization, clear them.
- clearReductionWrapFlags(RdxDesc);
+ clearReductionWrapFlags(RdxDesc, State);
// Fix the vector-loop phi.
@@ -4279,7 +4106,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
assert(isa<PHINode>(U) && "Reduction exit must feed Phi's or select");
}
assert(Sel && "Reduction exit feeds no select");
- State.reset(LoopExitInstDef, LoopExitInst, Sel, Part);
+ State.reset(LoopExitInstDef, Sel, Part);
// If the target can create a predicated operator for the reduction at no
// extra cost in the loop (for example a predicated vadd), it can be
@@ -4326,7 +4153,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
for (unsigned Part = 0; Part < UF; ++Part) {
RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
- State.reset(LoopExitInstDef, LoopExitInst, RdxParts[Part], Part);
+ State.reset(LoopExitInstDef, RdxParts[Part], Part);
}
}
@@ -4401,8 +4228,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi, VPTransformState &State) {
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}
-void InnerLoopVectorizer::clearReductionWrapFlags(
- RecurrenceDescriptor &RdxDesc) {
+void InnerLoopVectorizer::clearReductionWrapFlags(RecurrenceDescriptor &RdxDesc,
+ VPTransformState &State) {
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RK != RecurKind::Add && RK != RecurKind::Mul)
return;
@@ -4418,7 +4245,7 @@ void InnerLoopVectorizer::clearReductionWrapFlags(
Instruction *Cur = Worklist.pop_back_val();
if (isa<OverflowingBinaryOperator>(Cur))
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *V = getOrCreateVectorValue(Cur, Part);
+ Value *V = State.get(State.Plan->getVPValue(Cur), Part);
cast<Instruction>(V)->dropPoisonGeneratingFlags();
}
@@ -4540,7 +4367,7 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
// The insertion point in Builder may be invalidated by the time we get
// here. Force the Builder insertion point to something valid so that we do
// not run into issues during insertion point restore in
- // getOrCreateVectorValue calls below.
+ // State::get() calls below.
Builder.SetInsertPoint(NewPhi);
// The predecessor order is preserved and we can rely on mapping between
@@ -4554,7 +4381,7 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
OrigPhi->getIncomingValueForBlock(ScalarBBPredecessors[i]);
// Scalar incoming value may need a broadcast
- Value *NewIncV = getOrCreateVectorValue(ScIncV, 0);
+ Value *NewIncV = State.get(State.Plan->getOrAddVPValue(ScIncV), 0);
NewPhi->addIncoming(NewIncV, NewPredBB);
}
}
@@ -4587,7 +4414,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef,
auto *Clone = Builder.Insert(GEP->clone());
for (unsigned Part = 0; Part < UF; ++Part) {
Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
- State.set(VPDef, GEP, EntryPart, Part);
+ State.set(VPDef, EntryPart, Part);
addMetadata(EntryPart, GEP);
}
} else {
@@ -4625,7 +4452,7 @@ void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef,
: Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
assert((VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
- State.set(VPDef, GEP, NewGEP, Part);
+ State.set(VPDef, NewGEP, Part);
addMetadata(NewGEP, GEP);
}
}
@@ -4645,7 +4472,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
? PN->getType()
: VectorType::get(PN->getType(), State.VF);
Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
- State.set(Def, P, VecPhi, 0);
+ State.set(Def, VecPhi, 0);
OrigPHIsToFix.push_back(P);
return;
@@ -4699,7 +4526,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
// This is phase one of vectorizing PHIs.
Value *EntryPart = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
- State.set(Def, P, EntryPart, Part);
+ State.set(Def, EntryPart, Part);
if (StartV) {
// Make sure to add the reduction start value only to the
// first unroll part.
@@ -4752,7 +4579,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *SclrGep =
emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
SclrGep->setName("next.gep");
- State.set(Def, P, SclrGep, VPIteration(Part, Lane));
+ State.set(Def, SclrGep, VPIteration(Part, Lane));
}
}
return;
@@ -4800,7 +4627,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Builder.CreateVectorSplat(
State.VF.getKnownMinValue(), ScalarStepValue),
"vector.gep"));
- State.set(Def, P, GEP, Part);
+ State.set(Def, GEP, Part);
}
}
}
@@ -4867,7 +4694,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
VecOp->copyIRFlags(&I);
// Use this vector value for all users of the original instruction.
- State.set(Def, &I, V, Part);
+ State.set(Def, V, Part);
addMetadata(V, &I);
}
@@ -4891,7 +4718,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
} else {
C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
}
- State.set(Def, &I, C, Part);
+ State.set(Def, C, Part);
addMetadata(C, &I);
}
@@ -4920,7 +4747,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
for (unsigned Part = 0; Part < UF; ++Part) {
Value *A = State.get(User.getOperand(0), Part);
Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- State.set(Def, &I, Cast, Part);
+ State.set(Def, Cast, Part);
addMetadata(Cast, &I);
}
break;
@@ -4997,7 +4824,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
if (isa<FPMathOperator>(V))
V->copyFastMathFlags(CI);
- State.set(Def, &I, V, Part);
+ State.set(Def, V, Part);
addMetadata(V, &I);
}
}
@@ -5022,7 +4849,7 @@ void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I, VPValue *VPDef,
Value *Op0 = State.get(Operands.getOperand(1), Part);
Value *Op1 = State.get(Operands.getOperand(2), Part);
Value *Sel = Builder.CreateSelect(Cond, Op0, Op1);
- State.set(VPDef, &I, Sel, Part);
+ State.set(VPDef, Sel, Part);
addMetadata(Sel, &I);
}
}
@@ -7792,16 +7619,11 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
// Perform the actual loop transformation.
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
- VPCallbackILV CallbackILV(ILV);
-
assert(BestVF.hasValue() && "Vectorization Factor is missing");
assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
- VPTransformState State{*BestVF, BestUF,
- LI, DT,
- ILV.Builder, ILV.VectorLoopValueMap,
- &ILV, VPlans.front().get(),
- CallbackILV};
+ VPTransformState State{
+ *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
State.TripCount = ILV.getOrCreateTripCount(nullptr);
State.CanonicalIV = ILV.Induction;
@@ -9016,16 +8838,6 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
}
}
-Value* LoopVectorizationPlanner::VPCallbackILV::
-getOrCreateVectorValues(Value *V, unsigned Part) {
- return ILV.getOrCreateVectorValue(V, Part);
-}
-
-Value *LoopVectorizationPlanner::VPCallbackILV::getOrCreateScalarValue(
- Value *V, const VPIteration &Instance) {
- return ILV.getOrCreateScalarValue(V, Instance);
-}
-
void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
@@ -9112,7 +8924,7 @@ void VPBlendRecipe::execute(VPTransformState &State) {
}
}
for (unsigned Part = 0; Part < State.UF; ++Part)
- State.set(this, Phi, Entry[Part], Part);
+ State.set(this, Entry[Part], Part);
}
void VPInterleaveRecipe::execute(VPTransformState &State) {
@@ -9149,7 +8961,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
(Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
PrevInChain);
}
- State.set(this, getUnderlyingInstr(), NextInChain, Part);
+ State.set(this, NextInChain, Part);
}
}
@@ -9165,7 +8977,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
Value *Poison = PoisonValue::get(
VectorType::get(getUnderlyingValue()->getType(), State.VF));
- State.set(this, getUnderlyingInstr(), Poison, State.Instance->Part);
+ State.set(this, Poison, State.Instance->Part);
}
State.ILV->packScalarIntoVectorValue(this, *State.Instance, State);
}
@@ -9314,31 +9126,17 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
return CM_ScalarEpilogueAllowed;
}
-void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V,
- const VPIteration &Instance) {
- set(Def, V, Instance);
- ILV->setScalarValue(IRDef, Instance, V);
-}
-
-void VPTransformState::set(VPValue *Def, Value *IRDef, Value *V,
- unsigned Part) {
- set(Def, V, Part);
- ILV->setVectorValue(IRDef, Part, V);
-}
-
-void VPTransformState::reset(VPValue *Def, Value *IRDef, Value *V,
- unsigned Part) {
- set(Def, V, Part);
- ILV->resetVectorValue(IRDef, Part, V);
-}
-
Value *VPTransformState::get(VPValue *Def, unsigned Part) {
// If Values have been set for this Def return the one relevant for \p Part.
if (hasVectorValue(Def, Part))
return Data.PerPartOutput[Def][Part];
- if (!hasScalarValue(Def, {Part, 0}))
- return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
+ if (!hasScalarValue(Def, {Part, 0})) {
+ Value *IRV = Def->getLiveInIRValue();
+ Value *B = ILV->getBroadcastInstrs(IRV);
+ set(Def, B, Part);
+ return B;
+ }
Value *ScalarValue = get(Def, {Part, 0});
// If we aren't vectorizing, we can just copy the scalar map values over
@@ -9366,7 +9164,7 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) {
// broadcast the scalar value corresponding to lane zero for each unroll
// iteration. Otherwise, we construct the vector values using
// insertelement instructions. Since the resulting vectors are stored in
- // VectorLoopValueMap, we will only generate the insertelements once.
+ // State, we will only generate the insertelements once.
Value *VectorValue = nullptr;
if (IsUniform) {
VectorValue = ILV->getBroadcastInstrs(ScalarValue);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 942208d499f8..50edd32e293e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -306,9 +306,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
// branch instruction using the condition value from vector lane 0 and dummy
// successors. The successors are fixed later when the successor blocks are
// visited.
- Value *NewCond = State->Callback.getOrCreateVectorValues(IRCBV, 0);
- NewCond = State->Builder.CreateExtractElement(NewCond,
- State->Builder.getInt32(0));
+ Value *NewCond = State->get(CBV, {0, 0});
// Replace the temporary unreachable terminator with the new conditional
// branch.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9a55f1c2555a..e729089023d2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -103,159 +103,14 @@ struct VPIteration {
bool isFirstIteration() const { return Part == 0 && Lane == 0; }
};
-/// This is a helper struct for maintaining vectorization state. It's used for
-/// mapping values from the original loop to their corresponding values in
-/// the new loop. Two mappings are maintained: one for vectorized values and
-/// one for scalarized values. Vectorized values are represented with UF
-/// vector values in the new loop, and scalarized values are represented with
-/// UF x VF scalar values in the new loop. UF and VF are the unroll and
-/// vectorization factors, respectively.
-///
-/// Entries can be added to either map with setVectorValue and setScalarValue,
-/// which assert that an entry was not already added before. If an entry is to
-/// replace an existing one, call resetVectorValue and resetScalarValue. This is
-/// currently needed to modify the mapped values during "fix-up" operations that
-/// occur once the first phase of widening is complete. These operations include
-/// type truncation and the second phase of recurrence widening.
-///
-/// Entries from either map can be retrieved using the getVectorValue and
-/// getScalarValue functions, which assert that the desired value exists.
-struct VectorizerValueMap {
- friend struct VPTransformState;
-
-private:
- /// The unroll factor. Each entry in the vector map contains UF vector values.
- unsigned UF;
-
- /// The vectorization factor. Each entry in the scalar map contains UF x VF
- /// scalar values.
- ElementCount VF;
-
- /// The vector and scalar map storage. We use std::map and not DenseMap
- /// because insertions to DenseMap invalidate its iterators.
- using VectorParts = SmallVector<Value *, 2>;
- using ScalarParts = SmallVector<SmallVector<Value *, 4>, 2>;
- std::map<Value *, VectorParts> VectorMapStorage;
- std::map<Value *, ScalarParts> ScalarMapStorage;
-
-public:
- /// Construct an empty map with the given unroll and vectorization factors.
- VectorizerValueMap(unsigned UF, ElementCount VF) : UF(UF), VF(VF) {}
-
- /// \return True if the map has any vector entry for \p Key.
- bool hasAnyVectorValue(Value *Key) const {
- return VectorMapStorage.count(Key);
- }
-
- /// \return True if the map has a vector entry for \p Key and \p Part.
- bool hasVectorValue(Value *Key, unsigned Part) const {
- assert(Part < UF && "Queried Vector Part is too large.");
- if (!hasAnyVectorValue(Key))
- return false;
- const VectorParts &Entry = VectorMapStorage.find(Key)->second;
- assert(Entry.size() == UF && "VectorParts has wrong dimensions.");
- return Entry[Part] != nullptr;
- }
-
- /// \return True if the map has any scalar entry for \p Key.
- bool hasAnyScalarValue(Value *Key) const {
- return ScalarMapStorage.count(Key);
- }
-
- /// \return True if the map has a scalar entry for \p Key and \p Instance.
- bool hasScalarValue(Value *Key, const VPIteration &Instance) const {
- assert(Instance.Part < UF && "Queried Scalar Part is too large.");
- assert(Instance.Lane < VF.getKnownMinValue() &&
- "Queried Scalar Lane is too large.");
-
- if (!hasAnyScalarValue(Key))
- return false;
- const ScalarParts &Entry = ScalarMapStorage.find(Key)->second;
- assert(Entry.size() == UF && "ScalarParts has wrong dimensions.");
- assert(Entry[Instance.Part].size() == VF.getKnownMinValue() &&
- "ScalarParts has wrong dimensions.");
- return Entry[Instance.Part][Instance.Lane] != nullptr;
- }
-
- /// Retrieve the existing vector value that corresponds to \p Key and
- /// \p Part.
- Value *getVectorValue(Value *Key, unsigned Part) {
- assert(hasVectorValue(Key, Part) && "Getting non-existent value.");
- return VectorMapStorage[Key][Part];
- }
-
- /// Retrieve the existing scalar value that corresponds to \p Key and
- /// \p Instance.
- Value *getScalarValue(Value *Key, const VPIteration &Instance) {
- assert(hasScalarValue(Key, Instance) && "Getting non-existent value.");
- return ScalarMapStorage[Key][Instance.Part][Instance.Lane];
- }
-
- /// Set a vector value associated with \p Key and \p Part. Assumes such a
- /// value is not already set. If it is, use resetVectorValue() instead.
- void setVectorValue(Value *Key, unsigned Part, Value *Vector) {
- assert(!hasVectorValue(Key, Part) && "Vector value already set for part");
- if (!VectorMapStorage.count(Key)) {
- VectorParts Entry(UF);
- VectorMapStorage[Key] = Entry;
- }
- VectorMapStorage[Key][Part] = Vector;
- }
-
- /// Set a scalar value associated with \p Key and \p Instance. Assumes such a
- /// value is not already set.
- void setScalarValue(Value *Key, const VPIteration &Instance, Value *Scalar) {
- assert(!hasScalarValue(Key, Instance) && "Scalar value already set");
- if (!ScalarMapStorage.count(Key)) {
- ScalarParts Entry(UF);
- // TODO: Consider storing uniform values only per-part, as they occupy
- // lane 0 only, keeping the other VF-1 redundant entries null.
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part].resize(VF.getKnownMinValue(), nullptr);
- ScalarMapStorage[Key] = Entry;
- }
- ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
- }
-
- /// Reset the vector value associated with \p Key for the given \p Part.
- /// This function can be used to update values that have already been
- /// vectorized. This is the case for "fix-up" operations including type
- /// truncation and the second phase of recurrence vectorization.
- void resetVectorValue(Value *Key, unsigned Part, Value *Vector) {
- assert(hasVectorValue(Key, Part) && "Vector value not set for part");
- VectorMapStorage[Key][Part] = Vector;
- }
-
- /// Reset the scalar value associated with \p Key for \p Part and \p Lane.
- /// This function can be used to update values that have already been
- /// scalarized. This is the case for "fix-up" operations including scalar phi
- /// nodes for scalarized and predicated instructions.
- void resetScalarValue(Value *Key, const VPIteration &Instance,
- Value *Scalar) {
- assert(hasScalarValue(Key, Instance) &&
- "Scalar value not set for part and lane");
- ScalarMapStorage[Key][Instance.Part][Instance.Lane] = Scalar;
- }
-};
-
-/// This class is used to enable the VPlan to invoke a method of ILV. This is
-/// needed until the method is refactored out of ILV and becomes reusable.
-struct VPCallback {
- virtual ~VPCallback() {}
- virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
- virtual Value *getOrCreateScalarValue(Value *V,
- const VPIteration &Instance) = 0;
-};
-
/// VPTransformState holds information passed down when "executing" a VPlan,
/// needed for generating the output IR.
struct VPTransformState {
VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
DominatorTree *DT, IRBuilder<> &Builder,
- VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV,
- VPlan *Plan, VPCallback &Callback)
- : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder),
- ValueMap(ValueMap), ILV(ILV), Plan(Plan), Callback(Callback) {}
+ InnerLoopVectorizer *ILV, VPlan *Plan)
+ : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), ILV(ILV),
+ Plan(Plan) {}
/// The chosen Vectorization and Unroll Factors of the loop being vectorized.
ElementCount VF;
@@ -294,6 +149,10 @@ struct VPTransformState {
I->second[Part];
}
+ bool hasAnyVectorValue(VPValue *Def) const {
+ return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
+ }
+
bool hasScalarValue(VPValue *Def, VPIteration Instance) {
auto I = Data.PerPartScalars.find(Def);
if (I == Data.PerPartScalars.end())
@@ -319,12 +178,6 @@ struct VPTransformState {
Iter->second[Part] = V;
}
- void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part);
- void reset(VPValue *Def, Value *IRDef, Value *V, unsigned Part);
-
- /// Set the generated scalar \p V for \p Def and \p IRDef and the given \p
- /// Instance.
- void set(VPValue *Def, Value *IRDef, Value *V, const VPIteration &Instance);
/// Set the generated scalar \p V for \p Def and the given \p Instance.
void set(VPValue *Def, Value *V, const VPIteration &Instance) {
auto Iter = Data.PerPartScalars.insert({Def, {}});
@@ -384,12 +237,6 @@ struct VPTransformState {
/// Hold a reference to the IRBuilder used to generate output IR code.
IRBuilder<> &Builder;
- /// Hold a reference to the Value state information used when generating the
- /// Values of the output IR.
- VectorizerValueMap &ValueMap;
-
- /// Hold a reference to a mapping between VPValues in VPlan and original
- /// Values they correspond to.
VPValue2ValueTy VPValue2Value;
/// Hold the canonical scalar IV of the vector loop (start=0, step=VF*UF).
@@ -403,8 +250,6 @@ struct VPTransformState {
/// Pointer to the VPlan code is generated for.
VPlan *Plan;
-
- VPCallback &Callback;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
index 5a11cc531c2c..452281cdb19e 100644
--- a/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll
@@ -18,16 +18,13 @@
; CHECK-LABEL: vector.ph:
; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i32 0
; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK: %[[ZVal0:.*]] = insertelement <4 x i1> poison, i1 %[[ZeroTripChk]], i32 0
-; CHECK-NEXT: %[[ZSplat:.*]] = shufflevector <4 x i1> %[[ZVal0]], <4 x i1> poison, <4 x i32> zeroinitializer
; CHECK-LABEL: vector.body:
; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, <4 x i64> %[[VecInd]]
; CHECK: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %[[CSplat]], <4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
-; CHECK: %[[ZCmpExtr:.*]] = extractelement <4 x i1> %[[ZSplat]], i32 0
-; CHECK: br i1 %[[ZCmpExtr]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]]
+; CHECK: br i1 %[[ZeroTripChk]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]]
; CHECK: [[InnerForPh]]:
; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %[[AAddr]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
More information about the llvm-commits mailing list