[llvm] 7f15254 - [LV] Apply sink-after & interleave-groups as VPlan transformations (NFCI)
Gil Rapaport via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 9 10:55:54 PST 2019
Author: Gil Rapaport
Date: 2019-11-09T20:52:25+02:00
New Revision: 7f152543e4ff91f2ce8578a1b3b3ec18f580345a
URL: https://github.com/llvm/llvm-project/commit/7f152543e4ff91f2ce8578a1b3b3ec18f580345a
DIFF: https://github.com/llvm/llvm-project/commit/7f152543e4ff91f2ce8578a1b3b3ec18f580345a.diff
LOG: [LV] Apply sink-after & interleave-groups as VPlan transformations (NFCI)
This recommits 11ed1c0239fd51fd2f064311dc7725277ed0a994 (reverted in
9f08ce0d2197d4f163dfa4633eae2347ce8fc881 for failing an assert) with a fix:
tryToWidenMemory() now first checks if the widening decision is to interleave,
thus maintaining previous behavior where tryToInterleaveMemory() was called
first, giving priority to interleave decisions over widening/scalarization. This
commit adds the test case that exposed this bug as a LIT.
Added:
llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll
Modified:
llvm/include/llvm/Analysis/VectorUtils.h
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 4a61c2bc35c7..5dc14dbe6574 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -542,13 +542,10 @@ class InterleavedAccessInfo {
/// formation for predicated accesses, we may be able to relax this limitation
/// in the future once we handle more complicated blocks.
void reset() {
- SmallPtrSet<InterleaveGroup<Instruction> *, 4> DelSet;
- // Avoid releasing a pointer twice.
- for (auto &I : InterleaveGroupMap)
- DelSet.insert(I.second);
- for (auto *Ptr : DelSet)
- delete Ptr;
InterleaveGroupMap.clear();
+ for (auto *Ptr : InterleaveGroups)
+ delete Ptr;
+ InterleaveGroups.clear();
RequiresScalarEpilogue = false;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index a5e85f27fabf..614f931cbc68 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -201,6 +201,9 @@ class LoopVectorizationPlanner {
/// The profitability analysis.
LoopVectorizationCostModel &CM;
+ /// The interleaved access analysis.
+ InterleavedAccessInfo &IAI;
+
SmallVector<VPlanPtr, 4> VPlans;
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -223,8 +226,10 @@ class LoopVectorizationPlanner {
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
- LoopVectorizationCostModel &CM)
- : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {}
+ LoopVectorizationCostModel &CM,
+ InterleavedAccessInfo &IAI)
+ : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
+ IAI(IAI) {}
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9368dd7c8b14..5c7ff8d76b40 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6710,37 +6710,6 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
return BlockMaskCache[BB] = BlockMask;
}
-VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I,
- VFRange &Range,
- VPlanPtr &Plan) {
- const InterleaveGroup<Instruction> *IG = CM.getInterleavedAccessGroup(I);
- if (!IG)
- return nullptr;
-
- // Now check if IG is relevant for VF's in the given range.
- auto isIGMember = [&](Instruction *I) -> std::function<bool(unsigned)> {
- return [=](unsigned VF) -> bool {
- return (VF >= 2 && // Query is illegal for VF == 1
- CM.getWideningDecision(I, VF) ==
- LoopVectorizationCostModel::CM_Interleave);
- };
- };
- if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range))
- return nullptr;
-
- // I is a member of an InterleaveGroup for VF's in the (possibly trimmed)
- // range. If it's the primary member of the IG construct a VPInterleaveRecipe.
- // Otherwise, it's an adjunct member of the IG, do not construct any Recipe.
- assert(I == IG->getInsertPos() &&
- "Generating a recipe for an adjunct member of an interleave group");
-
- VPValue *Mask = nullptr;
- if (Legal->isMaskRequired(I))
- Mask = createBlockInMask(I->getParent(), Plan);
-
- return new VPInterleaveRecipe(IG, Mask);
-}
-
VPWidenMemoryInstructionRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
VPlanPtr &Plan) {
@@ -6750,15 +6719,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
auto willWiden = [&](unsigned VF) -> bool {
if (VF == 1)
return false;
- if (CM.isScalarAfterVectorization(I, VF) ||
- CM.isProfitableToScalarize(I, VF))
- return false;
LoopVectorizationCostModel::InstWidening Decision =
CM.getWideningDecision(I, VF);
assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
"CM decision should be taken at this point.");
- assert(Decision != LoopVectorizationCostModel::CM_Interleave &&
- "Interleave memory opportunity should be caught earlier.");
+ if (Decision == LoopVectorizationCostModel::CM_Interleave)
+ return true;
+ if (CM.isScalarAfterVectorization(I, VF) ||
+ CM.isProfitableToScalarize(I, VF))
+ return false;
return Decision != LoopVectorizationCostModel::CM_Scalarize;
};
@@ -6923,15 +6892,21 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
return false;
+ // If this ingredient's recipe is to be recorded, keep its recipe a singleton
+ // to avoid having to split recipes later.
+ bool IsSingleton = Ingredient2Recipe.count(I);
+
// Success: widen this instruction. We optimize the common case where
// consecutive instructions can be represented by a single recipe.
- if (!VPBB->empty()) {
- VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
- if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
- return true;
- }
+ if (!IsSingleton && !VPBB->empty() && LastExtensibleRecipe == &VPBB->back() &&
+ LastExtensibleRecipe->appendInstruction(I))
+ return true;
- VPBB->appendRecipe(new VPWidenRecipe(I));
+ VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I);
+ if (!IsSingleton)
+ LastExtensibleRecipe = WidenRecipe;
+ setRecipe(I, WidenRecipe);
+ VPBB->appendRecipe(WidenRecipe);
return true;
}
@@ -6947,6 +6922,7 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
+ setRecipe(I, Recipe);
// Find if I uses a predicated instruction. If so, it will use its scalar
// value. Avoid hoisting the insert-element which packs the scalar value into
@@ -7005,36 +6981,20 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
VPlanPtr &Plan, VPBasicBlock *VPBB) {
VPRecipeBase *Recipe = nullptr;
- // Check if Instr should belong to an interleave memory recipe, or already
- // does. In the latter case Instr is irrelevant.
- if ((Recipe = tryToInterleaveMemory(Instr, Range, Plan))) {
- VPBB->appendRecipe(Recipe);
- return true;
- }
-
- // Check if Instr is a memory operation that should be widened.
- if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) {
- VPBB->appendRecipe(Recipe);
- return true;
- }
- // Check if Instr should form some PHI recipe.
- if ((Recipe = tryToOptimizeInduction(Instr, Range))) {
- VPBB->appendRecipe(Recipe);
- return true;
- }
- if ((Recipe = tryToBlend(Instr, Plan))) {
+ // First, check for specific widening recipes that deal with memory
+ // operations, inductions and Phi nodes.
+ if ((Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
+ (Recipe = tryToOptimizeInduction(Instr, Range)) ||
+ (Recipe = tryToBlend(Instr, Plan)) ||
+ (isa<PHINode>(Instr) &&
+ (Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
+ setRecipe(Instr, Recipe);
VPBB->appendRecipe(Recipe);
return true;
}
- if (PHINode *Phi = dyn_cast<PHINode>(Instr)) {
- VPBB->appendRecipe(new VPWidenPHIRecipe(Phi));
- return true;
- }
- // Check if Instr is to be widened by a general VPWidenRecipe, after
- // having first checked for specific widening recipes that deal with
- // Interleave Groups, Inductions and Phi nodes.
+ // Check if Instr is to be widened by a general VPWidenRecipe.
if (tryToWiden(Instr, VPBB, Range))
return true;
@@ -7090,19 +7050,57 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VFRange &Range, SmallPtrSetImpl<Value *> &NeedDef,
SmallPtrSetImpl<Instruction *> &DeadInstructions) {
+
// Hold a mapping from predicated instructions to their recipes, in order to
// fix their AlsoPack behavior if a user is determined to replicate and use a
// scalar instead of vector value.
DenseMap<Instruction *, VPReplicateRecipe *> PredInst2Recipe;
DenseMap<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
- DenseMap<Instruction *, Instruction *> SinkAfterInverse;
+
+ SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
+
+ VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
+
+ // ---------------------------------------------------------------------------
+ // Pre-construction: record ingredients whose recipes we'll need to further
+ // process after constructing the initial VPlan.
+ // ---------------------------------------------------------------------------
+
+ // Mark instructions we'll need to sink later and their targets as
+ // ingredients whose recipe we'll need to record.
+ for (auto &Entry : SinkAfter) {
+ RecipeBuilder.recordRecipeOf(Entry.first);
+ RecipeBuilder.recordRecipeOf(Entry.second);
+ }
+
+ // For each interleave group which is relevant for this (possibly trimmed)
+ // Range, add it to the set of groups to be later applied to the VPlan and add
+ // placeholders for its members' Recipes which we'll be replacing with a
+ // single VPInterleaveRecipe.
+ for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
+ auto applyIG = [IG, this](unsigned VF) -> bool {
+ return (VF >= 2 && // Query is illegal for VF == 1
+ CM.getWideningDecision(IG->getInsertPos(), VF) ==
+ LoopVectorizationCostModel::CM_Interleave);
+ };
+ if (!getDecisionAndClampRange(applyIG, Range))
+ continue;
+ InterleaveGroups.insert(IG);
+ for (unsigned i = 0; i < IG->getFactor(); i++)
+ if (Instruction *Member = IG->getMember(i))
+ RecipeBuilder.recordRecipeOf(Member);
+ };
+
+ // ---------------------------------------------------------------------------
+ // Build initial VPlan: Scan the body of the loop in a topological order to
+ // visit each basic block after having visited its predecessor basic blocks.
+ // ---------------------------------------------------------------------------
// Create a dummy pre-entry VPBasicBlock to start building the VPlan.
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
auto Plan = std::make_unique<VPlan>(VPBB);
- VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// Represent values that will have defs inside VPlan.
for (Value *V : NeedDef)
Plan->addVPValue(V);
@@ -7123,8 +7121,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
std::vector<Instruction *> Ingredients;
- // Organize the ingredients to vectorize from current basic block in the
- // right order.
+ // Introduce each ingredient into VPlan.
for (Instruction &I : BB->instructionsWithoutDebug()) {
Instruction *Instr = &I;
@@ -7134,43 +7131,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
DeadInstructions.find(Instr) != DeadInstructions.end())
continue;
- // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct
- // member of the IG, do not construct any Recipe for it.
- const InterleaveGroup<Instruction> *IG =
- CM.getInterleavedAccessGroup(Instr);
- if (IG && Instr != IG->getInsertPos() &&
- Range.Start >= 2 && // Query is illegal for VF == 1
- CM.getWideningDecision(Instr, Range.Start) ==
- LoopVectorizationCostModel::CM_Interleave) {
- auto SinkCandidate = SinkAfterInverse.find(Instr);
- if (SinkCandidate != SinkAfterInverse.end())
- Ingredients.push_back(SinkCandidate->second);
- continue;
- }
-
- // Move instructions to handle first-order recurrences, step 1: avoid
- // handling this instruction until after we've handled the instruction it
- // should follow.
- auto SAIt = SinkAfter.find(Instr);
- if (SAIt != SinkAfter.end()) {
- LLVM_DEBUG(dbgs() << "Sinking" << *SAIt->first << " after"
- << *SAIt->second
- << " to vectorize a 1st order recurrence.\n");
- SinkAfterInverse[SAIt->second] = Instr;
- continue;
- }
-
- Ingredients.push_back(Instr);
-
- // Move instructions to handle first-order recurrences, step 2: push the
- // instruction to be sunk at its insertion point.
- auto SAInvIt = SinkAfterInverse.find(Instr);
- if (SAInvIt != SinkAfterInverse.end())
- Ingredients.push_back(SAInvIt->second);
- }
-
- // Introduce each ingredient into VPlan.
- for (Instruction *Instr : Ingredients) {
if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
continue;
@@ -7195,6 +7155,32 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
delete PreEntry;
+ // ---------------------------------------------------------------------------
+ // Transform initial VPlan: Apply previously taken decisions, in order, to
+ // bring the VPlan to its final state.
+ // ---------------------------------------------------------------------------
+
+ // Apply Sink-After legal constraints.
+ for (auto &Entry : SinkAfter) {
+ VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
+ VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
+ Sink->moveAfter(Target);
+ }
+
+ // Interleave memory: for each Interleave Group we marked earlier as relevant
+ // for this VPlan, replace the Recipes widening its memory instructions with a
+ // single VPInterleaveRecipe at its insertion point.
+ for (auto IG : InterleaveGroups) {
+ auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
+ RecipeBuilder.getRecipe(IG->getInsertPos()));
+ (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
+
+ for (unsigned i = 0; i < IG->getFactor(); ++i)
+ if (Instruction *Member = IG->getMember(i)) {
+ RecipeBuilder.getRecipe(Member)->eraseFromParent();
+ }
+ }
+
// Finally, if tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the end of the latch.
if (CM.foldTailByMasking()) {
@@ -7427,12 +7413,11 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
- if (!User)
+ VPValue *Mask = getMask();
+ if (!Mask)
return State.ILV->vectorizeMemoryInstruction(&Instr);
- // Last (and currently only) operand is a mask.
InnerLoopVectorizer::VectorParts MaskValues(State.UF);
- VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
for (unsigned Part = 0; Part < State.UF; ++Part)
MaskValues[Part] = State.get(Mask, Part);
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
@@ -7481,7 +7466,7 @@ static bool processLoopInVPlanNativePath(
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
@@ -7641,7 +7626,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
CM.collectValuesToIgnore();
// Use the planner for vectorization.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
// Get user vectorization factor.
unsigned UserVF = Hints.getWidth();
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 0ca6a6b93cfd..598fb00e956e 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -47,6 +47,24 @@ class VPRecipeBuilder {
EdgeMaskCacheTy EdgeMaskCache;
BlockMaskCacheTy BlockMaskCache;
+ // VPlan-VPlan transformations support: Hold a mapping from ingredients to
+ // their recipe. To save on memory, only do so for selected ingredients,
+ // marked by having a nullptr entry in this map. If those ingredients get a
+ // VPWidenRecipe, also avoid compressing other ingredients into it to avoid
+ // having to split such recipes later.
+ DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
+ VPWidenRecipe *LastExtensibleRecipe = nullptr;
+
+ /// Set the recipe created for given ingredient. This operation is a no-op for
+ /// ingredients that were not marked using a nullptr entry in the map.
+ void setRecipe(Instruction *I, VPRecipeBase *R) {
+ if (!Ingredient2Recipe.count(I))
+ return;
+ assert(Ingredient2Recipe[I] == nullptr &&
+ "Recipe already set for ingredient");
+ Ingredient2Recipe[I] = R;
+ }
+
public:
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
@@ -57,16 +75,22 @@ class VPRecipeBuilder {
/// and DST.
VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
- /// Check if \I belongs to an Interleave Group within the given VF \p Range,
- /// \return true in the first returned value if so and false otherwise.
- /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG
- /// for \p Range.Start, and provide it as the second returned value.
- /// Note that if \I is an adjunct member of an IG for \p Range.Start, the
- /// \return value is <true, nullptr>, as it is handled by another recipe.
- /// \p Range.End may be decreased to ensure same decision from \p Range.Start
- /// to \p Range.End.
- VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range,
- VPlanPtr &Plan);
+ /// Mark given ingredient for recording its recipe once one is created for
+ /// it.
+ void recordRecipeOf(Instruction *I) {
+ assert((!Ingredient2Recipe.count(I) || Ingredient2Recipe[I] == nullptr) &&
+ "Recipe already set for ingredient");
+ Ingredient2Recipe[I] = nullptr;
+ }
+
+ /// Return the recipe created for given ingredient.
+ VPRecipeBase *getRecipe(Instruction *I) {
+ assert(Ingredient2Recipe.count(I) &&
+ "Recording this ingredients recipe was not requested");
+ assert(Ingredient2Recipe[I] != nullptr &&
+ "Ingredient doesn't have a recipe");
+ return Ingredient2Recipe[I];
+ }
/// Check if \I is a memory instruction to be widened for \p Range.Start and
/// potentially masked. Such instructions are handled by a recipe that takes
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4b80d1fb20aa..bc32e54be720 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -275,18 +275,35 @@ void VPRegionBlock::execute(VPTransformState *State) {
}
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
+ assert(!Parent && "Recipe already in some VPBasicBlock");
+ assert(InsertPos->getParent() &&
+ "Insertion position not in any VPBasicBlock");
Parent = InsertPos->getParent();
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}
+void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
+ assert(!Parent && "Recipe already in some VPBasicBlock");
+ assert(InsertPos->getParent() &&
+ "Insertion position not in any VPBasicBlock");
+ Parent = InsertPos->getParent();
+ Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
+}
+
+void VPRecipeBase::removeFromParent() {
+ assert(getParent() && "Recipe not in any VPBasicBlock");
+ getParent()->getRecipeList().remove(getIterator());
+ Parent = nullptr;
+}
+
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
+ assert(getParent() && "Recipe not in any VPBasicBlock");
return getParent()->getRecipeList().erase(getIterator());
}
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
- InsertPos->getParent()->getRecipeList().splice(
- std::next(InsertPos->getIterator()), getParent()->getRecipeList(),
- getIterator());
+ removeFromParent();
+ insertAfter(InsertPos);
}
void VPInstruction::generateInstruction(VPTransformState &State,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6eeec0f21fd0..226c6c0279d7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -567,6 +567,7 @@ class VPBlockBase {
/// instructions.
class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
friend VPBasicBlock;
+ friend class VPBlockUtils;
private:
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@@ -615,10 +616,18 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
/// the specified recipe.
void insertBefore(VPRecipeBase *InsertPos);
+ /// Insert an unlinked Recipe into a basic block immediately after
+ /// the specified Recipe.
+ void insertAfter(VPRecipeBase *InsertPos);
+
/// Unlink this recipe from its current VPBasicBlock and insert it into
/// the VPBasicBlock that MovePos lives in, right after MovePos.
void moveAfter(VPRecipeBase *MovePos);
+ /// This method unlinks 'this' from the containing basic block, but does not
+ /// delete it.
+ void removeFromParent();
+
/// This method unlinks 'this' from the containing basic block and deletes it.
///
/// \returns an iterator pointing to the element after the erased one
@@ -973,6 +982,13 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
}
+ /// Return the mask used by this recipe. Note that a full mask is represented
+ /// by a nullptr.
+ VPValue *getMask() {
+ // Mask is the last operand.
+ return User ? User->getOperand(User->getNumOperands() - 1) : nullptr;
+ }
+
/// Generate the wide load/store.
void execute(VPTransformState &State) override;
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 998f412674bd..f86dcd7e2e7c 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -572,3 +572,38 @@ for.body:
for.end:
ret void
}
+
+; Do not sink branches: While branches are if-converted and do not require
+; sinking, instructions with side effects (e.g. loads) conditioned by those
+; branches will become users of the condition bit after vectorization and would
+; need to be sunk if the loop is vectorized.
+define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
+; NO-SINK-AFTER-LABEL: do_not_sink_branch
+; NO-SINK-AFTER-NOT: vector.ph:
+; NO-SINK-AFTER: }
+entry:
+ %cmp530 = icmp slt i32 0, %tc
+ br label %for.body4
+
+for.body4: ; preds = %cond.end, %entry
+ %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
+ %cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
+ br i1 %cmp534, label %cond.true, label %cond.end
+
+cond.true: ; preds = %for.body4
+ %arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
+ %in.val = load i32, i32* %arrayidx7, align 4
+ br label %cond.end
+
+cond.end: ; preds = %for.body4, %cond.true
+ %cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
+ %arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
+ store i32 %cond, i32* %arrayidx8, align 4
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %cmp5 = icmp slt i32 %indvars.iv.next, %tc
+ %exitcond = icmp eq i32 %indvars.iv.next, %x
+ br i1 %exitcond, label %for.end12.loopexit, label %for.body4
+
+for.end12.loopexit: ; preds = %cond.end
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll
new file mode 100644
index 000000000000..b56470cec0ab
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-uniform-load.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true < %s
+
+; Make sure the vectorizer can handle this loop: The strided load is only used
+; by the loop's exit condition, which is not vectorized, and is therefore
+; considered uniform while also forming an interleave group.
+
+%0 = type { i32 ()*, i32 }
+
+ at 0 = internal unnamed_addr constant [59 x %0] [%0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 {i32 ()* null, i32 258}, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer, %0 zeroinitializer, %0 zeroinitializer,
+%0 zeroinitializer], align 8
+
+define dso_local void @test_dead_load(i32 %arg) {
+; CHECK-LABEL: @test_dead_load(
+; CHECK: vector.body:
+; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* %3, align 8
+; CHECK: %strided.vec = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+bb1:
+ br label %bb2
+
+bb2:
+ %tmp = phi %0* [ %tmp6, %bb2 ], [ getelementptr inbounds ([59 x %0], [59 x %0]* @0, i64 0, i64 0), %bb1 ]
+ %tmp3 = getelementptr inbounds %0, %0* %tmp, i64 0, i32 1
+ %tmp4 = load i32, i32* %tmp3, align 8
+ %tmp5 = icmp eq i32 %tmp4, 258
+ %tmp6 = getelementptr inbounds %0, %0* %tmp, i64 1
+ br i1 %tmp5, label %bb65, label %bb2
+
+bb65:
+ unreachable
+}
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 57567e7d8434..67936a83efaf 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -83,6 +83,7 @@ TEST(VPInstructionTest, moveAfter) {
CHECK_ITERATOR(VPBB1, I2, I1);
CHECK_ITERATOR(VPBB2, I4, I3, I5);
+ EXPECT_EQ(I3->getParent(), I4->getParent());
}
} // namespace
More information about the llvm-commits
mailing list