[llvm] 18138e0 - [VPlan] Introduce VPWidenSelectRecipe (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 13 00:35:53 PDT 2020
Author: Florian Hahn
Date: 2020-04-13T08:35:28+01:00
New Revision: 18138e02529067144acd4bdb9c0ac09beac5cd3d
URL: https://github.com/llvm/llvm-project/commit/18138e02529067144acd4bdb9c0ac09beac5cd3d
DIFF: https://github.com/llvm/llvm-project/commit/18138e02529067144acd4bdb9c0ac09beac5cd3d.diff
LOG: [VPlan] Introduce VPWidenSelectRecipe (NFC).
Widening a select depends on whether its condition is loop invariant or
not. Rather than checking this at codegen time, the information can be
recorded at VPlan construction time.
This was suggested as part of D76992, to reduce the reliance on
accessing the original underlying IR values.
Reviewers: gilr, rengolin, Ayal, hsaito
Reviewed By: gilr
Differential Revision: https://reviews.llvm.org/D77869
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index c8fe5a6d4738..d77739223668 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -31,6 +31,8 @@
namespace llvm {
+class PredicatedScalarEvolution;
+
/// VPlan-based builder utility analogous to IRBuilder.
class VPBuilder {
VPBasicBlock *BB = nullptr;
@@ -203,6 +205,8 @@ class LoopVectorizationPlanner {
/// The interleaved access analysis.
InterleavedAccessInfo &IAI;
+ PredicatedScalarEvolution &PSE;
+
SmallVector<VPlanPtr, 4> VPlans;
/// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -228,9 +232,10 @@ class LoopVectorizationPlanner {
const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
- InterleavedAccessInfo &IAI)
- : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
- IAI(IAI) {}
+ InterleavedAccessInfo &IAI,
+ PredicatedScalarEvolution &PSE)
+ : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
+ PSE(PSE) {}
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4b7e177a9edd..16aef2991ee1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -413,6 +413,9 @@ class InnerLoopVectorizer {
void widenCallInstruction(CallInst &I, VPUser &ArgOperands,
VPTransformState &State);
+ /// Widen a single select instruction within the innermost loop.
+ void widenSelectInstruction(SelectInst &I, bool InvariantCond);
+
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
void fixVectorizedLoop();
@@ -4232,6 +4235,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
case Instruction::Br:
case Instruction::PHI:
case Instruction::GetElementPtr:
+ case Instruction::Select:
llvm_unreachable("This instruction is handled by a different recipe.");
case Instruction::UDiv:
case Instruction::SDiv:
@@ -4272,35 +4276,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
break;
}
- case Instruction::Select: {
- // Widen selects.
- // If the selector is loop invariant we can create a select
- // instruction with a scalar condition. Otherwise, use vector-select.
- auto *SE = PSE.getSE();
- bool InvariantCond =
- SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop);
- setDebugLocFromInst(Builder, &I);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
-
- auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0});
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
- Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
- Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
- Value *Sel =
- Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
- VectorLoopValueMap.setVectorValue(&I, Part, Sel);
- addMetadata(Sel, &I);
- }
-
- break;
- }
-
case Instruction::ICmp:
case Instruction::FCmp: {
// Widen compares. Generate vector compares.
@@ -4433,6 +4408,28 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
}
}
+void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I,
+ bool InvariantCond) {
+ setDebugLocFromInst(Builder, &I);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+
+ auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0});
+
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
+ Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
+ Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
+ Value *Sel =
+ Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
+ VectorLoopValueMap.setVectorValue(&I, Part, Sel);
+ addMetadata(Sel, &I);
+ }
+}
+
void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
@@ -6937,6 +6934,29 @@ VPRecipeBuilder::tryToWidenCall(Instruction *I, VFRange &Range, VPlan &Plan) {
return new VPWidenCallRecipe(*CI, VPValues);
}
+VPWidenSelectRecipe *VPRecipeBuilder::tryToWidenSelect(Instruction *I,
+ VFRange &Range) {
+ auto *SI = dyn_cast<SelectInst>(I);
+ if (!SI)
+ return nullptr;
+
+ // SI should be widened, unless it is scalar after vectorization,
+ // scalarization is profitable or it is predicated.
+ auto willWiden = [this, SI](unsigned VF) -> bool {
+ return !CM.isScalarAfterVectorization(SI, VF) &&
+ !CM.isProfitableToScalarize(SI, VF) &&
+ !CM.isScalarWithPredication(SI, VF);
+ };
+ if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
+ return nullptr;
+
+ auto *SE = PSE.getSE();
+ bool InvariantCond =
+ SE->isLoopInvariant(PSE.getSCEV(SI->getOperand(0)), OrigLoop);
+ // Success: widen this instruction.
+ return new VPWidenSelectRecipe(*SI, InvariantCond);
+}
+
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VFRange &Range) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
@@ -7088,6 +7108,7 @@ bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
// operations, inductions and Phi nodes.
if ((Recipe = tryToWidenCall(Instr, Range, *Plan)) ||
(Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
+ (Recipe = tryToWidenSelect(Instr, Range)) ||
(Recipe = tryToOptimizeInduction(Instr, Range)) ||
(Recipe = tryToBlend(Instr, Plan)) ||
(isa<PHINode>(Instr) &&
@@ -7194,7 +7215,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
- VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
+ VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder);
// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
@@ -7410,6 +7431,10 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
State.ILV->widenCallInstruction(Ingredient, User, State);
}
+void VPWidenSelectRecipe::execute(VPTransformState &State) {
+ State.ILV->widenSelectInstruction(Ingredient, InvariantCond);
+}
+
void VPWidenRecipe::execute(VPTransformState &State) {
State.ILV->widenInstruction(Ingredient);
}
@@ -7620,7 +7645,7 @@ static bool processLoopInVPlanNativePath(
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
@@ -7779,7 +7804,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
CM.collectValuesToIgnore();
// Use the planner for vectorization.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
// Get user vectorization factor.
unsigned UserVF = Hints.getWidth();
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 805f11d1ebb1..a12c3f33ab53 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -35,6 +35,8 @@ class VPRecipeBuilder {
/// The profitablity analysis.
LoopVectorizationCostModel &CM;
+ PredicatedScalarEvolution &PSE;
+
VPBuilder &Builder;
/// When we if-convert we need to create edge masks. We have to cache values
@@ -113,6 +115,8 @@ class VPRecipeBuilder {
VPWidenCallRecipe *tryToWidenCall(Instruction *I, VFRange &Range,
VPlan &Plan);
+ VPWidenSelectRecipe *tryToWidenSelect(Instruction *I, VFRange &Range);
+
/// Check if \p I can be widened within the given VF \p Range. If \p I can be
/// widened for \p Range.Start, build a new VPWidenRecipe and return it.
/// Range.End may be decreased to ensure same decision from \p Range.Start to
@@ -127,8 +131,10 @@ class VPRecipeBuilder {
public:
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
LoopVectorizationLegality *Legal,
- LoopVectorizationCostModel &CM, VPBuilder &Builder)
- : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
+ LoopVectorizationCostModel &CM,
+ PredicatedScalarEvolution &PSE, VPBuilder &Builder)
+ : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
+ Builder(Builder) {}
/// Check if a recipe can be create for \p I withing the given VF \p Range.
/// If a recipe can be created, it adds it to \p VPBB.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ae295bdcac66..b094a6f33fa4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -718,6 +718,13 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
<< Indent << "\"WIDEN-CALL " << VPlanIngredient(&Ingredient) << "\\l\"";
}
+void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << " +\n"
+ << Indent << "\"WIDEN-SELECT" << VPlanIngredient(&Ingredient)
+ << (InvariantCond ? " (condition is loop invariant)" : "") << "\\l\"";
+}
+
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << " +\n" << Indent << "\"WIDEN\\l\"";
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 030ba9812d14..15546f231659 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -617,6 +617,7 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock> {
VPWidenMemoryInstructionSC,
VPWidenPHISC,
VPWidenSC,
+ VPWidenSelectSC
};
VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
@@ -813,6 +814,38 @@ class VPWidenCallRecipe : public VPRecipeBase {
VPSlotTracker &SlotTracker) const override;
};
+/// A recipe for widening select instructions.
+class VPWidenSelectRecipe : public VPRecipeBase {
+private:
+ /// Hold the select to be widened.
+ SelectInst &Ingredient;
+
+ /// Is the condition of the select loop invariant?
+ bool InvariantCond;
+
+ /// Hold VPValues for the operands of the select.
+ VPUser User;
+
+public:
+ VPWidenSelectRecipe(SelectInst &I, bool InvariantCond)
+ : VPRecipeBase(VPWidenSelectSC), Ingredient(I),
+ InvariantCond(InvariantCond) {}
+
+ ~VPWidenSelectRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPRecipeBase *V) {
+ return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
+ }
+
+ /// Produce a widened version of the select instruction.
+ void execute(VPTransformState &State) override;
+
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+};
+
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeBase {
GetElementPtrInst *GEP;
More information about the llvm-commits
mailing list