[llvm] r360758 - [LV] Move getScalarizationOverhead and vector call cost computations to CM. (NFC)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed May 15 03:05:49 PDT 2019
Author: fhahn
Date: Wed May 15 03:05:49 2019
New Revision: 360758
URL: http://llvm.org/viewvc/llvm-project?rev=360758&view=rev
Log:
[LV] Move getScalarizationOverhead and vector call cost computations to CM. (NFC)
This reduces the number of parameters we need to pass in and they seem a
natural fit in LoopVectorizationCostModel. Also simplifies things for
D59995.
As a follow up refactoring, we could only expose a expose a
shouldUseVectorIntrinsic() helper in LoopVectorizationCostModel, instead
of calling getVectorCallCost/getVectorIntrinsicCost in
InnerLoopVectorizer/VPRecipeBuilder.
Reviewers: Ayal, hsaito, dcaballe, rengolin
Reviewed By: rengolin
Differential Revision: https://reviews.llvm.org/D61638
Modified:
llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h
Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=360758&r1=360757&r2=360758&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed May 15 03:05:49 2019
@@ -1169,6 +1169,18 @@ public:
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
+ /// Estimate cost of an intrinsic call instruction CI if it were vectorized
+ /// with factor VF. Return the cost of the instruction, including
+ /// scalarization overhead if it's needed.
+ unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF);
+
+ /// Estimate cost of a call instruction CI if it were vectorized with factor
+ /// VF. Return the cost of the instruction, including scalarization overhead
+ /// if it's needed. The flag NeedToScalarize shows if the call needs to be
+ /// scalarized -
+ // i.e. either vector version isn't available, or is too expensive.
+ unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
+
private:
unsigned NumPredStores = 0;
@@ -1221,6 +1233,10 @@ private:
/// element)
unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
+ /// Estimate the overhead of scalarizing an instruction. This is a
+ /// convenience wrapper for the type-based getScalarizationOverhead API.
+ unsigned getScalarizationOverhead(Instruction *I, unsigned VF);
+
/// Returns whether the instruction is a load or store and will be a emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
@@ -3057,45 +3073,9 @@ static void cse(BasicBlock *BB) {
}
}
-/// Estimate the overhead of scalarizing an instruction. This is a
-/// convenience wrapper for the type-based getScalarizationOverhead API.
-static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
- const TargetTransformInfo &TTI) {
- if (VF == 1)
- return 0;
-
- unsigned Cost = 0;
- Type *RetTy = ToVectorTy(I->getType(), VF);
- if (!RetTy->isVoidTy() &&
- (!isa<LoadInst>(I) ||
- !TTI.supportsEfficientVectorElementLoadStore()))
- Cost += TTI.getScalarizationOverhead(RetTy, true, false);
-
- // Some targets keep addresses scalar.
- if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
- return Cost;
-
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- SmallVector<const Value *, 4> Operands(CI->arg_operands());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- }
- else if (!isa<StoreInst>(I) ||
- !TTI.supportsEfficientVectorElementLoadStore()) {
- SmallVector<const Value *, 4> Operands(I->operand_values());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- }
-
- return Cost;
-}
-
-// Estimate cost of a call instruction CI if it were vectorized with factor VF.
-// Return the cost of the instruction, including scalarization overhead if it's
-// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
-// i.e. either vector version isn't available, or is too expensive.
-static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
- const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI,
- bool &NeedToScalarize) {
+unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
+ unsigned VF,
+ bool &NeedToScalarize) {
Function *F = CI->getCalledFunction();
StringRef FnName = CI->getCalledFunction()->getName();
Type *ScalarRetTy = CI->getType();
@@ -3118,7 +3098,7 @@ static unsigned getVectorCallCost(CallIn
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);
+ unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
@@ -3137,12 +3117,8 @@ static unsigned getVectorCallCost(CallIn
return Cost;
}
-// Estimate cost of an intrinsic call instruction CI if it were vectorized with
-// factor VF. Return the cost of the instruction, including scalarization
-// overhead if it's needed.
-static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
- const TargetTransformInfo &TTI,
- const TargetLibraryInfo *TLI) {
+unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
+ unsigned VF) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
@@ -4126,9 +4102,9 @@ void InnerLoopVectorizer::widenInstructi
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize;
- unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
bool UseVectorIntrinsic =
- ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost;
assert((UseVectorIntrinsic || !NeedToScalarize) &&
"Instruction should be scalarized elsewhere.");
@@ -5522,7 +5498,7 @@ unsigned LoopVectorizationCostModel::get
// Get the overhead of the extractelement and insertelement instructions
// we might create due to scalarization.
- Cost += getScalarizationOverhead(I, VF, TTI);
+ Cost += getScalarizationOverhead(I, VF);
// If we have a predicated store, it may not be executed for each vector
// lane. Scale the cost by the probability of executing the predicated
@@ -5674,6 +5650,34 @@ LoopVectorizationCostModel::getInstructi
return VectorizationCostTy(C, TypeNotScalarized);
}
+unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
+ unsigned VF) {
+
+ if (VF == 1)
+ return 0;
+
+ unsigned Cost = 0;
+ Type *RetTy = ToVectorTy(I->getType(), VF);
+ if (!RetTy->isVoidTy() &&
+ (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
+ Cost += TTI.getScalarizationOverhead(RetTy, true, false);
+
+ // Some targets keep addresses scalar.
+ if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
+ return Cost;
+
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ SmallVector<const Value *, 4> Operands(CI->arg_operands());
+ Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
+ } else if (!isa<StoreInst>(I) ||
+ !TTI.supportsEfficientVectorElementLoadStore()) {
+ SmallVector<const Value *, 4> Operands(I->operand_values());
+ Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
+ }
+
+ return Cost;
+}
+
void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
if (VF == 1)
return;
@@ -5914,7 +5918,7 @@ unsigned LoopVectorizationCostModel::get
// The cost of insertelement and extractelement instructions needed for
// scalarization.
- Cost += getScalarizationOverhead(I, VF, TTI);
+ Cost += getScalarizationOverhead(I, VF);
// Scale the cost by the probability of executing the predicated blocks.
// This assumes the predicated block for each vector lane is equally
@@ -6035,16 +6039,16 @@ unsigned LoopVectorizationCostModel::get
case Instruction::Call: {
bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
- unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
+ unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
if (getVectorIntrinsicIDForCall(CI, TLI))
- return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
+ return std::min(CallCost, getVectorIntrinsicCost(CI, VF));
return CallCost;
}
default:
// The cost of executing VF copies of the scalar instruction. This opcode
// is unknown. Assume that it is the same as 'mul'.
return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
- getScalarizationOverhead(I, VF, TTI);
+ getScalarizationOverhead(I, VF);
} // end of switch.
}
@@ -6638,9 +6642,9 @@ bool VPRecipeBuilder::tryToWiden(Instruc
// version of the instruction.
// Is it beneficial to perform intrinsic call compared to lib call?
bool NeedToScalarize;
- unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
bool UseVectorIntrinsic =
- ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost;
return UseVectorIntrinsic || !NeedToScalarize;
}
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
@@ -6828,7 +6832,7 @@ LoopVectorizationPlanner::buildVPlanWith
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
auto Plan = llvm::make_unique<VPlan>(VPBB);
- VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder);
+ VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
// Represent values that will have defs inside VPlan.
for (Value *V : NeedDef)
Plan->addVPValue(V);
Modified: llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h?rev=360758&r1=360757&r2=360758&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h Wed May 15 03:05:49 2019
@@ -29,9 +29,6 @@ class VPRecipeBuilder {
/// Target Library Info.
const TargetLibraryInfo *TLI;
- /// Target Transform Info.
- const TargetTransformInfo *TTI;
-
/// The legality analysis.
LoopVectorizationLegality *Legal;
@@ -104,11 +101,9 @@ public:
public:
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM, VPBuilder &Builder)
- : OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
- Builder(Builder) {}
+ : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
/// Check if a recipe can be create for \p I withing the given VF \p Range.
/// If a recipe can be created, it adds it to \p VPBB.
More information about the llvm-commits
mailing list