[llvm] 9c710eb - [TTI] NFC: Reduce InstructionCost::getValue() usage...
Daniil Fukalov via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 26 06:38:04 PDT 2022
Author: Daniil Fukalov
Date: 2022-08-26T16:37:32+03:00
New Revision: 9c710ebbdb6079b4176c1b89daf525f0f0e6499c
URL: https://github.com/llvm/llvm-project/commit/9c710ebbdb6079b4176c1b89daf525f0f0e6499c
DIFF: https://github.com/llvm/llvm-project/commit/9c710ebbdb6079b4176c1b89daf525f0f0e6499c.diff
LOG: [TTI] NFC: Reduce InstructionCost::getValue() usage...
in order to propagate the `InstructionCost` value further up the call chain.
Reviewed By: fhahn
Differential Revision: https://reviews.llvm.org/D103406
Added:
Modified:
llvm/include/llvm/Support/InstructionCost.h
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
llvm/lib/Transforms/IPO/PartialInlining.cpp
llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h
index 7656f45b2031d..aa8825ea9eaa4 100644
--- a/llvm/include/llvm/Support/InstructionCost.h
+++ b/llvm/include/llvm/Support/InstructionCost.h
@@ -244,7 +244,7 @@ class InstructionCost {
template <class Function>
auto map(const Function &F) const -> InstructionCost {
if (isValid())
- return F(*getValue());
+ return F(Value);
return getInvalid();
}
};
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index fbc9638850917..9884bdfe18cb3 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -552,7 +552,7 @@ class FunctionSpecializer {
// inlined so that we shouldn't specialize it.
if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
(!ForceFunctionSpecialization &&
- *Metrics.NumInsts.getValue() < SmallFunctionThreshold)) {
+ Metrics.NumInsts < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index ab2be532c0560..d2b37f6a5c0df 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -1353,16 +1353,13 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
if (Cloner.OutlinedFunctions.empty())
return false;
- int SizeCost = 0;
- BlockFrequency WeightedRcost;
- int NonWeightedRcost;
-
auto OutliningCosts = computeOutliningCosts(Cloner);
- assert(std::get<0>(OutliningCosts).isValid() &&
- std::get<1>(OutliningCosts).isValid() && "Expected valid costs");
- SizeCost = *std::get<0>(OutliningCosts).getValue();
- NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();
+ InstructionCost SizeCost = std::get<0>(OutliningCosts);
+ InstructionCost NonWeightedRcost = std::get<1>(OutliningCosts);
+
+ assert(SizeCost.isValid() && NonWeightedRcost.isValid() &&
+ "Expected valid costs");
// Only calculate RelativeToEntryFreq when we are doing single region
// outlining.
@@ -1377,7 +1374,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
// execute the calls to outlined functions.
RelativeToEntryFreq = BranchProbability(0, 1);
- WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
+ BlockFrequency WeightedRcost =
+ BlockFrequency(*NonWeightedRcost.getValue()) * RelativeToEntryFreq;
// The call sequence(s) to the outlined function(s) are larger than the sum of
// the original outlined region size(s), it does not increase the chances of
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 5667eefabad56..1689314b22a07 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -840,7 +840,7 @@ struct TransformDFA {
}
}
- unsigned DuplicationCost = 0;
+ InstructionCost DuplicationCost = 0;
unsigned JumpTableSize = 0;
TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
@@ -851,7 +851,7 @@ struct TransformDFA {
// using binary search, hence the LogBase2().
unsigned CondBranches =
APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
- DuplicationCost = *Metrics.NumInsts.getValue() / CondBranches;
+ DuplicationCost = Metrics.NumInsts / CondBranches;
} else {
// Compared with jump tables, the DFA optimizer removes an indirect branch
// on each loop iteration, thus making branch prediction more precise. The
@@ -859,7 +859,7 @@ struct TransformDFA {
// predictor to make a mistake, and the more benefit there is in the DFA
// optimizer. Thus, the more branch targets there are, the lower is the
// cost of the DFA opt.
- DuplicationCost = *Metrics.NumInsts.getValue() / JumpTableSize;
+ DuplicationCost = Metrics.NumInsts / JumpTableSize;
}
LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 5f958eebfb646..21c00881ae2e5 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -682,7 +682,7 @@ InstructionCost llvm::ApproximateLoopSize(
// that each loop has at least three instructions (likely a conditional
// branch, a comparison feeding that branch, and some kind of loop increment
// feeding that comparison instruction).
- if (LoopSize.isValid() && *LoopSize.getValue() < BEInsns + 1)
+ if (LoopSize.isValid() && LoopSize < BEInsns + 1)
// This is an open coded max() on InstructionCost
LoopSize = BEInsns + 1;
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 1d5529940df3a..7bb7cea055e63 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -316,7 +316,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
L->dump());
return Rotated;
}
- if (*Metrics.NumInsts.getValue() > MaxHeaderSize) {
+ if (Metrics.NumInsts > MaxHeaderSize) {
LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains "
<< Metrics.NumInsts
<< " instructions, which is more than the threshold ("
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 76529e3914d66..efff051401ef0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1182,7 +1182,7 @@ class LoopVectorizationCostModel {
/// If interleave count has been specified by metadata it will be returned.
/// Otherwise, the interleave count is computed and returned. VF and LoopCost
/// are the selected vectorization factor and the cost of the selected VF.
- unsigned selectInterleaveCount(ElementCount VF, unsigned LoopCost);
+ unsigned selectInterleaveCount(ElementCount VF, InstructionCost LoopCost);
/// Memory access instruction may be vectorized in more than one way.
/// Form of instruction after vectorization depends on cost.
@@ -1701,8 +1701,9 @@ class LoopVectorizationCostModel {
/// scalarize and their scalar costs are collected in \p ScalarCosts. A
/// non-negative return value implies the expression will be scalarized.
/// Currently, only single-use chains are considered for scalarization.
- int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
- ElementCount VF);
+ InstructionCost computePredInstDiscount(Instruction *PredInst,
+ ScalarCostsTy &ScalarCosts,
+ ElementCount VF);
/// Collect the instructions that are uniform after vectorization. An
/// instruction is uniform if we represent it with a single scalar value in
@@ -5636,8 +5637,9 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
}
}
-unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
- unsigned LoopCost) {
+unsigned
+LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
+ InstructionCost LoopCost) {
// -- The interleave heuristics --
// We interleave the loop in order to expose ILP and reduce the loop overhead.
// There are many micro-architectural considerations that we can't predict
@@ -5673,9 +5675,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0) {
- InstructionCost C = expectedCost(VF).first;
- assert(C.isValid() && "Expected to have chosen a VF with valid cost");
- LoopCost = *C.getValue();
+ LoopCost = expectedCost(VF).first;
+ assert(LoopCost.isValid() && "Expected to have chosen a VF with valid cost");
// Loop body is free and there is no need for interleaving.
if (LoopCost == 0)
@@ -5803,8 +5804,8 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and interleave until the cost of the
// loop overhead is about 5% of the cost of the loop.
- unsigned SmallIC =
- std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost));
+ unsigned SmallIC = std::min(
+ IC, (unsigned)PowerOf2Floor(SmallLoopCost / *LoopCost.getValue()));
// Interleave until store/load ports (estimated by max interleave count) are
// saturated.
@@ -6130,7 +6131,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
}
}
-int LoopVectorizationCostModel::computePredInstDiscount(
+InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) {
assert(!isUniformAfterVectorization(PredInst, VF) &&
"Instruction marked uniform-after-vectorization will be predicated");
@@ -6239,7 +6240,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
ScalarCosts[I] = ScalarCost;
}
- return *Discount.getValue();
+ return Discount;
}
LoopVectorizationCostModel::VectorizationCostTy
@@ -10305,7 +10306,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (MaybeVF) {
VF = *MaybeVF;
// Select the interleave count.
- IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+ IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
unsigned SelectedIC = std::max(IC, UserIC);
// Optimistically generate runtime checks if they are needed. Drop them if
More information about the llvm-commits
mailing list