[llvm] 61eb12e - [BOLT] introducing profi params
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 9 12:03:46 PST 2023
Author: spupyrev
Date: 2023-01-09T12:03:28-08:00
New Revision: 61eb12e1f423063b0ead944827dc53b02baed0d4
URL: https://github.com/llvm/llvm-project/commit/61eb12e1f423063b0ead944827dc53b02baed0d4
DIFF: https://github.com/llvm/llvm-project/commit/61eb12e1f423063b0ead944827dc53b02baed0d4.diff
LOG: [BOLT] introducing profi params
We want to use profile inference (**profi**) in BOLT for stale profile matching.
To this end, I am making a few changes to the interface of the algorithm.
This is the first change for existing usages of profi (e.g., CSSPGO):
- introducing an object holding the algorithmic parameters;
- some renaming of existing options;
- dropped the unused option, SampleProfileInferEntryCount, as we don't plan to change its default value;
- no changes in the output / tests.
Reviewed By: hoy
Differential Revision: https://reviews.llvm.org/D134756
Added:
Modified:
llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
llvm/lib/Transforms/Utils/SampleProfileInference.cpp
llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index 5a4c28063a1dd..070ecce282de6 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -48,7 +48,7 @@ struct FlowJump;
struct FlowBlock {
uint64_t Index;
uint64_t Weight{0};
- bool UnknownWeight{false};
+ bool HasUnknownWeight{false};
uint64_t Flow{0};
bool HasSelfEdge{false};
std::vector<FlowJump *> SuccJumps;
@@ -74,9 +74,45 @@ struct FlowFunction {
std::vector<FlowBlock> Blocks;
std::vector<FlowJump> Jumps;
/// The index of the entry block.
- uint64_t Entry;
+ uint64_t Entry{0};
};
+/// Various thresholds and options controlling the behavior of the profile
+/// inference algorithm. Default values are tuned for several large-scale
+/// applications, and can be modified via corresponding command-line flags.
+struct ProfiParams {
+ /// Evenly distribute flow when there are multiple equally likely options.
+ bool EvenFlowDistribution{false};
+
+ /// Evenly re-distribute flow among unknown subgraphs.
+ bool RebalanceUnknown{false};
+
+ /// Join isolated components having positive flow.
+ bool JoinIslands{false};
+
+ /// The cost of increasing a block's count by one.
+ unsigned CostBlockInc{0};
+
+ /// The cost of decreasing a block's count by one.
+ unsigned CostBlockDec{0};
+
+ /// The cost of increasing a count of zero-weight block by one.
+ unsigned CostBlockZeroInc{0};
+
+ /// The cost of increasing the entry block's count by one.
+ unsigned CostBlockEntryInc{0};
+
+ /// The cost of decreasing the entry block's count by one.
+ unsigned CostBlockEntryDec{0};
+
+ /// The cost of increasing an unknown block's count by one.
+ unsigned CostBlockUnknownInc{0};
+
+ /// The cost of taking an unlikely block/jump.
+ const int64_t CostUnlikely = ((int64_t)1) << 30;
+};
+
+void applyFlowInference(const ProfiParams &Params, FlowFunction &Func);
void applyFlowInference(FlowFunction &Func);
/// Sample profile inference pass.
@@ -171,10 +207,10 @@ void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights,
for (const auto *BB : BasicBlocks) {
FlowBlock Block;
if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) {
- Block.UnknownWeight = false;
+ Block.HasUnknownWeight = false;
Block.Weight = SampleBlockWeights[BB];
} else {
- Block.UnknownWeight = true;
+ Block.HasUnknownWeight = true;
Block.Weight = 0;
}
Block.Index = Func.Blocks.size();
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 2250e928d1e64..19aef8050d44b 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -76,7 +76,6 @@ template <> struct IRTraits<BasicBlock> {
} // end namespace afdo_detail
extern cl::opt<bool> SampleProfileUseProfi;
-extern cl::opt<bool> SampleProfileInferEntryCount;
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
@@ -922,8 +921,7 @@ void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation(
if (SampleProfileUseProfi) {
const BasicBlockT *EntryBB = getEntryBB(&F);
ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
- if (BlockWeights[EntryBB] > 0 &&
- (SampleProfileInferEntryCount || !EntryWeight)) {
+ if (BlockWeights[EntryBB] > 0) {
getFunction(F).setEntryCount(
ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
&InlinedGUIDs);
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index a8b817d63ca56..f9870196d9d93 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -26,34 +26,42 @@ using namespace llvm;
namespace {
-static cl::opt<bool> SampleProfileEvenCountDistribution(
- "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
- cl::desc("Try to evenly distribute counts when there are multiple equally "
+static cl::opt<bool> SampleProfileEvenFlowDistribution(
+ "sample-profile-even-flow-distribution", cl::init(true), cl::Hidden,
+ cl::desc("Try to evenly distribute flow when there are multiple equally "
"likely options."));
-static cl::opt<unsigned> SampleProfileMaxDfsCalls(
- "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
- cl::desc("Maximum number of dfs iterations for even count distribution."));
+static cl::opt<bool> SampleProfileRebalanceUnknown(
+ "sample-profile-rebalance-unknown", cl::init(true), cl::Hidden,
+ cl::desc("Evenly re-distribute flow among unknown subgraphs."));
-static cl::opt<unsigned> SampleProfileProfiCostInc(
- "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
- cl::desc("A cost of increasing a block's count by one."));
+static cl::opt<bool> SampleProfileJoinIslands(
+ "sample-profile-join-islands", cl::init(true), cl::Hidden,
+ cl::desc("Join isolated components having positive flow."));
-static cl::opt<unsigned> SampleProfileProfiCostDec(
- "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
- cl::desc("A cost of decreasing a block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockInc(
+ "sample-profile-profi-cost-block-inc", cl::init(10), cl::Hidden,
+ cl::desc("The cost of increasing a block's count by one."));
-static cl::opt<unsigned> SampleProfileProfiCostIncZero(
- "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
- cl::desc("A cost of increasing a count of zero-weight block by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockDec(
+ "sample-profile-profi-cost-block-dec", cl::init(20), cl::Hidden,
+ cl::desc("The cost of decreasing a block's count by one."));
-static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
- "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
- cl::desc("A cost of increasing the entry block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryInc(
+ "sample-profile-profi-cost-block-entry-inc", cl::init(40), cl::Hidden,
+ cl::desc("The cost of increasing the entry block's count by one."));
-static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
- "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
- cl::desc("A cost of decreasing the entry block's count by one."));
+static cl::opt<unsigned> SampleProfileProfiCostBlockEntryDec(
+ "sample-profile-profi-cost-block-entry-dec", cl::init(10), cl::Hidden,
+ cl::desc("The cost of decreasing the entry block's count by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockZeroInc(
+ "sample-profile-profi-cost-block-zero-inc", cl::init(11), cl::Hidden,
+ cl::desc("The cost of increasing a count of zero-weight block by one."));
+
+static cl::opt<unsigned> SampleProfileProfiCostBlockUnknownInc(
+ "sample-profile-profi-cost-block-unknown-inc", cl::init(0), cl::Hidden,
+ cl::desc("The cost of increasing an unknown block's count by one."));
/// A value indicating an infinite flow/capacity/weight of a block/edge.
/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
@@ -76,6 +84,8 @@ static constexpr int64_t INF = ((int64_t)1) << 50;
/// minimum total cost respecting the given edge capacities.
class MinCostMaxFlow {
public:
+ MinCostMaxFlow(const ProfiParams &Params) : Params(Params) {}
+
// Initialize algorithm's data structures for a network of a given size.
void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
Source = SourceNode;
@@ -83,7 +93,7 @@ class MinCostMaxFlow {
Nodes = std::vector<Node>(NodeCount);
Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
- if (SampleProfileEvenCountDistribution)
+ if (Params.EvenFlowDistribution)
AugmentingEdges =
std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
}
@@ -166,11 +176,6 @@ class MinCostMaxFlow {
return Flow;
}
- /// A cost of taking an unlikely jump.
- static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
- /// Minimum BaseDistance for the jump distance values in island joining.
- static constexpr uint64_t MinBaseDistance = 10000;
-
private:
/// Iteratively find an augmentation path/dag in the network and send the
/// flow along its edges. The method returns the number of applied iterations.
@@ -180,7 +185,7 @@ class MinCostMaxFlow {
uint64_t PathCapacity = computeAugmentingPathCapacity();
while (PathCapacity > 0) {
bool Progress = false;
- if (SampleProfileEvenCountDistribution) {
+ if (Params.EvenFlowDistribution) {
// Identify node/edge candidates for augmentation
identifyShortestEdges(PathCapacity);
@@ -253,7 +258,7 @@ class MinCostMaxFlow {
// from Source to Target; it follows from inequalities
// Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
// >= Dist[Source, V]
- if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
+ if (!Params.EvenFlowDistribution && Nodes[Target].Distance == 0)
break;
if (Nodes[Src].Distance > Nodes[Target].Distance)
continue;
@@ -342,7 +347,7 @@ class MinCostMaxFlow {
if (Edge.OnShortestPath) {
// If we haven't seen Edge.Dst so far, continue DFS search there
- if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
+ if (Dst.Discovery == 0 && Dst.NumCalls < MaxDfsCalls) {
Dst.Discovery = ++Time;
Stack.emplace(Edge.Dst, 0);
Dst.NumCalls++;
@@ -512,6 +517,9 @@ class MinCostMaxFlow {
}
}
+ /// Maximum number of DFS iterations for DAG finding.
+ static constexpr uint64_t MaxDfsCalls = 10;
+
/// A node in a flow network.
struct Node {
/// The cost of the cheapest path from the source to the current node.
@@ -566,6 +574,8 @@ class MinCostMaxFlow {
uint64_t Target;
/// Augmenting edges.
std::vector<std::vector<Edge *>> AugmentingEdges;
+ /// Params for flow computation.
+ const ProfiParams &Params;
};
/// A post-processing adjustment of control flow. It applies two steps by
@@ -586,18 +596,23 @@ class MinCostMaxFlow {
///
class FlowAdjuster {
public:
- FlowAdjuster(FlowFunction &Func) : Func(Func) {
+ FlowAdjuster(const ProfiParams &Params, FlowFunction &Func)
+ : Params(Params), Func(Func) {
assert(Func.Blocks[Func.Entry].isEntry() &&
"incorrect index of the entry block");
}
// Run the post-processing
void run() {
- /// Adjust the flow to get rid of isolated components.
- joinIsolatedComponents();
+ if (Params.JoinIslands) {
+ /// Adjust the flow to get rid of isolated components.
+ joinIsolatedComponents();
+ }
- /// Rebalance the flow inside unknown subgraphs.
- rebalanceUnknownSubgraphs();
+ if (Params.RebalanceUnknown) {
+ /// Rebalance the flow inside unknown subgraphs.
+ rebalanceUnknownSubgraphs();
+ }
}
private:
@@ -736,12 +751,13 @@ class FlowAdjuster {
/// To capture this objective with integer distances, we round off fractional
/// parts to a multiple of 1 / BaseDistance.
int64_t jumpDistance(FlowJump *Jump) const {
+ if (Jump->IsUnlikely)
+ return Params.CostUnlikely;
+
uint64_t BaseDistance =
- std::max(MinCostMaxFlow::MinBaseDistance,
+ std::max(FlowAdjuster::MinBaseDistance,
std::min(Func.Blocks[Func.Entry].Flow,
- MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
- if (Jump->IsUnlikely)
- return MinCostMaxFlow::AuxCostUnlikely;
+ Params.CostUnlikely / NumBlocks()));
if (Jump->Flow > 0)
return BaseDistance + BaseDistance / Jump->Flow;
return BaseDistance * NumBlocks();
@@ -786,13 +802,13 @@ class FlowAdjuster {
bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
// Do not attempt to find unknown subgraphs from an unknown or a
// zero-flow block
- if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+ if (SrcBlock->HasUnknownWeight || SrcBlock->Flow == 0)
return false;
// Do not attempt to process subgraphs from a block w/o unknown sucessors
bool HasUnknownSuccs = false;
for (auto *Jump : SrcBlock->SuccJumps) {
- if (Func.Blocks[Jump->Target].UnknownWeight) {
+ if (Func.Blocks[Jump->Target].HasUnknownWeight) {
HasUnknownSuccs = true;
break;
}
@@ -830,7 +846,7 @@ class FlowAdjuster {
continue;
// Process block Dst
Visited[Dst] = true;
- if (!Func.Blocks[Dst].UnknownWeight) {
+ if (!Func.Blocks[Dst].HasUnknownWeight) {
KnownDstBlocks.push_back(&Func.Blocks[Dst]);
} else {
Queue.push(Dst);
@@ -893,11 +909,11 @@ class FlowAdjuster {
return false;
// Ignore jumps out of SrcBlock to known blocks
- if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
+ if (!JumpTarget->HasUnknownWeight && JumpSource == SrcBlock)
return true;
// Ignore jumps to known blocks with zero flow
- if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
+ if (!JumpTarget->HasUnknownWeight && JumpTarget->Flow == 0)
return true;
return false;
@@ -935,7 +951,7 @@ class FlowAdjuster {
break;
// Keep an acyclic order of unknown blocks
- if (Block->UnknownWeight && Block != SrcBlock)
+ if (Block->HasUnknownWeight && Block != SrcBlock)
AcyclicOrder.push_back(Block);
// Add to the queue all successors with zero local in-degree
@@ -977,7 +993,7 @@ class FlowAdjuster {
// Ditribute flow from the remaining blocks
for (auto *Block : UnknownBlocks) {
- assert(Block->UnknownWeight && "incorrect unknown subgraph");
+ assert(Block->HasUnknownWeight && "incorrect unknown subgraph");
uint64_t BlockFlow = 0;
// Block's flow is the sum of incoming flows
for (auto *Jump : Block->PredJumps) {
@@ -1019,7 +1035,11 @@ class FlowAdjuster {
/// A constant indicating an arbitrary exit block of a function.
static constexpr uint64_t AnyExitBlock = uint64_t(-1);
+ /// Minimum BaseDistance for the jump distance values in island joining.
+ static constexpr uint64_t MinBaseDistance = 10000;
+ /// Params for flow computation.
+ const ProfiParams &Params;
/// The function.
FlowFunction &Func;
};
@@ -1029,7 +1049,8 @@ class FlowAdjuster {
/// Every block is split into three nodes that are responsible for (i) an
/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
/// reduction of the block weight.
-void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
+void initializeNetwork(const ProfiParams &Params, MinCostMaxFlow &Network,
+ FlowFunction &Func) {
uint64_t NumBlocks = Func.Blocks.size();
assert(NumBlocks > 1 && "Too few blocks in a function");
LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
@@ -1051,7 +1072,7 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
// Create three nodes for every block of the function
for (uint64_t B = 0; B < NumBlocks; B++) {
auto &Block = Func.Blocks[B];
- assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
+ assert((!Block.HasUnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
"non-zero weight of a block w/o weight except for an entry");
// Split every block into two nodes
@@ -1076,22 +1097,22 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
// We assume that decreasing block counts is more expensive than increasing,
// and thus, setting separate costs here. In the future we may want to tune
// the relative costs so as to maximize the quality of generated profiles.
- int64_t AuxCostInc = SampleProfileProfiCostInc;
- int64_t AuxCostDec = SampleProfileProfiCostDec;
- if (Block.UnknownWeight) {
+ int64_t AuxCostInc = Params.CostBlockInc;
+ int64_t AuxCostDec = Params.CostBlockDec;
+ if (Block.HasUnknownWeight) {
// Do not penalize changing weights of blocks w/o known profile count
- AuxCostInc = 0;
+ AuxCostInc = Params.CostBlockUnknownInc;
AuxCostDec = 0;
} else {
// Increasing the count for "cold" blocks with zero initial count is more
// expensive than for "hot" ones
if (Block.Weight == 0) {
- AuxCostInc = SampleProfileProfiCostIncZero;
+ AuxCostInc = Params.CostBlockZeroInc;
}
// Modifying the count of the entry block is expensive
if (Block.isEntry()) {
- AuxCostInc = SampleProfileProfiCostIncEntry;
- AuxCostDec = SampleProfileProfiCostDecEntry;
+ AuxCostInc = Params.CostBlockEntryInc;
+ AuxCostDec = Params.CostBlockEntryDec;
}
}
// For blocks with self-edges, do not penalize a reduction of the count,
@@ -1115,7 +1136,7 @@ void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
if (Src != Dst) {
uint64_t SrcOut = 3 * Src + 1;
uint64_t DstIn = 3 * Dst;
- uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
+ uint64_t Cost = Jump.IsUnlikely ? Params.CostUnlikely : 0;
Network.addEdge(SrcOut, DstIn, Cost);
}
}
@@ -1232,17 +1253,17 @@ void verifyWeights(const FlowFunction &Func) {
} // end of anonymous namespace
/// Apply the profile inference algorithm for a given flow function
-void llvm::applyFlowInference(FlowFunction &Func) {
+void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
// Create and apply an inference network model
- auto InferenceNetwork = MinCostMaxFlow();
- initializeNetwork(InferenceNetwork, Func);
+ auto InferenceNetwork = MinCostMaxFlow(Params);
+ initializeNetwork(Params, InferenceNetwork, Func);
InferenceNetwork.run();
// Extract flow values for every block and every edge
extractWeights(InferenceNetwork, Func);
// Post-processing adjustments to the flow
- auto Adjuster = FlowAdjuster(Func);
+ auto Adjuster = FlowAdjuster(Params, Func);
Adjuster.run();
#ifndef NDEBUG
@@ -1250,3 +1271,20 @@ void llvm::applyFlowInference(FlowFunction &Func) {
verifyWeights(Func);
#endif
}
+
+/// Apply the profile inference algorithm for a given flow function
+void llvm::applyFlowInference(FlowFunction &Func) {
+ ProfiParams Params;
+ // Set the params from the command-line flags.
+ Params.EvenFlowDistribution = SampleProfileEvenFlowDistribution;
+ Params.RebalanceUnknown = SampleProfileRebalanceUnknown;
+ Params.JoinIslands = SampleProfileJoinIslands;
+ Params.CostBlockInc = SampleProfileProfiCostBlockInc;
+ Params.CostBlockDec = SampleProfileProfiCostBlockDec;
+ Params.CostBlockEntryInc = SampleProfileProfiCostBlockEntryInc;
+ Params.CostBlockEntryDec = SampleProfileProfiCostBlockEntryDec;
+ Params.CostBlockZeroInc = SampleProfileProfiCostBlockZeroInc;
+ Params.CostBlockUnknownInc = SampleProfileProfiCostBlockUnknownInc;
+
+ applyFlowInference(Params, Func);
+}
diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
index a2588b8cec7d2..f7ae6ad844948 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -42,10 +42,6 @@ cl::opt<bool> SampleProfileUseProfi(
"sample-profile-use-profi", cl::Hidden,
cl::desc("Use profi to infer block and edge counts."));
-cl::opt<bool> SampleProfileInferEntryCount(
- "sample-profile-infer-entry-count", cl::init(true), cl::Hidden,
- cl::desc("Use profi to infer function entry count."));
-
namespace sampleprofutil {
/// Return true if the given callsite is hot wrt to hot cutoff threshold.
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
index 0202177da3e0e..077eab72b1547 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
@@ -1,19 +1,19 @@
; Make sure Import GUID list for ThinLTO properly set for CSSPGO
-; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -sample-profile-even-count-distribution=0 -S | FileCheck %s
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof --sample-profile-even-flow-distribution=0 -S | FileCheck %s
; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/csspgo-import-list.prof -o %t.prof
-; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof -sample-profile-even-count-distribution=0 -S | FileCheck %s
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof --sample-profile-even-flow-distribution=0 -S | FileCheck %s
; RUN: llvm-profdata show --sample -show-sec-info-only %t.prof | FileCheck %s --check-prefix=CHECK-ORDERED
; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/csspgo-import-list.prof -o %t.md5
-; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 -sample-profile-even-count-distribution=0 -S | FileCheck %s
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 --sample-profile-even-flow-distribution=0 -S | FileCheck %s
; RUN: llvm-profdata show --sample -show-sec-info-only %t.md5 | FileCheck %s --check-prefix=CHECK-ORDERED
;; Validate that with replay in effect, we import call sites even if they are below the threshold
;; Baseline import decisions
-; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -profile-summary-hot-count=10000 -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -profile-summary-hot-count=10000 --sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD
;; With replay
-; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 --sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY
;; With replay but no profile information for call to _Z5funcAi. We import _Z5funcAi because it's explicitly in the replay but don't go further to its callee (_Z3fibi) because we lack samples
-; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list-no-funca.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY-NO-FUNCA
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list-no-funca.prof -sample-profile-inline-replay=%S/Inputs/csspgo-import-list-replay.txt -sample-profile-inline-replay-scope=Module -profile-summary-hot-count=10000 --sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=THRESHOLD-REPLAY-NO-FUNCA
declare i32 @_Z5funcBi(i32 %x)
declare i32 @_Z5funcAi(i32 %x)
diff --git a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
index 0acb4d4d53ef6..9ca493f3deefa 100644
--- a/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll
@@ -8,10 +8,10 @@
; main:3 @ _Z5funcAi
; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
; _Z5funcBi:1 @ _Z8funcLeafi
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-count-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
;
; Test we inlined the following in top-down order and entry counts accurate reflects post-inline base profile
; _Z5funcAi:1 @ _Z8funcLeafi
More information about the llvm-commits
mailing list