[llvm] 13d1364 - A better profi rebalancer
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 18 12:20:39 PST 2022
Author: spupyrev
Date: 2022-01-18T12:14:24-08:00
New Revision: 13d1364a344328145b4bc9449fa6aae34f300886
URL: https://github.com/llvm/llvm-project/commit/13d1364a344328145b4bc9449fa6aae34f300886
DIFF: https://github.com/llvm/llvm-project/commit/13d1364a344328145b4bc9449fa6aae34f300886.diff
LOG: A better profi rebalancer
This is an extension of the **profi** post-processing step that rebalances counts
in CFGs that have basic blocks w/o probes (aka "unknown" blocks). Specifically,
the new version finds many more "unknown" subgraphs and marks more "unknown"
basic blocks as hot (which prevents unwanted optimization passes).
I see up to a 0.5% performance improvement on some (large) binaries, e.g., clang-10 and gcc-8.
The algorithm is still linear and yields no build time overhead.
Added:
llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof
llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll
Modified:
llvm/lib/Transforms/Utils/SampleProfileInference.cpp
llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 1d5582980af50..961adf2570a73 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -145,7 +145,7 @@ class MinCostMaxFlow {
/// A cost of decreasing the entry block's count by one.
static constexpr int64_t AuxCostDecEntry = 10;
/// A cost of taking an unlikely jump.
- static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
+ static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
private:
/// Check for existence of an augmenting path with a positive capacity.
@@ -237,7 +237,7 @@ class MinCostMaxFlow {
}
}
- /// An node in a flow network.
+ /// A node in a flow network.
struct Node {
/// The cost of the cheapest path from the source to the current node.
int64_t Distance;
@@ -304,9 +304,6 @@ class FlowAdjuster {
rebalanceUnknownSubgraphs();
}
- /// The probability for the first successor of a unknown subgraph
- static constexpr double UnknownFirstSuccProbability = 0.5;
-
private:
void joinIsolatedComponents() {
// Find blocks that are reachable from the source
@@ -453,45 +450,70 @@ class FlowAdjuster {
uint64_t NumBlocks() const { return Func.Blocks.size(); }
- /// Rebalance unknown subgraphs so as each branch splits with probabilities
- /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability
+ /// Rebalance unknown subgraphs so that the flow is split evenly across the
+ /// outgoing branches of every block of the subgraph. The method iterates over
+ /// blocks with known weight and identifies unknown subgraphs rooted at the
+ /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
void rebalanceUnknownSubgraphs() {
- static_assert(
- UnknownFirstSuccProbability >= 0.0 &&
- UnknownFirstSuccProbability <= 1.0,
- "the share of the unknown successor should be between 0 and 1");
- // Try to find unknown subgraphs from each non-unknown block
+ // Try to find unknown subgraphs from each block
for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
auto SrcBlock = &Func.Blocks[I];
- // Do not attempt to find unknown successors from a unknown or a
- // zero-flow block
- if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+ // Verify if rebalancing rooted at SrcBlock is feasible
+ if (!canRebalanceAtRoot(SrcBlock))
continue;
- std::vector<FlowBlock *> UnknownSuccs;
+ // Find an unknown subgraph starting at SrcBlock. Along the way,
+ // fill in known destinations and intermediate unknown blocks.
+ std::vector<FlowBlock *> UnknownBlocks;
+ std::vector<FlowBlock *> KnownDstBlocks;
+ findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
+
+ // Verify if rebalancing of the subgraph is feasible. If the search is
+ // successful, find the unique destination block (which can be null)
FlowBlock *DstBlock = nullptr;
- // Find a unknown subgraphs starting at block SrcBlock
- if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))
+ if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
+ DstBlock))
continue;
- // At the moment, we do not rebalance subgraphs containing cycles among
- // unknown blocks
- if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))
+
+ // We cannot rebalance subgraphs containing cycles among unknown blocks
+ if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
continue;
// Rebalance the flow
- rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);
+ rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
}
}
- /// Find a unknown subgraph starting at block SrcBlock.
- /// If the search is successful, the method sets DstBlock and UnknownSuccs.
- bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock,
- std::vector<FlowBlock *> &UnknownSuccs) {
+ /// Verify if rebalancing rooted at a given block is possible.
+ bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
+ // Do not attempt to find unknown subgraphs from an unknown or a
+ // zero-flow block
+ if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
+ return false;
+
+ // Do not attempt to process subgraphs from a block w/o unknown successors
+ bool HasUnknownSuccs = false;
+ for (auto Jump : SrcBlock->SuccJumps) {
+ if (Func.Blocks[Jump->Target].UnknownWeight) {
+ HasUnknownSuccs = true;
+ break;
+ }
+ }
+ if (!HasUnknownSuccs)
+ return false;
+
+ return true;
+ }
+
+ /// Find an unknown subgraph starting at block SrcBlock. The method sets
+ /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
+ void findUnknownSubgraph(const FlowBlock *SrcBlock,
+ std::vector<FlowBlock *> &KnownDstBlocks,
+ std::vector<FlowBlock *> &UnknownBlocks) {
// Run BFS from SrcBlock and make sure all paths are going through unknown
// blocks and end at a non-unknown DstBlock
auto Visited = BitVector(NumBlocks(), false);
std::queue<uint64_t> Queue;
- DstBlock = nullptr;
Queue.push(SrcBlock->Index);
Visited[SrcBlock->Index] = true;
@@ -500,52 +522,105 @@ class FlowAdjuster {
Queue.pop();
// Process blocks reachable from Block
for (auto Jump : Block.SuccJumps) {
+ // If Jump can be ignored, skip it
+ if (ignoreJump(SrcBlock, nullptr, Jump))
+ continue;
+
uint64_t Dst = Jump->Target;
+ // If Dst has been visited, skip Jump
if (Visited[Dst])
continue;
+ // Process block Dst
Visited[Dst] = true;
if (!Func.Blocks[Dst].UnknownWeight) {
- // If we see non-unique non-unknown block reachable from SrcBlock,
- // stop processing and skip rebalancing
- FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
- if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
- return false;
- DstBlock = CandidateDstBlock;
+ KnownDstBlocks.push_back(&Func.Blocks[Dst]);
} else {
Queue.push(Dst);
- UnknownSuccs.push_back(&Func.Blocks[Dst]);
+ UnknownBlocks.push_back(&Func.Blocks[Dst]);
}
}
}
+ }
+ /// Verify if rebalancing of the subgraph is feasible. If the checks are
+ /// successful, set the unique destination block, DstBlock (can be null).
+ bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
+ const std::vector<FlowBlock *> &KnownDstBlocks,
+ const std::vector<FlowBlock *> &UnknownBlocks,
+ FlowBlock *&DstBlock) {
// If the list of unknown blocks is empty, we don't need rebalancing
- if (UnknownSuccs.empty())
+ if (UnknownBlocks.empty())
return false;
- // If all reachable nodes from SrcBlock are unknown, skip rebalancing
- if (DstBlock == nullptr)
+
+ // If there are multiple known sinks, we can't rebalance
+ if (KnownDstBlocks.size() > 1)
return false;
- // If any of the unknown blocks is an exit block, skip rebalancing
- for (auto Block : UnknownSuccs) {
- if (Block->isExit())
+ DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
+
+ // Verify sinks of the subgraph
+ for (auto Block : UnknownBlocks) {
+ if (Block->SuccJumps.empty()) {
+ // If there are multiple (known and unknown) sinks, we can't rebalance
+ if (DstBlock != nullptr)
+ return false;
+ continue;
+ }
+ size_t NumIgnoredJumps = 0;
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ NumIgnoredJumps++;
+ }
+ // If there is a non-sink block in UnknownBlocks with all jumps ignored,
+ // then we can't rebalance
+ if (NumIgnoredJumps == Block->SuccJumps.size())
return false;
}
return true;
}
+ /// Decide whether the Jump is ignored while processing an unknown subgraph
+ /// rooted at basic block SrcBlock with the destination block, DstBlock.
+ bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ const FlowJump *Jump) {
+ // Ignore unlikely jumps with zero flow
+ if (Jump->IsUnlikely && Jump->Flow == 0)
+ return true;
+
+ auto JumpSource = &Func.Blocks[Jump->Source];
+ auto JumpTarget = &Func.Blocks[Jump->Target];
+
+ // Do not ignore jumps coming into DstBlock
+ if (DstBlock != nullptr && JumpTarget == DstBlock)
+ return false;
+
+ // Ignore jumps out of SrcBlock to known blocks
+ if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
+ return true;
+
+ // Ignore jumps to known blocks with zero flow
+ if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
+ return true;
+
+ return false;
+ }
+
/// Verify if the given unknown subgraph is acyclic, and if yes, reorder
- /// UnknownSuccs in the topological order (so that all jumps are "forward").
- bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
- std::vector<FlowBlock *> &UnknownSuccs) {
+ /// UnknownBlocks in the topological order (so that all jumps are "forward").
+ bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ std::vector<FlowBlock *> &UnknownBlocks) {
// Extract local in-degrees in the considered subgraph
auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
- for (auto Jump : SrcBlock->SuccJumps) {
- LocalInDegree[Jump->Target]++;
- }
- for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {
- for (auto Jump : UnknownSuccs[I]->SuccJumps) {
+ auto fillInDegree = [&](const FlowBlock *Block) {
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
LocalInDegree[Jump->Target]++;
}
+ };
+ fillInDegree(SrcBlock);
+ for (auto Block : UnknownBlocks) {
+ fillInDegree(Block);
}
// A loop containing SrcBlock
if (LocalInDegree[SrcBlock->Index] > 0)
@@ -555,15 +630,20 @@ class FlowAdjuster {
std::queue<uint64_t> Queue;
Queue.push(SrcBlock->Index);
while (!Queue.empty()) {
- auto &Block = Func.Blocks[Queue.front()];
+ FlowBlock *Block = &Func.Blocks[Queue.front()];
Queue.pop();
- // Stop propagation once we reach DstBlock
- if (Block.Index == DstBlock->Index)
+ // Stop propagation once we reach DstBlock, if any
+ if (DstBlock != nullptr && Block == DstBlock)
break;
- AcyclicOrder.push_back(&Block);
+ // Keep an acyclic order of unknown blocks
+ if (Block->UnknownWeight && Block != SrcBlock)
+ AcyclicOrder.push_back(Block);
+
// Add to the queue all successors with zero local in-degree
- for (auto Jump : Block.SuccJumps) {
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
uint64_t Dst = Jump->Target;
LocalInDegree[Dst]--;
if (LocalInDegree[Dst] == 0) {
@@ -574,42 +654,69 @@ class FlowAdjuster {
// If there is a cycle in the subgraph, AcyclicOrder contains only a subset
// of all blocks
- if (UnknownSuccs.size() + 1 != AcyclicOrder.size())
+ if (UnknownBlocks.size() != AcyclicOrder.size())
return false;
- UnknownSuccs = AcyclicOrder;
+ UnknownBlocks = AcyclicOrder;
return true;
}
- /// Rebalance a given subgraph.
- void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
- std::vector<FlowBlock *> &UnknownSuccs) {
+ /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
+ /// having UnknownBlocks intermediate blocks.
+ void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
+ const FlowBlock *DstBlock,
+ const std::vector<FlowBlock *> &UnknownBlocks) {
assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
- assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");
- for (auto Block : UnknownSuccs) {
+ // Distribute flow from the source block
+ uint64_t BlockFlow = 0;
+ // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
+ for (auto Jump : SrcBlock->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockFlow += Jump->Flow;
+ }
+ rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
+
+ // Distribute flow from the remaining blocks
+ for (auto Block : UnknownBlocks) {
+ assert(Block->UnknownWeight && "incorrect unknown subgraph");
+ uint64_t BlockFlow = 0;
// Block's flow is the sum of incoming flows
- uint64_t TotalFlow = 0;
- if (Block == SrcBlock) {
- TotalFlow = Block->Flow;
- } else {
- for (auto Jump : Block->PredJumps) {
- TotalFlow += Jump->Flow;
- }
- Block->Flow = TotalFlow;
+ for (auto Jump : Block->PredJumps) {
+ BlockFlow += Jump->Flow;
}
+ Block->Flow = BlockFlow;
+ rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
+ }
+ }
- // Process all successor jumps and update corresponding flow values
- for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {
- auto Jump = Block->SuccJumps[I];
- if (I + 1 == Block->SuccJumps.size()) {
- Jump->Flow = TotalFlow;
- continue;
- }
- uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);
- Jump->Flow = Flow;
- TotalFlow -= Flow;
- }
+ /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
+ /// and ending at DstBlock.
+ void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
+ const FlowBlock *Block, uint64_t BlockFlow) {
+ // Process all successor jumps and update corresponding flow values
+ size_t BlockDegree = 0;
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ BlockDegree++;
+ }
+ // If all successor jumps of the block are ignored, skip it
+ if (DstBlock == nullptr && BlockDegree == 0)
+ return;
+ assert(BlockDegree > 0 && "all outgoing jumps are ignored");
+
+ // Each of the Block's successors gets the following amount of flow.
+ // Rounding the value up so that all flow is propagated
+ uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
+ for (auto Jump : Block->SuccJumps) {
+ if (ignoreJump(SrcBlock, DstBlock, Jump))
+ continue;
+ uint64_t Flow = std::min(SuccFlow, BlockFlow);
+ Jump->Flow = Flow;
+ BlockFlow -= Flow;
}
+ assert(BlockFlow == 0 && "not all flow is propagated");
}
/// A constant indicating an arbitrary exit block of a function.
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof
new file mode 100644
index 0000000000000..6dcd3fe241b77
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference-rebalance-large.prof
@@ -0,0 +1,26 @@
+foo1:37078302:0
+ 1: 3300
+ 2: 0
+ 6: 3300
+ !CFGChecksum: 157181141624
+
+foo2:37078302:0
+ 1: 128
+ 2: 128
+ 3: 128
+ 4: 128
+ !CFGChecksum: 208782362068
+
+foo3:37078302:0
+ 1: 500
+ 2: 1500
+ 4: 1200
+ 6: 900
+ 9: 500
+ !CFGChecksum: 189901498683
+
+foo4:37078302:0
+ 1: 400
+ 3: 400
+ 10: 400
+ !CFGChecksum: 241030178952
diff --git a/llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll
new file mode 100644
index 0000000000000..cf633abefdef8
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance-large.ll
@@ -0,0 +1,387 @@
+; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance-large.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance-large.prof | opt -analyze -block-freq -enable-new-pm=0 | FileCheck %s --check-prefix=CHECK2
+
+; The test verifies that counts can be rebalanced in switch statements that contain
+; both 'known' and 'unknown' basic blocks.
+;
+; +---------+
+; +----------------- | b15 [?] |
+; | +---------+
+; | ^
+; | |
+; | |
+; | +---------+ +--------------+ +---------+
+; | | b13 [?] | <-- | b11 [3300] | --> | b14 [?] |
+; | +---------+ +--------------+ +---------+
+; | | | | |
+; | | | | |
+; | | v | |
+; | | +---------+ | |
+; | | | b12 [0] | | |
+; | | +---------+ | |
+; | | | | |
+; | | | | |
+; | | v v |
+; | | +--------------+ |
+; | +-----------> | | <-----+
+; | | b16 [3300] |
+; +----------------> | |
+; +--------------+
+
+@yydebug = dso_local global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @foo1(i32 %0, i32 %1) #0 {
+b11:
+ call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1)
+ %cmp = icmp ne i32 %0, 0
+ switch i32 %1, label %b12 [
+ i32 1, label %b13
+ i32 2, label %b14
+ i32 3, label %b15
+ i32 4, label %b16
+ ]
+; CHECK: edge b11 -> b12 probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK: edge b11 -> b13 probability is 0x20000000 / 0x80000000 = 25.00%
+; CHECK: edge b11 -> b14 probability is 0x20000000 / 0x80000000 = 25.00%
+; CHECK: edge b11 -> b15 probability is 0x20000000 / 0x80000000 = 25.00%
+; CHECK: edge b11 -> b16 probability is 0x20000000 / 0x80000000 = 25.00%
+; CHECK2: - b11: float = {{.*}}, int = {{.*}}, count = 3300
+
+b12:
+ call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1)
+ br label %b16
+; CHECK2: - b12: float = {{.*}}, int = {{.*}}, count = 0
+
+b13:
+ call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1)
+ br label %b16
+; CHECK2: - b13: float = {{.*}}, int = {{.*}}, count = 825
+
+b14:
+ call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1)
+ br label %b16
+; CHECK2: - b14: float = {{.*}}, int = {{.*}}, count = 825
+
+b15:
+ call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1)
+ br label %b16
+; CHECK2: - b15: float = {{.*}}, int = {{.*}}, count = 825
+
+b16:
+ call void @llvm.pseudoprobe(i64 7682762345278052905, i64 6, i32 0, i64 -1)
+ ret i32 %1
+; CHECK2: - b16: float = {{.*}}, int = {{.*}}, count = 3300
+}
+
+
+; The test verifies that counts can be rebalanced even when control-flow ends at
+; a basic block with an unknown count.
+;
+; +-----------+
+; | b21 [128] | -+
+; +-----------+ |
+; | |
+; v |
+; +-----------+ |
+; | b22 [128] | |
+; +-----------+ |
+; | |
+; v |
+; +-----------+ |
+; +------------ | b23 [128] | <+
+; | +-----------+
+; | |
+; v v
+; +---------+ +-----------+
+; | b26 [?] | <-- | b24 [128] |
+; +---------+ +-----------+
+; | |
+; | v
+; | +-----------+
+; | | b25 [?] |
+; | +-----------+
+; | |
+; | v
+; | +-----------+
+; +-----------> | b27 [?] | -+
+; +-----------+ |
+; | |
+; v |
+; +-----------+ |
+; | b28 [?] | |
+; +-----------+ |
+; | |
+; v |
+; +-----------+ |
+; | b29 [?] | <+
+; +-----------+
+
+define dso_local i32 @foo2(i32 %0, i32 %1) #0 {
+b21:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 1, i32 0, i64 -1)
+ %cmp = icmp ne i32 %0, 0
+ br i1 %cmp, label %b22, label %b23
+; CHECK: edge b21 -> b22 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge b21 -> b23 probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK2: - b21: float = {{.*}}, int = {{.*}}, count = 128
+
+b22:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 2, i32 0, i64 -1)
+ br label %b23
+
+b23:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 3, i32 0, i64 -1)
+ br i1 %cmp, label %b24, label %b26
+; CHECK: edge b23 -> b24 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; CHECK: edge b23 -> b26 probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK2: - b23: float = {{.*}}, int = {{.*}}, count = 128
+
+b24:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 4, i32 0, i64 -1)
+ br i1 %cmp, label %b25, label %b26
+; CHECK: edge b24 -> b25 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge b24 -> b26 probability is 0x40000000 / 0x80000000 = 50.00%
+
+b25:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 5, i32 0, i64 -1)
+ br label %b27
+; CHECK2: - b25: float = {{.*}}, int = {{.*}}, count = 64
+
+b26:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 6, i32 0, i64 -1)
+ br label %b27
+; CHECK2: - b26: float = {{.*}}, int = {{.*}}, count = 64
+
+b27:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 7, i32 0, i64 -1)
+ br i1 %cmp, label %b28, label %b29
+; CHECK: edge b27 -> b28 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge b27 -> b29 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK2: - b27: float = {{.*}}, int = {{.*}}, count = 128
+
+b28:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 8, i32 0, i64 -1)
+ br label %b29
+; CHECK2: - b28: float = {{.*}}, int = {{.*}}, count = 64
+
+b29:
+ call void @llvm.pseudoprobe(i64 2494702099028631698, i64 9, i32 0, i64 -1)
+ ret i32 %1
+; CHECK2: - b29: float = {{.*}}, int = {{.*}}, count = 128
+}
+
+
+; The test verifies a flexible mode of rebalancing in which some jumps to known
+; basic blocks are ignored.
+;
+; +------------+
+; | b31 [500] |
+; +------------+
+; |
+; v
+; +---------+ +------------+
+; | b33 [?] | <-- | b32 [1500] | <-----+
+; +---------+ +------------+ |
+; | | |
+; | v |
+; | +------------+ +-----------+
+; | | b34 [1200] | --> | b36 [900] |
+; | +------------+ +-----------+
+; | |
+; | v
+; | +------------+
+; | | b35 [?] |
+; | +------------+
+; | |
+; | v
+; | +------------+
+; +-----------> | b37 [?] | -+
+; +------------+ |
+; | |
+; v |
+; +------------+ |
+; | b38 [?] | |
+; +------------+ |
+; | |
+; v |
+; +------------+ |
+; | b39 [500] | <+
+; +------------+
+;
+
+define dso_local i32 @foo3(i32 %0, i32 %1) #0 {
+b31:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 1, i32 0, i64 -1)
+ %cmp = icmp ne i32 %0, 0
+ br label %b32
+; CHECK2: - b31: float = {{.*}}, int = {{.*}}, count = 500
+
+b32:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 2, i32 0, i64 -1)
+ br i1 %cmp, label %b33, label %b34
+; CHECK: edge b32 -> b33 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge b32 -> b34 probability is 0x66666666 / 0x80000000 = 80.00%
+; CHECK2: - b32: float = {{.*}}, int = {{.*}}, count = 1500
+
+b33:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 3, i32 0, i64 -1)
+ br label %b37
+; CHECK2: - b33: float = {{.*}}, int = {{.*}}, count = 300
+
+b34:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 4, i32 0, i64 -1)
+ br i1 %cmp, label %b35, label %b36
+; CHECK: edge b34 -> b35 probability is 0x15555555 / 0x80000000 = 16.67%
+; CHECK: edge b34 -> b36 probability is 0x6aaaaaab / 0x80000000 = 83.33% [HOT edge]
+; CHECK2: - b34: float = {{.*}}, int = {{.*}}, count = 1200
+
+b35:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 5, i32 0, i64 -1)
+ br label %b37
+; CHECK2: - b35: float = {{.*}}, int = {{.*}}, count = 200
+
+b36:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 6, i32 0, i64 -1)
+ br label %b32
+; CHECK2: - b36: float = {{.*}}, int = {{.*}}, count = 1000
+
+b37:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 7, i32 0, i64 -1)
+ br i1 %cmp, label %b38, label %b39
+; CHECK: edge b37 -> b38 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge b37 -> b39 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK2: - b37: float = {{.*}}, int = {{.*}}, count = 500
+
+b38:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 8, i32 0, i64 -1)
+ br label %b39
+; CHECK2: - b38: float = {{.*}}, int = {{.*}}, count = 250
+
+b39:
+ call void @llvm.pseudoprobe(i64 -7908226060800700466, i64 9, i32 0, i64 -1)
+ ret i32 %1
+; CHECK2: - b39: float = {{.*}}, int = {{.*}}, count = 500
+}
+
+
+; The test verifies that flow rebalancer can ignore 'unlikely' jumps.
+;
+; +-----------+
+; | b41 [400] | -+
+; +-----------+ |
+; | |
+; | |
+; v |
+; +-----------+ |
+; | b42 [?] | |
+; +-----------+ |
+; | |
+; | |
+; v v
+; +---------++---------+ +---------------------------+ +---------++---------+
+; | b48 [?] || b46 [?] | <-- | | --> | b47 [?] || b49 [?] |
+; +---------++---------+ | | +---------++---------+
+; | ^ | | | | ^
+; | | | | b43 [400] | | |
+; | +-------+-------------| | | |
+; | | | | | |
+; | | | | ------+----------+
+; | | +---------------------------+ |
+; | | | | |
+; | | | | |
+; | | v v |
+; | | +-----------+ +---------+ |
+; | | | b44 [?] | | b45 [?] | |
+; | | +-----------+ +---------+ |
+; | | | | |
+; | | | | |
+; | | v v |
+; | | +---------------------------+ |
+; | +-----------> | | <-----+
+; | | b410 [400] |
+; | | |
+; +----------------------> | |
+; +---------------------------+
+
+
+define dso_local void @foo4(i32 %0, i32 %1) #0 {
+b41:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 1, i32 0, i64 -1)
+ %cmp = icmp ne i32 %0, 0
+ br i1 %cmp, label %b42, label %b43
+; CHECK: edge b41 -> b42 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK: edge b41 -> b43 probability is 0x40000000 / 0x80000000 = 50.00%
+; CHECK2: - b41: float = {{.*}}, int = {{.*}}, count = 400
+
+b42:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 2, i32 0, i64 -1)
+ br label %b43
+; CHECK2: - b42: float = {{.*}}, int = {{.*}}, count = 200
+
+b43:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 3, i32 0, i64 -1)
+ switch i32 %1, label %b49 [
+ i32 1, label %b44
+ i32 2, label %b45
+ i32 3, label %b46
+ i32 4, label %b47
+ i32 5, label %b48
+ ]
+; CHECK: edge b43 -> b49 probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK: edge b43 -> b44 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge b43 -> b45 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge b43 -> b46 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge b43 -> b47 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK: edge b43 -> b48 probability is 0x1999999a / 0x80000000 = 20.00%
+; CHECK2: - b43: float = {{.*}}, int = {{.*}}, count = 400
+
+b44:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 4, i32 0, i64 -1)
+ br label %b410
+; CHECK2: - b44: float = {{.*}}, int = {{.*}}, count = 80
+
+b45:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 5, i32 0, i64 -1)
+ br label %b410
+; CHECK2: - b45: float = {{.*}}, int = {{.*}}, count = 80
+
+b46:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 6, i32 0, i64 -1)
+ br label %b410
+; CHECK2: - b46: float = {{.*}}, int = {{.*}}, count = 80
+
+b47:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 7, i32 0, i64 -1)
+ br label %b410
+; CHECK2: - b47: float = {{.*}}, int = {{.*}}, count = 80
+
+b48:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 8, i32 0, i64 -1)
+ br label %b410
+; CHECK2: - b48: float = {{.*}}, int = {{.*}}, count = 80
+
+b49:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 9, i32 0, i64 -1)
+ unreachable
+; CHECK2: - b49: float = {{.*}}, int = {{.*}}, count = 0
+
+b410:
+ call void @llvm.pseudoprobe(i64 -6882312132165544686, i64 10, i32 0, i64 -1)
+ ret void
+; CHECK2: - b410: float = {{.*}}, int = {{.*}}, count = 400
+}
+
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4
+
+attributes #0 = { noinline nounwind uwtable "use-sample-profile" }
+attributes #4 = { inaccessiblememonly nounwind willreturn }
+
+!llvm.pseudo_probe_desc = !{!7, !8, !9, !10}
+
+!7 = !{i64 7682762345278052905, i64 157181141624, !"foo1", null}
+!8 = !{i64 2494702099028631698, i64 208782362068, !"foo2", null}
+!9 = !{i64 -7908226060800700466, i64 189901498683, !"foo3", null}
+!10 = !{i64 -6882312132165544686, i64 241030178952, !"foo4", null}
diff --git a/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll
index 36d2cdb8124cf..9290e17ddbc42 100644
--- a/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-inference-rebalance.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s
; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference-rebalance.prof | opt -analyze -block-freq -enable-new-pm=0 | FileCheck %s --check-prefix=CHECK2
-; The test contains a "dimanond" and a "triangle" that needs to be rebalanced
+; The test contains a "diamond" and a "triangle" that needs to be rebalanced
; after basic profile inference.
;
; +----------------+
More information about the llvm-commits
mailing list