[llvm] 1392b65 - Revert "profi - a flow-based profile inference algorithm: Part I (out of 3)"
Mehdi Amini via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 23 12:11:06 PST 2021
Author: Mehdi Amini
Date: 2021-11-23T20:10:36Z
New Revision: 1392b654ff6573ca2dba4101f72e990822539c7d
URL: https://github.com/llvm/llvm-project/commit/1392b654ff6573ca2dba4101f72e990822539c7d
DIFF: https://github.com/llvm/llvm-project/commit/1392b654ff6573ca2dba4101f72e990822539c7d.diff
LOG: Revert "profi - a flow-based profile inference algorithm: Part I (out of 3)"
This reverts commit 884b6dd311422bbfac62b8a90fbfff8e77ba8121.
The Windows build is broken with a linker error.
Added:
Modified:
llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/lib/Transforms/Utils/CMakeLists.txt
llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
Removed:
llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
llvm/lib/Transforms/Utils/SampleProfileInference.cpp
llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof
llvm/test/Transforms/SampleProfile/profile-inference.ll
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
deleted file mode 100644
index e1f681bbd3677..0000000000000
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ /dev/null
@@ -1,284 +0,0 @@
-//===- Transforms/Utils/SampleProfileInference.h ----------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file provides the interface for the profile inference algorithm, profi.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
-#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallVector.h"
-
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-
-namespace llvm {
-
-class BasicBlock;
-class Function;
-class MachineBasicBlock;
-class MachineFunction;
-
-namespace afdo_detail {
-
-template <class BlockT> struct TypeMap {};
-template <> struct TypeMap<BasicBlock> {
- using BasicBlockT = BasicBlock;
- using FunctionT = Function;
-};
-template <> struct TypeMap<MachineBasicBlock> {
- using BasicBlockT = MachineBasicBlock;
- using FunctionT = MachineFunction;
-};
-
-} // end namespace afdo_detail
-
-struct FlowJump;
-
-/// A wrapper of a binary basic block.
-struct FlowBlock {
- uint64_t Index;
- uint64_t Weight{0};
- bool UnknownWeight{false};
- uint64_t Flow{0};
- bool HasSelfEdge{false};
- std::vector<FlowJump *> SuccJumps;
- std::vector<FlowJump *> PredJumps;
-
- /// Check if it is the entry block in the function.
- bool isEntry() const { return PredJumps.empty(); }
-
- /// Check if it is an exit block in the function.
- bool isExit() const { return SuccJumps.empty(); }
-};
-
-/// A wrapper of a jump between two basic blocks.
-struct FlowJump {
- uint64_t Source;
- uint64_t Target;
- uint64_t Flow{0};
- bool IsUnlikely{false};
-};
-
-/// A wrapper of a binary function with basic blocks and jumps.
-struct FlowFunction {
- std::vector<FlowBlock> Blocks;
- std::vector<FlowJump> Jumps;
- /// The index of the entry block.
- uint64_t Entry;
-};
-
-void applyFlowInference(FlowFunction &Func);
-
-/// Sample profile inference pass.
-template <typename BT> class SampleProfileInference {
-public:
- using BasicBlockT = typename afdo_detail::TypeMap<BT>::BasicBlockT;
- using FunctionT = typename afdo_detail::TypeMap<BT>::FunctionT;
- using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
- using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
- using EdgeWeightMap = DenseMap<Edge, uint64_t>;
- using BlockEdgeMap =
- DenseMap<const BasicBlockT *, SmallVector<const BasicBlockT *, 8>>;
-
- SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors,
- BlockWeightMap &SampleBlockWeights)
- : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {}
-
- /// Apply the profile inference algorithm for a given function
- void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
-
-private:
- /// Try to infer branch probabilities mimicking the implementation of
- /// BranchProbabilityInfo. Unlikely-taken branches are marked so that the
- /// inference algorithm can avoid sending flow along the corresponding edges.
- void findUnlikelyJumps(const std::vector<const BasicBlockT *> &BasicBlocks,
- BlockEdgeMap &Successors, FlowFunction &Func);
-
- /// Determine whether the block is an exit in the CFG.
- bool isExit(const BasicBlockT *BB);
-
- /// Function.
- const FunctionT &F;
-
- /// Successors for each basic block in the CFG.
- BlockEdgeMap &Successors;
-
- /// Map basic blocks to their sampled weights.
- BlockWeightMap &SampleBlockWeights;
-};
-
-template <typename BT>
-void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights,
- EdgeWeightMap &EdgeWeights) {
- // Find all forward-reachable blocks to which the inference algorithm
- // will be applied.
- df_iterator_default_set<const BasicBlockT *> Reachable;
- for (auto *BB : depth_first_ext(&F, Reachable))
- (void)BB /* Mark all reachable blocks */;
-
- // Find all backward-reachable blocks to which the inference algorithm
- // will be applied.
- df_iterator_default_set<const BasicBlockT *> InverseReachable;
- for (const auto &BB : F) {
- // An exit block is a block without any successors.
- if (isExit(&BB)) {
- for (auto *RBB : inverse_depth_first_ext(&BB, InverseReachable))
- (void)RBB;
- }
- }
-
- // Keep a stable order for reachable blocks
- DenseMap<const BasicBlockT *, uint64_t> BlockIndex;
- std::vector<const BasicBlockT *> BasicBlocks;
- BlockIndex.reserve(Reachable.size());
- BasicBlocks.reserve(Reachable.size());
- for (const auto &BB : F) {
- if (Reachable.count(&BB) && InverseReachable.count(&BB)) {
- BlockIndex[&BB] = BasicBlocks.size();
- BasicBlocks.push_back(&BB);
- }
- }
-
- BlockWeights.clear();
- EdgeWeights.clear();
- bool HasSamples = false;
- for (const auto *BB : BasicBlocks) {
- auto It = SampleBlockWeights.find(BB);
- if (It != SampleBlockWeights.end() && It->second > 0) {
- HasSamples = true;
- BlockWeights[BB] = It->second;
- }
- }
- // Quit early for functions with a single block or ones w/o samples
- if (BasicBlocks.size() <= 1 || !HasSamples) {
- return;
- }
-
- // Create necessary objects
- FlowFunction Func;
- Func.Blocks.reserve(BasicBlocks.size());
- // Create FlowBlocks
- for (const auto *BB : BasicBlocks) {
- FlowBlock Block;
- if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) {
- Block.UnknownWeight = false;
- Block.Weight = SampleBlockWeights[BB];
- } else {
- Block.UnknownWeight = true;
- Block.Weight = 0;
- }
- Block.Index = Func.Blocks.size();
- Func.Blocks.push_back(Block);
- }
- // Create FlowEdges
- for (const auto *BB : BasicBlocks) {
- for (auto *Succ : Successors[BB]) {
- if (!BlockIndex.count(Succ))
- continue;
- FlowJump Jump;
- Jump.Source = BlockIndex[BB];
- Jump.Target = BlockIndex[Succ];
- Func.Jumps.push_back(Jump);
- if (BB == Succ) {
- Func.Blocks[BlockIndex[BB]].HasSelfEdge = true;
- }
- }
- }
- for (auto &Jump : Func.Jumps) {
- Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
- Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
- }
-
- // Try to infer probabilities of jumps based on the contents of basic blocks
- findUnlikelyJumps(BasicBlocks, Successors, Func);
-
- // Find the entry block
- for (size_t I = 0; I < Func.Blocks.size(); I++) {
- if (Func.Blocks[I].isEntry()) {
- Func.Entry = I;
- break;
- }
- }
-
- // Create and apply the inference network model.
- applyFlowInference(Func);
-
- // Extract the resulting weights from the control flow
- // All weights are increased by one to avoid propagation errors introduced by
- // zero weights.
- for (const auto *BB : BasicBlocks) {
- BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow;
- }
- for (auto &Jump : Func.Jumps) {
- Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]);
- EdgeWeights[E] = Jump.Flow;
- }
-
-#ifndef NDEBUG
- // Unreachable blocks and edges should not have a weight.
- for (auto &I : BlockWeights) {
- assert(Reachable.contains(I.first));
- assert(InverseReachable.contains(I.first));
- }
- for (auto &I : EdgeWeights) {
- assert(Reachable.contains(I.first.first) &&
- Reachable.contains(I.first.second));
- assert(InverseReachable.contains(I.first.first) &&
- InverseReachable.contains(I.first.second));
- }
-#endif
-}
-
-template <typename BT>
-inline void SampleProfileInference<BT>::findUnlikelyJumps(
- const std::vector<const BasicBlockT *> &BasicBlocks,
- BlockEdgeMap &Successors, FlowFunction &Func) {}
-
-template <>
-inline void SampleProfileInference<BasicBlock>::findUnlikelyJumps(
- const std::vector<const BasicBlockT *> &BasicBlocks,
- BlockEdgeMap &Successors, FlowFunction &Func) {
- for (auto &Jump : Func.Jumps) {
- const auto *BB = BasicBlocks[Jump.Source];
- const auto *Succ = BasicBlocks[Jump.Target];
- const Instruction *TI = BB->getTerminator();
- // Check if the block ends with an InvokeInst and mark the non-taken branch
- // unlikely; in that case, block Succ should be a landing pad.
- if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) {
- if (isa<InvokeInst>(TI)) {
- Jump.IsUnlikely = true;
- }
- }
- const Instruction *SuccTI = Succ->getTerminator();
- // Check if the target block contains an UnreachableInst and mark it unlikely.
- if (SuccTI->getNumSuccessors() == 0) {
- if (isa<UnreachableInst>(SuccTI)) {
- Jump.IsUnlikely = true;
- }
- }
- }
-}
-
-template <typename BT>
-inline bool SampleProfileInference<BT>::isExit(const BasicBlockT *BB) {
- return BB->succ_empty();
-}
-
-template <>
-inline bool SampleProfileInference<BasicBlock>::isExit(const BasicBlock *BB) {
- return succ_empty(BB);
-}
-
-} // end namespace llvm
-#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index e9b3d5aef15fb..6a2f0acf46f32 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -38,7 +38,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
namespace llvm {
@@ -75,8 +74,6 @@ template <> struct IRTraits<BasicBlock> {
} // end namespace afdo_detail
-extern cl::opt<unsigned> SampleProfileUseProfi;
-
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
@@ -145,9 +142,6 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
ArrayRef<BasicBlockT *> Descendants,
PostDominatorTreeT *DomTree);
void propagateWeights(FunctionT &F);
- void applyProfi(FunctionT &F, BlockEdgeMap &Successors,
- BlockWeightMap &SampleBlockWeights,
- BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(FunctionT &F);
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
@@ -156,11 +150,6 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
bool
computeAndPropagateWeights(FunctionT &F,
const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
- void initWeightPropagation(FunctionT &F,
- const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
- void
- finalizeWeightPropagation(FunctionT &F,
- const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
void emitCoverageRemarks(FunctionT &F);
/// Map basic blocks to their computed weights.
@@ -752,65 +741,50 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
/// known).
template <typename BT>
void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) {
- // Flow-based profile inference is only usable with the BasicBlock
- // instantiation of SampleProfileLoaderBaseImpl.
- if (SampleProfileUseProfi) {
- // Prepare block sample counts for inference.
- BlockWeightMap SampleBlockWeights;
- for (const auto &BI : F) {
- ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
- if (Weight)
- SampleBlockWeights[&BI] = Weight.get();
+ bool Changed = true;
+ unsigned I = 0;
+
+ // If BB weight is larger than its corresponding loop's header BB weight,
+ // use the BB weight to replace the loop header BB weight.
+ for (auto &BI : F) {
+ BasicBlockT *BB = &BI;
+ LoopT *L = LI->getLoopFor(BB);
+ if (!L) {
+ continue;
}
- // Fill in BlockWeights and EdgeWeights using an inference algorithm.
- applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
- } else {
- bool Changed = true;
- unsigned I = 0;
-
- // If BB weight is larger than its corresponding loop's header BB weight,
- // use the BB weight to replace the loop header BB weight.
- for (auto &BI : F) {
- BasicBlockT *BB = &BI;
- LoopT *L = LI->getLoopFor(BB);
- if (!L) {
- continue;
- }
- BasicBlockT *Header = L->getHeader();
- if (Header && BlockWeights[BB] > BlockWeights[Header]) {
- BlockWeights[Header] = BlockWeights[BB];
- }
+ BasicBlockT *Header = L->getHeader();
+ if (Header && BlockWeights[BB] > BlockWeights[Header]) {
+ BlockWeights[Header] = BlockWeights[BB];
}
+ }
- // Propagate until we converge or we go past the iteration limit.
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, false);
- }
+ // Before propagation starts, build, for each block, a list of
+ // unique predecessors and successors. This is necessary to handle
+ // identical edges in multiway branches. Since we visit all blocks and all
+ // edges of the CFG, it is cleaner to build these lists once at the start
+ // of the pass.
+ buildEdges(F);
- // The first propagation propagates BB counts from annotated BBs to unknown
- // BBs. The second propagation pass resets edge weights and uses all BB
- // weights to propagate edge weights.
- VisitedEdges.clear();
- Changed = true;
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, false);
- }
+ // Propagate until we converge or we go past the iteration limit.
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
+ }
- // The third propagation pass allows adjusting annotated BB weights that are
- // obviously wrong.
- Changed = true;
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, true);
- }
+ // The first propagation propagates BB counts from annotated BBs to unknown
+ // BBs. The second propagation pass resets edge weights and uses all BB
+ // weights to propagate edge weights.
+ VisitedEdges.clear();
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
}
-}
-template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::applyProfi(
- FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
- BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
- auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
- Infer.apply(BlockWeights, EdgeWeights);
+ // The third propagation pass allows adjusting annotated BB weights that are
+ // obviously wrong.
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, true);
+ }
}
/// Generate branch weight metadata for all branches in \p F.
@@ -868,64 +842,26 @@ bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights(
Changed |= computeBlockWeights(F);
if (Changed) {
- // Initialize propagation.
- initWeightPropagation(F, InlinedGUIDs);
+ // Add an entry count to the function using the samples gathered at the
+ // function entry.
+ // Set the GUIDs that are inlined in the profiled binary. This is used
+ // for ThinLink to perform correct liveness analysis, and also to make
+ // the IR match the profiled binary before annotation.
+ getFunction(F).setEntryCount(
+ ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
+ &InlinedGUIDs);
- // Propagate weights to all edges.
- propagateWeights(F);
-
- // Post-process propagated weights.
- finalizeWeightPropagation(F, InlinedGUIDs);
- }
-
- return Changed;
-}
-
-template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation(
- FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
- // Add an entry count to the function using the samples gathered at the
- // function entry.
- // Set the GUIDs that are inlined in the profiled binary. This is used
- // for ThinLink to perform correct liveness analysis, and also to make
- // the IR match the profiled binary before annotation.
- getFunction(F).setEntryCount(
- ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
- &InlinedGUIDs);
-
- if (!SampleProfileUseProfi) {
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
// Find equivalence classes.
findEquivalenceClasses(F);
- }
-
- // Before propagation starts, build, for each block, a list of
- // unique predecessors and successors. This is necessary to handle
- // identical edges in multiway branches. Since we visit all blocks and all
- // edges of the CFG, it is cleaner to build these lists once at the start
- // of the pass.
- buildEdges(F);
-}
-template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation(
- FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
- // If we utilize flow-based count inference, then we trust the computed
- // counts and set the entry count as computed by the algorithm. This is
- // primarily done to sync the counts produced by profi and BFI inference,
- // which uses the entry count for mass propagation.
- // If profi produces a zero value for the entry count, we fall back to
- // Samples->getHeadSamples() + 1 to avoid functions with a zero count.
- if (SampleProfileUseProfi) {
- const BasicBlockT *EntryBB = getEntryBB(&F);
- if (BlockWeights[EntryBB] > 0) {
- getFunction(F).setEntryCount(
- ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
- &InlinedGUIDs);
- }
+ // Propagate weights to all edges.
+ propagateWeights(F);
}
+
+ return Changed;
}
template <typename BT>
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 3e01fd17f5260..a961c47a75013 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -84,7 +84,6 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
@@ -1649,19 +1648,6 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
Instruction *MaxDestInst;
- // Since profi treats multiple edges (multiway branches) as a single edge,
- // we need to distribute the computed weight among the branches. We do
- // this by evenly splitting the edge weight among destinations.
- DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
- std::vector<uint64_t> EdgeIndex;
- if (SampleProfileUseProfi) {
- EdgeIndex.resize(TI->getNumSuccessors());
- for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
- const BasicBlock *Succ = TI->getSuccessor(I);
- EdgeIndex[I] = EdgeMultiplicity[Succ];
- EdgeMultiplicity[Succ]++;
- }
- }
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
@@ -1674,19 +1660,9 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
Weight = std::numeric_limits<uint32_t>::max();
}
- if (!SampleProfileUseProfi) {
- // The weight is increased by one to avoid propagation errors introduced
- // by zero weights.
- Weights.push_back(static_cast<uint32_t>(Weight + 1));
- } else {
- // Profi creates proper weights that do not require "+1" adjustments, but
- // we evenly split the weight among branches with the same destination.
- uint64_t W = Weight / EdgeMultiplicity[Succ];
- // Rounding up, if needed, so that first branches are hotter.
- if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
- W++;
- Weights.push_back(static_cast<uint32_t>(W));
- }
+ // The weight is increased by one to avoid propagation errors introduced
+ // by zero weights.
+ Weights.push_back(static_cast<uint32_t>(Weight + 1));
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index 22b9c0b19adab..be4f7125eb853 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -60,7 +60,6 @@ add_llvm_component_library(LLVMTransformUtils
StripGCRelocates.cpp
SSAUpdater.cpp
SSAUpdaterBulk.cpp
- SampleProfileInference.cpp
SampleProfileLoaderBaseUtil.cpp
SanitizerStats.cpp
SimplifyCFG.cpp
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
deleted file mode 100644
index 412a724006aa2..0000000000000
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ /dev/null
@@ -1,461 +0,0 @@
-//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a profile inference algorithm. Given incomplete and
-// possibly imprecise block counts, the algorithm reconstructs realistic block
-// and edge counts that satisfy flow conservation rules while minimally
-// modifying the input block counts.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/SampleProfileInference.h"
-#include "llvm/Support/Debug.h"
-#include <queue>
-#include <set>
-
-using namespace llvm;
-#define DEBUG_TYPE "sample-profile-inference"
-
-namespace {
-
-/// A value indicating an infinite flow/capacity/weight of a block/edge.
-/// Not using numeric_limits<int64_t>::max(), as the values can be summed up
-/// during the execution.
-static constexpr int64_t INF = ((int64_t)1) << 50;
-
-/// The minimum-cost maximum flow algorithm.
-///
-/// The algorithm finds the maximum flow of minimum cost on a given (directed)
-/// network using a modified version of the classical Moore-Bellman-Ford
-/// approach. The algorithm applies a number of augmentation iterations in which
-/// flow is sent along paths of positive capacity from the source to the sink.
-/// The worst-case time complexity of the implementation is O(v(f)*m*n),
-/// where m is the number of edges, n is the number of vertices, and v(f) is the
-/// value of the maximum flow. However, the observed running time on typical
-/// instances is sub-quadratic, that is, o(n^2).
-///
-/// The input is a set of edges with specified costs and capacities, and a pair
-/// of nodes (source and sink). The output is the flow along each edge of the
-/// minimum total cost respecting the given edge capacities.
-class MinCostMaxFlow {
-public:
- // Initialize algorithm's data structures for a network of a given size.
- void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
- Source = SourceNode;
- Target = SinkNode;
-
- Nodes = std::vector<Node>(NodeCount);
- Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
- }
-
- // Run the algorithm.
- int64_t run() {
- // Find an augmenting path and update the flow along the path
- size_t AugmentationIters = 0;
- while (findAugmentingPath()) {
- augmentFlowAlongPath();
- AugmentationIters++;
- }
-
- // Compute the total flow and its cost
- int64_t TotalCost = 0;
- int64_t TotalFlow = 0;
- for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
- for (auto &Edge : Edges[Src]) {
- if (Edge.Flow > 0) {
- TotalCost += Edge.Cost * Edge.Flow;
- if (Src == Source)
- TotalFlow += Edge.Flow;
- }
- }
- }
- LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
- << " iterations with " << TotalFlow << " total flow"
- << " of " << TotalCost << " cost\n");
- return TotalCost;
- }
-
- /// Add an edge to the network with a specified capacity and cost.
- /// Multiple edges between a pair of nodes are allowed but self-edges
- /// are not supported.
- void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
- assert(Capacity > 0 && "adding an edge of zero capacity");
- assert(Src != Dst && "loop edges are not supported");
-
- Edge SrcEdge;
- SrcEdge.Dst = Dst;
- SrcEdge.Cost = Cost;
- SrcEdge.Capacity = Capacity;
- SrcEdge.Flow = 0;
- SrcEdge.RevEdgeIndex = Edges[Dst].size();
-
- Edge DstEdge;
- DstEdge.Dst = Src;
- DstEdge.Cost = -Cost;
- DstEdge.Capacity = 0;
- DstEdge.Flow = 0;
- DstEdge.RevEdgeIndex = Edges[Src].size();
-
- Edges[Src].push_back(SrcEdge);
- Edges[Dst].push_back(DstEdge);
- }
-
- /// Add an edge of infinite capacity and a given cost to the network.
- void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
- addEdge(Src, Dst, INF, Cost);
- }
-
- /// Get the total flow from a given source node.
- /// Returns a list of pairs (target node, amount of flow to the target).
- const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
- std::vector<std::pair<uint64_t, int64_t>> Flow;
- for (auto &Edge : Edges[Src]) {
- if (Edge.Flow > 0)
- Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
- }
- return Flow;
- }
-
- /// Get the total flow between a pair of nodes.
- int64_t getFlow(uint64_t Src, uint64_t Dst) const {
- int64_t Flow = 0;
- for (auto &Edge : Edges[Src]) {
- if (Edge.Dst == Dst) {
- Flow += Edge.Flow;
- }
- }
- return Flow;
- }
-
- /// A cost of increasing a block's count by one.
- static constexpr int64_t AuxCostInc = 10;
- /// A cost of decreasing a block's count by one.
- static constexpr int64_t AuxCostDec = 20;
- /// A cost of increasing a count of zero-weight block by one.
- static constexpr int64_t AuxCostIncZero = 11;
- /// A cost of increasing the entry block's count by one.
- static constexpr int64_t AuxCostIncEntry = 40;
- /// A cost of decreasing the entry block's count by one.
- static constexpr int64_t AuxCostDecEntry = 10;
- /// A cost of taking an unlikely jump.
- static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
-
-private:
- /// Check for existence of an augmenting path with a positive capacity.
- bool findAugmentingPath() {
- // Initialize data structures
- for (auto &Node : Nodes) {
- Node.Distance = INF;
- Node.ParentNode = uint64_t(-1);
- Node.ParentEdgeIndex = uint64_t(-1);
- Node.Taken = false;
- }
-
- std::queue<uint64_t> Queue;
- Queue.push(Source);
- Nodes[Source].Distance = 0;
- Nodes[Source].Taken = true;
- while (!Queue.empty()) {
- uint64_t Src = Queue.front();
- Queue.pop();
- Nodes[Src].Taken = false;
- // Although the residual network contains edges with negative costs
- // (in particular, backward edges), it can be shown that there are no
- // negative-weight cycles and the following two invariants are maintained:
- // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
- // where Dist is the length of the shortest path between two nodes. This
- // allows pruning the search space of the path-finding algorithm using
- // the following early-stop criteria:
- // -- If we find a path with zero-distance from Source to Target, stop the
- // search, as the path is the shortest since Dist[Source, Target] >= 0;
- // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
- // process node V, as it is guaranteed _not_ to be on a shortest path
- // from Source to Target; it follows from inequalities
- // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
- // >= Dist[Source, V]
- if (Nodes[Target].Distance == 0)
- break;
- if (Nodes[Src].Distance > Nodes[Target].Distance)
- continue;
-
- // Process adjacent edges
- for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
- auto &Edge = Edges[Src][EdgeIdx];
- if (Edge.Flow < Edge.Capacity) {
- uint64_t Dst = Edge.Dst;
- int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
- if (Nodes[Dst].Distance > NewDistance) {
- // Update the distance and the parent node/edge
- Nodes[Dst].Distance = NewDistance;
- Nodes[Dst].ParentNode = Src;
- Nodes[Dst].ParentEdgeIndex = EdgeIdx;
- // Add the node to the queue, if it is not there yet
- if (!Nodes[Dst].Taken) {
- Queue.push(Dst);
- Nodes[Dst].Taken = true;
- }
- }
- }
- }
- }
-
- return Nodes[Target].Distance != INF;
- }
-
- /// Update the current flow along the augmenting path.
- void augmentFlowAlongPath() {
- // Find path capacity
- int64_t PathCapacity = INF;
- uint64_t Now = Target;
- while (Now != Source) {
- uint64_t Pred = Nodes[Now].ParentNode;
- auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
- PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
- Now = Pred;
- }
-
- assert(PathCapacity > 0 && "found incorrect augmenting path");
-
- // Update the flow along the path
- Now = Target;
- while (Now != Source) {
- uint64_t Pred = Nodes[Now].ParentNode;
- auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
- auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
-
- Edge.Flow += PathCapacity;
- RevEdge.Flow -= PathCapacity;
-
- Now = Pred;
- }
- }
-
- /// A node in a flow network.
- struct Node {
- /// The cost of the cheapest path from the source to the current node.
- int64_t Distance;
- /// The node preceding the current one in the path.
- uint64_t ParentNode;
- /// The index of the edge between ParentNode and the current node.
- uint64_t ParentEdgeIndex;
- /// An indicator of whether the current node is in a queue.
- bool Taken;
- };
- /// An edge in a flow network.
- struct Edge {
- /// The cost of the edge.
- int64_t Cost;
- /// The capacity of the edge.
- int64_t Capacity;
- /// The current flow on the edge.
- int64_t Flow;
- /// The destination node of the edge.
- uint64_t Dst;
- /// The index of the reverse edge between Dst and the current node.
- uint64_t RevEdgeIndex;
- };
-
- /// The set of network nodes.
- std::vector<Node> Nodes;
- /// The set of network edges.
- std::vector<std::vector<Edge>> Edges;
- /// Source node of the flow.
- uint64_t Source;
- /// Target (sink) node of the flow.
- uint64_t Target;
-};
-
-/// Initialize the flow network for a given function.
-///
-/// Every block is split into three nodes that are responsible for (i) an
-/// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
-/// reduction of the block weight.
-void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
- uint64_t NumBlocks = Func.Blocks.size();
- assert(NumBlocks > 1 && "Too few blocks in a function");
- LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
-
- // Pre-process data: make sure the entry weight is at least 1
- if (Func.Blocks[Func.Entry].Weight == 0) {
- Func.Blocks[Func.Entry].Weight = 1;
- }
- // Introduce dummy source/sink pairs to allow flow circulation.
- // The nodes corresponding to blocks of Func have indices in the range
- // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
- uint64_t S = 3 * NumBlocks;
- uint64_t T = S + 1;
- uint64_t S1 = S + 2;
- uint64_t T1 = S + 3;
-
- Network.initialize(3 * NumBlocks + 4, S1, T1);
-
- // Create three nodes for every block of the function
- for (uint64_t B = 0; B < NumBlocks; B++) {
- auto &Block = Func.Blocks[B];
- assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
- "non-zero weight of a block w/o weight except for an entry");
-
- // Split every block into three nodes
- uint64_t Bin = 3 * B;
- uint64_t Bout = 3 * B + 1;
- uint64_t Baux = 3 * B + 2;
- if (Block.Weight > 0) {
- Network.addEdge(S1, Bout, Block.Weight, 0);
- Network.addEdge(Bin, T1, Block.Weight, 0);
- }
-
- // Edges from S and to T
- assert((!Block.isEntry() || !Block.isExit()) &&
- "a block cannot be an entry and an exit");
- if (Block.isEntry()) {
- Network.addEdge(S, Bin, 0);
- } else if (Block.isExit()) {
- Network.addEdge(Bout, T, 0);
- }
-
- // An auxiliary node to allow increase/reduction of block counts:
- // We assume that decreasing block counts is more expensive than increasing,
- // and thus we set separate costs here. In the future we may want to tune
- // the relative costs so as to maximize the quality of generated profiles.
- int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
- int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
- if (Block.UnknownWeight) {
- // Do not penalize changing weights of blocks w/o known profile count
- AuxCostInc = 0;
- AuxCostDec = 0;
- } else {
- // Increasing the count for "cold" blocks with zero initial count is more
- // expensive than for "hot" ones
- if (Block.Weight == 0) {
- AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
- }
- // Modifying the count of the entry block is expensive
- if (Block.isEntry()) {
- AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
- AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
- }
- }
- // For blocks with self-edges, do not penalize a reduction of the count,
- // as all of the increase can be attributed to the self-edge
- if (Block.HasSelfEdge) {
- AuxCostDec = 0;
- }
-
- Network.addEdge(Bin, Baux, AuxCostInc);
- Network.addEdge(Baux, Bout, AuxCostInc);
- if (Block.Weight > 0) {
- Network.addEdge(Bout, Baux, AuxCostDec);
- Network.addEdge(Baux, Bin, AuxCostDec);
- }
- }
-
- // Create edges for every jump
- for (auto &Jump : Func.Jumps) {
- uint64_t Src = Jump.Source;
- uint64_t Dst = Jump.Target;
- if (Src != Dst) {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t DstIn = 3 * Dst;
- uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
- Network.addEdge(SrcOut, DstIn, Cost);
- }
- }
-
- // Make sure we have a valid flow circulation
- Network.addEdge(T, S, 0);
-}
-
-/// Extract resulting block and edge counts from the flow network.
-void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
- uint64_t NumBlocks = Func.Blocks.size();
-
- // Extract resulting block counts
- for (uint64_t Src = 0; Src < NumBlocks; Src++) {
- auto &Block = Func.Blocks[Src];
- uint64_t SrcOut = 3 * Src + 1;
- int64_t Flow = 0;
- for (auto &Adj : Network.getFlow(SrcOut)) {
- uint64_t DstIn = Adj.first;
- int64_t DstFlow = Adj.second;
- bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
- if (!IsAuxNode || Block.HasSelfEdge) {
- Flow += DstFlow;
- }
- }
- Block.Flow = Flow;
- assert(Flow >= 0 && "negative block flow");
- }
-
- // Extract resulting jump counts
- for (auto &Jump : Func.Jumps) {
- uint64_t Src = Jump.Source;
- uint64_t Dst = Jump.Target;
- int64_t Flow = 0;
- if (Src != Dst) {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t DstIn = 3 * Dst;
- Flow = Network.getFlow(SrcOut, DstIn);
- } else {
- uint64_t SrcOut = 3 * Src + 1;
- uint64_t SrcAux = 3 * Src + 2;
- int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
- if (AuxFlow > 0)
- Flow = AuxFlow;
- }
- Jump.Flow = Flow;
- assert(Flow >= 0 && "negative jump flow");
- }
-}
-
-#ifndef NDEBUG
-/// Verify that the computed flow values satisfy flow conservation rules
-void verifyWeights(const FlowFunction &Func) {
- const uint64_t NumBlocks = Func.Blocks.size();
- auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
- auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
- for (auto &Jump : Func.Jumps) {
- InFlow[Jump.Target] += Jump.Flow;
- OutFlow[Jump.Source] += Jump.Flow;
- }
-
- uint64_t TotalInFlow = 0;
- uint64_t TotalOutFlow = 0;
- for (uint64_t I = 0; I < NumBlocks; I++) {
- auto &Block = Func.Blocks[I];
- if (Block.isEntry()) {
- TotalInFlow += Block.Flow;
- assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
- } else if (Block.isExit()) {
- TotalOutFlow += Block.Flow;
- assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
- } else {
- assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
- assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
- }
- }
- assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
-}
-#endif
-
-} // end of anonymous namespace
-
-/// Apply the profile inference algorithm for a given flow function
-void llvm::applyFlowInference(FlowFunction &Func) {
- // Create and apply an inference network model
- auto InferenceNetwork = MinCostMaxFlow();
- initializeNetwork(InferenceNetwork, Func);
- InferenceNetwork.run();
-
- // Extract flow values for every block and every edge
- extractWeights(InferenceNetwork, Func);
-
-#ifndef NDEBUG
- // Verify the result
- verifyWeights(Func);
-#endif
-}
diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
index ea0e8343eb887..6d995cf4c0481 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -34,10 +34,6 @@ cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));
-cl::opt<bool> SampleProfileUseProfi(
- "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Use profi to infer block and edge counts."));
-
namespace sampleprofutil {
/// Return true if the given callsite is hot wrt to hot cutoff threshold.
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof
deleted file mode 100644
index e995a04c7fd44..0000000000000
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-inference.prof
+++ /dev/null
@@ -1,23 +0,0 @@
-test_1:23968:0
- 1: 100
- 2: 60
- 3: 40
- !CFGChecksum: 4294967295
-
-test_2:23968:0
- 1: 100
- 3: 10
- !CFGChecksum: 37753817093
-
-test_3:10000:0
- 3: 13
- 5: 89
- !CFGChecksum: 69502983527
-
-sum_of_squares:23968:0
- 2: 5993
- 3: 1
- 4: 5992
- 5: 5992
- 8: 5992
- !CFGChecksum: 175862120757
diff --git a/llvm/test/Transforms/SampleProfile/profile-inference.ll b/llvm/test/Transforms/SampleProfile/profile-inference.ll
deleted file mode 100644
index 7f40358e65268..0000000000000
--- a/llvm/test/Transforms/SampleProfile/profile-inference.ll
+++ /dev/null
@@ -1,245 +0,0 @@
-; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference.prof | opt -analyze -branch-prob -enable-new-pm=0 | FileCheck %s
-; RUN: opt < %s -passes=pseudo-probe,sample-profile -sample-profile-use-profi -sample-profile-file=%S/Inputs/profile-inference.prof | opt -analyze -block-freq -enable-new-pm=0 | FileCheck %s --check-prefix=CHECK2
-
-; The test verifies that profile inference correctly builds branch probabilities
-; from sampling-based block counts.
-;
-; +---------+ +----------+
-; | b3 [40] | <-- | b1 [100] |
-; +---------+ +----------+
-; |
-; |
-; v
-; +----------+
-; | b2 [60] |
-; +----------+
-
- at yydebug = dso_local global i32 0, align 4
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @test_1() #0 {
-b1:
- call void @llvm.pseudoprobe(i64 7964825052912775246, i64 1, i32 0, i64 -1)
- %0 = load i32, i32* @yydebug, align 4
- %cmp = icmp ne i32 %0, 0
- br i1 %cmp, label %b2, label %b3
-; CHECK: edge b1 -> b2 probability is 0x4ccccccd / 0x80000000 = 60.00%
-; CHECK: edge b1 -> b3 probability is 0x33333333 / 0x80000000 = 40.00%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 100
-
-b2:
- call void @llvm.pseudoprobe(i64 7964825052912775246, i64 2, i32 0, i64 -1)
- ret i32 %0
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 60
-
-b3:
- call void @llvm.pseudoprobe(i64 7964825052912775246, i64 3, i32 0, i64 -1)
- ret i32 %0
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 40
-}
-
-
-; The test verifies that profile inference correctly builds branch probabilities
-; from sampling-based block counts in the presence of "dangling" probes (whose
-; block counts are missing).
-;
-; +---------+ +----------+
-; | b3 [10] | <-- | b1 [100] |
-; +---------+ +----------+
-; |
-; |
-; v
-; +----------+
-; | b2 [?] |
-; +----------+
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @test_2() #0 {
-b1:
- call void @llvm.pseudoprobe(i64 -6216829535442445639, i64 1, i32 0, i64 -1)
- %0 = load i32, i32* @yydebug, align 4
- %cmp = icmp ne i32 %0, 0
- br i1 %cmp, label %b2, label %b3
-; CHECK: edge b1 -> b2 probability is 0x73333333 / 0x80000000 = 90.00%
-; CHECK: edge b1 -> b3 probability is 0x0ccccccd / 0x80000000 = 10.00%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 100
-
-b2:
- call void @llvm.pseudoprobe(i64 -6216829535442445639, i64 2, i32 0, i64 -1)
- ret i32 %0
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 90
-
-b3:
- call void @llvm.pseudoprobe(i64 -6216829535442445639, i64 3, i32 0, i64 -1)
- ret i32 %0
-}
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 10
-
-
-; The test verifies that profi is able to infer block counts from hot subgraphs.
-;
-; +---------+ +---------+
-; | b4 [?] | <-- | b1 [?] |
-; +---------+ +---------+
-; | |
-; | |
-; v v
-; +---------+ +---------+
-; | b5 [89] | | b2 [?] |
-; +---------+ +---------+
-; |
-; |
-; v
-; +---------+
-; | b3 [13] |
-; +---------+
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @test_3() #0 {
-b1:
- call void @llvm.pseudoprobe(i64 1649282507922421973, i64 1, i32 0, i64 -1)
- %0 = load i32, i32* @yydebug, align 4
- %cmp = icmp ne i32 %0, 0
- br i1 %cmp, label %b2, label %b4
-; CHECK: edge b1 -> b2 probability is 0x10505050 / 0x80000000 = 12.75%
-; CHECK: edge b1 -> b4 probability is 0x6fafafb0 / 0x80000000 = 87.25%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 102
-
-b2:
- call void @llvm.pseudoprobe(i64 1649282507922421973, i64 2, i32 0, i64 -1)
- br label %b3
-; CHECK: edge b2 -> b3 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 13
-
-b3:
- call void @llvm.pseudoprobe(i64 1649282507922421973, i64 3, i32 0, i64 -1)
- ret i32 %0
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 13
-
-b4:
- call void @llvm.pseudoprobe(i64 1649282507922421973, i64 4, i32 0, i64 -1)
- br label %b5
-; CHECK: edge b4 -> b5 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b4: float = {{.*}}, int = {{.*}}, count = 89
-
-b5:
- call void @llvm.pseudoprobe(i64 1649282507922421973, i64 5, i32 0, i64 -1)
- ret i32 %0
-; CHECK2: - b5: float = {{.*}}, int = {{.*}}, count = 89
-}
-
-
-; A larger test to verify that profile inference correctly identifies hot parts
-; of the control-flow graph.
-;
-; +-----------+
-; | b1 [?] |
-; +-----------+
-; |
-; |
-; v
-; +--------+ +-----------+
-; | b3 [1] | <-- | b2 [5993] |
-; +--------+ +-----------+
-; | |
-; | |
-; | v
-; | +-----------+ +--------+
-; | | b4 [5992] | --> | b6 [?] |
-; | +-----------+ +--------+
-; | | |
-; | | |
-; | v |
-; | +-----------+ |
-; | | b5 [5992] | |
-; | +-----------+ |
-; | | |
-; | | |
-; | v |
-; | +-----------+ |
-; | | b7 [?] | |
-; | +-----------+ |
-; | | |
-; | | |
-; | v |
-; | +-----------+ |
-; | | b8 [5992] | <-----+
-; | +-----------+
-; | |
-; | |
-; | v
-; | +-----------+
-; +----------> | b9 [?] |
-; +-----------+
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @sum_of_squares() #0 {
-b1:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 1, i32 0, i64 -1)
- %0 = load i32, i32* @yydebug, align 4
- %cmp = icmp ne i32 %0, 0
- br label %b2
-; CHECK: edge b1 -> b2 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b1: float = {{.*}}, int = {{.*}}, count = 5993
-
-b2:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 2, i32 0, i64 -1)
- br i1 %cmp, label %b4, label %b3
-; CHECK: edge b2 -> b4 probability is 0x7ffa8844 / 0x80000000 = 99.98%
-; CHECK: edge b2 -> b3 probability is 0x000577bc / 0x80000000 = 0.02%
-; CHECK2: - b2: float = {{.*}}, int = {{.*}}, count = 5993
-
-b3:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 3, i32 0, i64 -1)
- br label %b9
-; CHECK: edge b3 -> b9 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b3: float = {{.*}}, int = {{.*}}, count = 1
-
-b4:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 4, i32 0, i64 -1)
- br i1 %cmp, label %b5, label %b6
-; CHECK: edge b4 -> b5 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK: edge b4 -> b6 probability is 0x00000000 / 0x80000000 = 0.00%
-; CHECK2: - b4: float = {{.*}}, int = {{.*}}, count = 5992
-
-b5:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 5, i32 0, i64 -1)
- br label %b7
-; CHECK: edge b5 -> b7 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b5: float = {{.*}}, int = {{.*}}, count = 5992
-
-b6:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 6, i32 0, i64 -1)
- br label %b8
-; CHECK: edge b6 -> b8 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b6: float = {{.*}}, int = {{.*}}, count = 0
-
-b7:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 7, i32 0, i64 -1)
- br label %b8
-; CHECK: edge b7 -> b8 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b7: float = {{.*}}, int = {{.*}}, count = 5992
-
-b8:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 8, i32 0, i64 -1)
- br label %b9
-; CHECK: edge b8 -> b9 probability is 0x80000000 / 0x80000000 = 100.00%
-; CHECK2: - b8: float = {{.*}}, int = {{.*}}, count = 5992
-
-b9:
- call void @llvm.pseudoprobe(i64 -907520326213521421, i64 9, i32 0, i64 -1)
- ret i32 %0
-}
-; CHECK2: - b9: float = {{.*}}, int = {{.*}}, count = 5993
-
-declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
-
-attributes #0 = { noinline nounwind uwtable "use-sample-profile"}
-attributes #1 = { nounwind }
-
-!llvm.pseudo_probe_desc = !{!6, !7, !8, !9}
-
-!6 = !{i64 7964825052912775246, i64 4294967295, !"test_1", null}
-!7 = !{i64 -6216829535442445639, i64 37753817093, !"test_2", null}
-!8 = !{i64 1649282507922421973, i64 69502983527, !"test_3", null}
-!9 = !{i64 -907520326213521421, i64 175862120757, !"sum_of_squares", null}