[llvm] [BOLT] Deprecate hfsort+ in favor of cdsort (PR #72408)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 12 11:04:31 PST 2024
https://github.com/spupyrev updated https://github.com/llvm/llvm-project/pull/72408
From a9434d2c127e6161c276b8778cb0674969176e06 Mon Sep 17 00:00:00 2001
From: spupyrev <spupyrev at fb.com>
Date: Wed, 15 Nov 2023 07:56:42 -0800
Subject: [PATCH] [BOLT] Deprecating hfsort+ in favor of cdsort
---
bolt/include/bolt/Passes/HFSort.h | 12 +-
bolt/lib/Passes/CMakeLists.txt | 1 -
bolt/lib/Passes/HFSortPlus.cpp | 612 ---------------------------
bolt/lib/Passes/ReorderFunctions.cpp | 18 +-
4 files changed, 16 insertions(+), 627 deletions(-)
delete mode 100644 bolt/lib/Passes/HFSortPlus.cpp
diff --git a/bolt/include/bolt/Passes/HFSort.h b/bolt/include/bolt/Passes/HFSort.h
index 884c4692ddebbb..3787187733d072 100644
--- a/bolt/include/bolt/Passes/HFSort.h
+++ b/bolt/include/bolt/Passes/HFSort.h
@@ -9,11 +9,10 @@
// Implementation of HFSort algorithm for function ordering:
// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
//
-// Cluster functions by hotness. There are four clustering algorithms:
+// Cluster functions by hotness. There are three clustering algorithms:
// 1. clusterize
-// 2. HFsort+
-// 3. pettisAndHansen
-// 4. randomClusters
+// 2. pettisAndHansen
+// 3. randomClusters
//
//===----------------------------------------------------------------------===//
@@ -81,11 +80,6 @@ inline bool compareClustersDensity(const Cluster &C1, const Cluster &C2) {
*/
std::vector<Cluster> clusterize(const CallGraph &Cg);
-/*
- * Optimize function placement prioritizing i-TLB and i-cache performance.
- */
-std::vector<Cluster> hfsortPlus(CallGraph &Cg);
-
/*
* Pettis-Hansen code layout algorithm
* reference: K. Pettis and R. C. Hansen, "Profile Guided Code Positioning",
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index b8bbe59a64480c..04057a895d6662 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -16,7 +16,6 @@ add_llvm_library(LLVMBOLTPasses
FixRelaxationPass.cpp
FixRISCVCallsPass.cpp
HFSort.cpp
- HFSortPlus.cpp
Hugify.cpp
IdenticalCodeFolding.cpp
IndirectCallPromotion.cpp
diff --git a/bolt/lib/Passes/HFSortPlus.cpp b/bolt/lib/Passes/HFSortPlus.cpp
deleted file mode 100644
index 0a481b5418dd25..00000000000000
--- a/bolt/lib/Passes/HFSortPlus.cpp
+++ /dev/null
@@ -1,612 +0,0 @@
-//===- bolt/Passes/HFSortPlus.cpp - Order functions by hotness ------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// hfsort+ - layout of hot functions with i-TLB cache optimization.
-//
-// Given an ordering of hot functions (and hence, their assignment to the
-// i-TLB pages), we can divide all functions calls Into two categories:
-// - 'short' ones that have a caller-callee distance less than a page;
-// - 'long' ones where the distance exceeds a page.
-// The short calls are likely to result in a i-TLB cache hit. For the long ones,
-// the hit/miss result depends on the 'hotness' of the page (i.e., how often
-// the page is accessed). Assuming that functions are sent to the i-TLB cache
-// in a random order, the probability that a page is present in the cache is
-// proportional to the number of samples corresponding to the functions on the
-// page. The following algorithm detects short and long calls, and optimizes
-// the expected number of cache misses for the long ones.
-//
-//===----------------------------------------------------------------------===//
-
-#include "bolt/Passes/HFSort.h"
-#include "llvm/Support/CommandLine.h"
-#include <cmath>
-#include <set>
-#include <vector>
-
-#define DEBUG_TYPE "hfsort"
-
-using namespace llvm;
-using namespace bolt;
-
-namespace opts {
-
-extern cl::OptionCategory BoltOptCategory;
-
-cl::opt<unsigned> ITLBPageSize("itlb-page-size",
- cl::desc("The size of i-tlb cache page"),
- cl::init(4096), cl::ReallyHidden,
- cl::cat(BoltOptCategory));
-
-cl::opt<unsigned> ITLBEntries("itlb-entries",
- cl::desc("The number of entries in i-tlb cache"),
- cl::init(16), cl::ReallyHidden,
- cl::cat(BoltOptCategory));
-
-static cl::opt<unsigned> ITLBDensity("itlb-density",
- cl::desc("The density of i-tlb cache"),
- cl::init(4096), cl::ReallyHidden,
- cl::cat(BoltOptCategory));
-
-static cl::opt<double> MergeProbability(
- "merge-probability",
- cl::desc("The minimum probability of a call for merging two clusters"),
- cl::init(0.9), cl::ReallyHidden, cl::cat(BoltOptCategory));
-
-static cl::opt<double> ArcThreshold(
- "arc-threshold",
- cl::desc("The threshold for ignoring arcs with a small relative weight"),
- cl::init(0.00000001), cl::ReallyHidden, cl::cat(BoltOptCategory));
-
-} // namespace opts
-
-namespace llvm {
-namespace bolt {
-
-using NodeId = CallGraph::NodeId;
-using Arc = CallGraph::Arc;
-
-namespace {
-
-class Edge;
-using ArcList = std::vector<const Arc *>;
-
-// A chain (ordered sequence) of nodes (functions) in the call graph
-class Chain {
-public:
- Chain(const Chain &) = delete;
- Chain(Chain &&) = default;
- Chain &operator=(const Chain &) = delete;
- Chain &operator=(Chain &&) = default;
-
- explicit Chain(size_t Id_, NodeId Node, size_t Samples_, size_t Size_)
- : Id(Id_), Samples(Samples_), Size(Size_), Nodes(1, Node) {}
-
- double density() const { return static_cast<double>(Samples) / Size; }
-
- Edge *getEdge(Chain *Other) const {
- for (std::pair<Chain *, Edge *> It : Edges)
- if (It.first == Other)
- return It.second;
- return nullptr;
- }
-
- void removeEdge(Chain *Other) {
- auto It = Edges.begin();
- while (It != Edges.end()) {
- if (It->first == Other) {
- Edges.erase(It);
- return;
- }
- It++;
- }
- }
-
- void addEdge(Chain *Other, Edge *Edge) { Edges.emplace_back(Other, Edge); }
-
- void merge(Chain *Other) {
- Nodes.insert(Nodes.end(), Other->Nodes.begin(), Other->Nodes.end());
- Samples += Other->Samples;
- Size += Other->Size;
- }
-
- void mergeEdges(Chain *Other);
-
- void clear() {
- Nodes.clear();
- Edges.clear();
- }
-
-public:
- size_t Id;
- uint64_t Samples;
- uint64_t Size;
- // Cached score for the chain
- double Score{0};
- // Cached short-calls for the chain
- double ShortCalls{0};
- // Nodes in the chain
- std::vector<NodeId> Nodes;
- // Adjacent chains and corresponding edges (lists of arcs)
- std::vector<std::pair<Chain *, Edge *>> Edges;
-};
-
-// An edge in the call graph representing Arcs between two Chains.
-// When functions are merged Into chains, the edges are combined too so that
-// there is always at most one edge between a pair of chains
-class Edge {
-public:
- Edge(const Edge &) = delete;
- Edge(Edge &&) = default;
- Edge &operator=(const Edge &) = delete;
- Edge &operator=(Edge &&) = default;
-
- explicit Edge(Chain *SrcChain_, Chain *DstChain_, const Arc *A)
- : SrcChain(SrcChain_), DstChain(DstChain_), Arcs(1, A) {}
-
- void changeEndpoint(Chain *From, Chain *To) {
- if (From == SrcChain)
- SrcChain = To;
- if (From == DstChain)
- DstChain = To;
- }
-
- void moveArcs(Edge *Other) {
- Arcs.insert(Arcs.end(), Other->Arcs.begin(), Other->Arcs.end());
- Other->Arcs.clear();
- }
-
- void setMergeGain(Chain *PredChain, double ForwardGain, double BackwardGain) {
- // When forward and backward gains are the same, prioritize merging that
- // preserves the original order of the functions in the binary
- if (std::abs(ForwardGain - BackwardGain) < 1e-8) {
- if (SrcChain->Id < DstChain->Id) {
- IsGainForward = true;
- CachedGain = PredChain == SrcChain ? ForwardGain : BackwardGain;
- } else {
- IsGainForward = false;
- CachedGain = PredChain == SrcChain ? BackwardGain : ForwardGain;
- }
- } else if (ForwardGain > BackwardGain) {
- IsGainForward = PredChain == SrcChain;
- CachedGain = ForwardGain;
- } else {
- IsGainForward = PredChain != SrcChain;
- CachedGain = BackwardGain;
- }
- }
-
- double gain() const { return CachedGain; }
-
- Chain *predChain() const { return IsGainForward ? SrcChain : DstChain; }
-
- Chain *succChain() const { return IsGainForward ? DstChain : SrcChain; }
-
-private:
- Chain *SrcChain{nullptr};
- Chain *DstChain{nullptr};
-
-public:
- // Original arcs in the binary with corresponding execution counts
- ArcList Arcs;
- // Cached gain of merging the pair of chains
- double CachedGain{-1.0};
- // Since the gain of merging (Src, Dst) and (Dst, Src) might be different,
- // we store a flag indicating which of the options results in a higher gain
- bool IsGainForward;
-};
-
-void Chain::mergeEdges(Chain *Other) {
- // Update edges adjacent to chain other
- for (auto EdgeIt : Other->Edges) {
- Chain *const DstChain = EdgeIt.first;
- Edge *const DstEdge = EdgeIt.second;
- Chain *const TargetChain = DstChain == Other ? this : DstChain;
-
- // Find the corresponding edge in the current chain
- Edge *CurEdge = getEdge(TargetChain);
- if (CurEdge == nullptr) {
- DstEdge->changeEndpoint(Other, this);
- this->addEdge(TargetChain, DstEdge);
- if (DstChain != this && DstChain != Other)
- DstChain->addEdge(this, DstEdge);
- } else {
- CurEdge->moveArcs(DstEdge);
- }
- // Cleanup leftover edge
- if (DstChain != Other)
- DstChain->removeEdge(Other);
- }
-}
-
-class HFSortPlus {
-public:
- explicit HFSortPlus(const CallGraph &Cg) : Cg(Cg) { initialize(); }
-
- /// Run the algorithm and return ordered set of function clusters.
- std::vector<Cluster> run() {
- // Pass 1
- runPassOne();
-
- // Pass 2
- runPassTwo();
-
- outs() << "BOLT-INFO: hfsort+ reduced the number of chains from "
- << Cg.numNodes() << " to " << HotChains.size() << "\n";
-
- // Sorting chains by density in decreasing order
- auto DensityComparator = [](const Chain *L, const Chain *R) {
- if (L->density() != R->density())
- return L->density() > R->density();
- // Making sure the comparison is deterministic
- return L->Id < R->Id;
- };
- llvm::stable_sort(HotChains, DensityComparator);
-
- // Return the set of clusters that are left, which are the ones that
- // didn't get merged (so their first func is its original func)
- std::vector<Cluster> Clusters;
- Clusters.reserve(HotChains.size());
- for (Chain *Chain : HotChains)
- Clusters.emplace_back(Cluster(Chain->Nodes, Cg));
- return Clusters;
- }
-
-private:
- /// Initialize the set of active chains, function id to chain mapping,
- /// total number of samples and function addresses.
- void initialize() {
- OutWeight.resize(Cg.numNodes(), 0);
- InWeight.resize(Cg.numNodes(), 0);
- AllChains.reserve(Cg.numNodes());
- HotChains.reserve(Cg.numNodes());
- NodeChain.resize(Cg.numNodes(), nullptr);
- Addr.resize(Cg.numNodes(), 0);
-
- // Initialize chains
- for (NodeId F = 0; F < Cg.numNodes(); ++F) {
- AllChains.emplace_back(F, F, Cg.samples(F), Cg.size(F));
- HotChains.push_back(&AllChains.back());
- NodeChain[F] = &AllChains.back();
- TotalSamples += Cg.samples(F);
- for (NodeId Succ : Cg.successors(F)) {
- if (F == Succ)
- continue;
- const Arc &Arc = *Cg.findArc(F, Succ);
- OutWeight[F] += Arc.weight();
- InWeight[Succ] += Arc.weight();
- }
- }
-
- AllEdges.reserve(Cg.numArcs());
- for (NodeId F = 0; F < Cg.numNodes(); ++F) {
- for (NodeId Succ : Cg.successors(F)) {
- if (F == Succ)
- continue;
- const Arc &Arc = *Cg.findArc(F, Succ);
- if (Arc.weight() == 0.0 ||
- Arc.weight() / TotalSamples < opts::ArcThreshold) {
- continue;
- }
-
- Edge *CurEdge = NodeChain[F]->getEdge(NodeChain[Succ]);
- if (CurEdge != nullptr) {
- // This edge is already present in the graph
- assert(NodeChain[Succ]->getEdge(NodeChain[F]) != nullptr);
- CurEdge->Arcs.push_back(&Arc);
- } else {
- // This is a new edge
- AllEdges.emplace_back(NodeChain[F], NodeChain[Succ], &Arc);
- NodeChain[F]->addEdge(NodeChain[Succ], &AllEdges.back());
- NodeChain[Succ]->addEdge(NodeChain[F], &AllEdges.back());
- }
- }
- }
-
- for (Chain *&Chain : HotChains) {
- Chain->ShortCalls = shortCalls(Chain);
- Chain->Score = score(Chain);
- }
- }
-
- /// The probability that a page with a given density is not in the cache.
- ///
- /// Assume that the hot functions are called in a random order; then the
- /// probability of an i-TLB page being accessed after a function call is
- /// p = pageSamples / TotalSamples. The probability that the page is not
- /// accessed is (1 - p), and the probability that it is not in the cache
- /// (i.e. not accessed during the last kCacheEntries function calls)
- /// is (1 - p)^kCacheEntries
- double missProbability(double ChainDensity) const {
- double PageSamples = ChainDensity * opts::ITLBDensity;
-
- if (PageSamples >= TotalSamples)
- return 0;
-
- double P = PageSamples / TotalSamples;
- return pow(1.0 - P, double(opts::ITLBEntries));
- }
-
- /// The expected number of calls on different i-TLB pages for an arc of the
- /// call graph with a specified weight
- double expectedCalls(uint64_t SrcAddr, uint64_t DstAddr,
- double Weight) const {
- uint64_t Dist = SrcAddr >= DstAddr ? SrcAddr - DstAddr : DstAddr - SrcAddr;
- if (Dist >= opts::ITLBPageSize)
- return 0;
-
- double D = double(Dist) / double(opts::ITLBPageSize);
- // Increasing the importance of shorter calls
- return (1.0 - D * D) * Weight;
- }
-
- /// The expected number of calls within a given chain with both endpoints on
- /// the same cache page
- double shortCalls(Chain *Chain) const {
- Edge *Edge = Chain->getEdge(Chain);
- if (Edge == nullptr)
- return 0;
-
- double Calls = 0;
- for (const Arc *Arc : Edge->Arcs) {
- uint64_t SrcAddr = Addr[Arc->src()] + uint64_t(Arc->avgCallOffset());
- uint64_t DstAddr = Addr[Arc->dst()];
- Calls += expectedCalls(SrcAddr, DstAddr, Arc->weight());
- }
- return Calls;
- }
-
- /// The number of calls between the two chains with both endpoints on
- /// the same i-TLB page, assuming that a given pair of chains gets merged
- double shortCalls(Chain *ChainPred, Chain *ChainSucc, Edge *Edge) const {
- double Calls = 0;
- for (const Arc *Arc : Edge->Arcs) {
- Chain *SrcChain = NodeChain[Arc->src()];
- uint64_t SrcAddr;
- uint64_t DstAddr;
- if (SrcChain == ChainPred) {
- SrcAddr = Addr[Arc->src()] + uint64_t(Arc->avgCallOffset());
- DstAddr = Addr[Arc->dst()] + ChainPred->Size;
- } else {
- SrcAddr =
- Addr[Arc->src()] + uint64_t(Arc->avgCallOffset()) + ChainPred->Size;
- DstAddr = Addr[Arc->dst()];
- }
- Calls += expectedCalls(SrcAddr, DstAddr, Arc->weight());
- }
-
- Calls += ChainPred->ShortCalls;
- Calls += ChainSucc->ShortCalls;
-
- return Calls;
- }
-
- double score(Chain *Chain) const {
- double LongCalls = Chain->Samples - Chain->ShortCalls;
- return LongCalls * missProbability(Chain->density());
- }
-
- /// The gain of merging two chains.
- ///
- /// We assume that the final chains are sorted by their density, and hence
- /// every chain is likely to be adjacent with chains of the same density.
- /// Thus, the 'hotness' of every chain can be estimated by density*pageSize,
- /// which is used to compute the probability of cache misses for long calls
- /// of a given chain.
- /// The result is also scaled by the size of the resulting chain in order to
- /// increase the chance of merging short chains, which is helpful for
- /// the i-cache performance.
- double mergeGain(Chain *ChainPred, Chain *ChainSucc, Edge *Edge) const {
- // Cache misses on the chains before merging
- double CurScore = ChainPred->Score + ChainSucc->Score;
-
- // Cache misses on the merged chain
- double LongCalls = ChainPred->Samples + ChainSucc->Samples -
- shortCalls(ChainPred, ChainSucc, Edge);
- const double MergedSamples = ChainPred->Samples + ChainSucc->Samples;
- const double MergedSize = ChainPred->Size + ChainSucc->Size;
- double NewScore = LongCalls * missProbability(MergedSamples / MergedSize);
-
- double Gain = CurScore - NewScore;
- // Scale the result to increase the importance of merging short chains
- Gain /= std::min(ChainPred->Size, ChainSucc->Size);
-
- return Gain;
- }
-
- /// Run the first optimization pass of the algorithm:
- /// Merge chains that call each other with a high probability.
- void runPassOne() {
- // Find candidate pairs of chains for merging
- std::vector<const Arc *> ArcsToMerge;
- for (Chain *ChainPred : HotChains) {
- NodeId F = ChainPred->Nodes.back();
- for (NodeId Succ : Cg.successors(F)) {
- if (F == Succ)
- continue;
-
- const Arc &Arc = *Cg.findArc(F, Succ);
- if (Arc.weight() == 0.0 ||
- Arc.weight() / TotalSamples < opts::ArcThreshold)
- continue;
-
- const double CallsFromPred = OutWeight[F];
- const double CallsToSucc = InWeight[Succ];
- const double CallsPredSucc = Arc.weight();
-
- // Probability that the first chain is calling the second one
- const double ProbOut =
- CallsFromPred > 0 ? CallsPredSucc / CallsFromPred : 0;
- assert(0.0 <= ProbOut && ProbOut <= 1.0 && "incorrect out-probability");
-
- // Probability that the second chain is called From the first one
- const double ProbIn = CallsToSucc > 0 ? CallsPredSucc / CallsToSucc : 0;
- assert(0.0 <= ProbIn && ProbIn <= 1.0 && "incorrect in-probability");
-
- if (std::min(ProbOut, ProbIn) >= opts::MergeProbability)
- ArcsToMerge.push_back(&Arc);
- }
- }
-
- // Sort the pairs by the weight in reverse order
- llvm::sort(ArcsToMerge, [](const Arc *L, const Arc *R) {
- return L->weight() > R->weight();
- });
-
- // Merge the pairs of chains
- for (const Arc *Arc : ArcsToMerge) {
- Chain *ChainPred = NodeChain[Arc->src()];
- Chain *ChainSucc = NodeChain[Arc->dst()];
- if (ChainPred == ChainSucc)
- continue;
- if (ChainPred->Nodes.back() == Arc->src() &&
- ChainSucc->Nodes.front() == Arc->dst())
- mergeChains(ChainPred, ChainSucc);
- }
- }
-
- /// Run the second optimization pass of the hfsort+ algorithm:
- /// Merge pairs of chains while there is an improvement in the
- /// expected cache miss ratio.
- void runPassTwo() {
- // Creating a priority queue containing all edges ordered by the merge gain
- auto GainComparator = [](Edge *L, Edge *R) {
- if (std::abs(L->gain() - R->gain()) > 1e-8)
- return L->gain() > R->gain();
-
- // Making sure the comparison is deterministic
- if (L->predChain()->Id != R->predChain()->Id)
- return L->predChain()->Id < R->predChain()->Id;
-
- return L->succChain()->Id < R->succChain()->Id;
- };
- std::set<Edge *, decltype(GainComparator)> Queue(GainComparator);
-
- // Inserting the edges Into the queue
- for (Chain *ChainPred : HotChains) {
- for (auto EdgeIt : ChainPred->Edges) {
- Chain *ChainSucc = EdgeIt.first;
- Edge *ChainEdge = EdgeIt.second;
- // Ignore loop edges
- if (ChainPred == ChainSucc)
- continue;
- // Ignore already processed edges
- if (ChainEdge->gain() != -1.0)
- continue;
-
- // Compute the gain of merging the two chains
- auto ForwardGain = mergeGain(ChainPred, ChainSucc, ChainEdge);
- auto BackwardGain = mergeGain(ChainSucc, ChainPred, ChainEdge);
- ChainEdge->setMergeGain(ChainPred, ForwardGain, BackwardGain);
- if (ChainEdge->gain() > 0.0)
- Queue.insert(ChainEdge);
- }
- }
-
- // Merge the chains while the gain of merging is positive
- while (!Queue.empty()) {
- // Extract the best (top) edge for merging
- Edge *It = *Queue.begin();
- Queue.erase(Queue.begin());
- Edge *BestEdge = It;
- Chain *BestChainPred = BestEdge->predChain();
- Chain *BestChainSucc = BestEdge->succChain();
- if (BestChainPred == BestChainSucc || BestEdge->gain() <= 0.0)
- continue;
-
- // Remove outdated edges
- for (std::pair<Chain *, Edge *> EdgeIt : BestChainPred->Edges)
- Queue.erase(EdgeIt.second);
- for (std::pair<Chain *, Edge *> EdgeIt : BestChainSucc->Edges)
- Queue.erase(EdgeIt.second);
-
- // Merge the best pair of chains
- mergeChains(BestChainPred, BestChainSucc);
-
- // Insert newly created edges Into the queue
- for (auto EdgeIt : BestChainPred->Edges) {
- Chain *ChainSucc = EdgeIt.first;
- Edge *ChainEdge = EdgeIt.second;
- // Ignore loop edges
- if (BestChainPred == ChainSucc)
- continue;
-
- // Compute the gain of merging the two chains
- auto ForwardGain = mergeGain(BestChainPred, ChainSucc, ChainEdge);
- auto BackwardGain = mergeGain(ChainSucc, BestChainPred, ChainEdge);
- ChainEdge->setMergeGain(BestChainPred, ForwardGain, BackwardGain);
- if (ChainEdge->gain() > 0.0)
- Queue.insert(ChainEdge);
- }
- }
- }
-
- /// Merge chain From into chain Into and update the list of active chains.
- void mergeChains(Chain *Into, Chain *From) {
- assert(Into != From && "cannot merge a chain with itself");
- Into->merge(From);
-
- // Update the chains and addresses for functions merged from From
- size_t CurAddr = 0;
- for (NodeId F : Into->Nodes) {
- NodeChain[F] = Into;
- Addr[F] = CurAddr;
- CurAddr += Cg.size(F);
- }
-
- // Merge edges
- Into->mergeEdges(From);
- From->clear();
-
- // Update cached scores for the new chain
- Into->ShortCalls = shortCalls(Into);
- Into->Score = score(Into);
-
- // Remove chain From From the list of active chains
- llvm::erase(HotChains, From);
- }
-
-private:
- // The call graph
- const CallGraph &Cg;
-
- // All chains of functions
- std::vector<Chain> AllChains;
-
- // Active chains. The vector gets updated at runtime when chains are merged
- std::vector<Chain *> HotChains;
-
- // All edges between chains
- std::vector<Edge> AllEdges;
-
- // Node_id => chain
- std::vector<Chain *> NodeChain;
-
- // Current address of the function From the beginning of its chain
- std::vector<uint64_t> Addr;
-
- // Total weight of outgoing arcs for each function
- std::vector<double> OutWeight;
-
- // Total weight of incoming arcs for each function
- std::vector<double> InWeight;
- // The total number of samples in the graph
- double TotalSamples{0};
-};
-
-} // end anonymous namespace
-
-std::vector<Cluster> hfsortPlus(CallGraph &Cg) {
- // It is required that the sum of incoming arc weights is not greater
- // than the number of samples for every function.
- // Ensuring the call graph obeys the property before running the algorithm.
- Cg.adjustArcWeights();
- return HFSortPlus(Cg).run();
-}
-
-} // namespace bolt
-} // namespace llvm
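
For reviewers who want a quick feel for what is being removed: below is a small
standalone sketch (not part of the patch; constants match the option defaults
above, sample numbers are made up) of the i-TLB cost model hfsort+ optimized.
A page is likely to be resident when the functions on it are sampled often, and
only calls whose caller/callee distance is under a page count as "short" calls.

// Standalone sketch (illustrative, not BOLT code) of the deleted pass's model.
#include <cmath>
#include <cstdint>
#include <cstdio>

static const double ITLBPageSize = 4096; // bytes per i-TLB page
static const double ITLBEntries = 16;    // i-TLB capacity in pages
static const double ITLBDensity = 4096;  // scales chain density to page samples

// p = pageSamples / totalSamples; a page misses if none of the last
// ITLBEntries calls touched it, i.e. with probability (1 - p)^ITLBEntries.
static double missProbability(double ChainDensity, double TotalSamples) {
  double PageSamples = ChainDensity * ITLBDensity;
  if (PageSamples >= TotalSamples)
    return 0;
  double P = PageSamples / TotalSamples;
  return std::pow(1.0 - P, ITLBEntries);
}

// Calls shorter than a page are "short"; closer caller/callee pairs are
// weighted more heavily via the (1 - d^2) factor.
static double expectedCalls(uint64_t SrcAddr, uint64_t DstAddr, double Weight) {
  uint64_t Dist = SrcAddr >= DstAddr ? SrcAddr - DstAddr : DstAddr - SrcAddr;
  if (Dist >= ITLBPageSize)
    return 0;
  double D = Dist / ITLBPageSize;
  return (1.0 - D * D) * Weight;
}

int main() {
  // A chain with 1000 samples over 2000 bytes, in a 100000-sample profile.
  std::printf("miss probability: %.3f\n",
              missProbability(1000.0 / 2000.0, 100000));
  // An arc of weight 50 whose endpoints are 512 bytes apart.
  std::printf("short calls:      %.2f\n", expectedCalls(0, 512, 50));
}
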
diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp
index 7e672969098130..2446524c1ab416 100644
--- a/bolt/lib/Passes/ReorderFunctions.cpp
+++ b/bolt/lib/Passes/ReorderFunctions.cpp
@@ -30,6 +30,7 @@ extern cl::opt<uint32_t> RandomSeed;
extern size_t padFunction(const bolt::BinaryFunction &Function);
+extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
"reorder-functions",
cl::desc("reorder and cluster functions (works only with relocations)"),
@@ -41,7 +42,7 @@ cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
clEnumValN(bolt::ReorderFunctions::RT_HFSORT, "hfsort",
"use hfsort algorithm"),
clEnumValN(bolt::ReorderFunctions::RT_HFSORT_PLUS, "hfsort+",
- "use hfsort+ algorithm"),
+ "use cache-directed sort"),
clEnumValN(bolt::ReorderFunctions::RT_CDSORT, "cdsort",
"use cache-directed sort"),
clEnumValN(bolt::ReorderFunctions::RT_PETTIS_HANSEN,
@@ -50,7 +51,14 @@ cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions(
"reorder functions randomly"),
clEnumValN(bolt::ReorderFunctions::RT_USER, "user",
"use function order specified by -function-order")),
- cl::ZeroOrMore, cl::cat(BoltOptCategory));
+ cl::ZeroOrMore, cl::cat(BoltOptCategory),
+ cl::callback([](const bolt::ReorderFunctions::ReorderType &option) {
+ if (option == bolt::ReorderFunctions::RT_HFSORT_PLUS) {
+ errs() << "BOLT-WARNING: '-reorder-functions=hfsort+' is deprecated,"
+ << " please use '-reorder-functions=cdsort' instead\n";
+ ReorderFunctions = bolt::ReorderFunctions::RT_CDSORT;
+ }
+ }));
static cl::opt<bool> ReorderFunctionsUseHotSize(
"reorder-functions-use-hot-size",
@@ -319,9 +327,6 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
case RT_HFSORT:
Clusters = clusterize(Cg);
break;
- case RT_HFSORT_PLUS:
- Clusters = hfsortPlus(Cg);
- break;
case RT_CDSORT: {
// It is required that the sum of incoming arc weights is not greater
// than the number of samples for every function. Ensuring the call graph
@@ -423,6 +428,9 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
errs() << "BOLT-WARNING: Reorder functions: can't find functions for "
<< InvalidEntries << " entries in -function-order list\n";
} break;
+
+ default:
+ llvm_unreachable("unexpected layout type");
}
reorder(std::move(Clusters), BFs);
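
The cl::callback added above is the entire deprecation path: when the option
parser sees -reorder-functions=hfsort+, it prints the warning and rewrites the
value to RT_CDSORT before any pass reads it, which is why the RT_HFSORT_PLUS
case can be dropped from the switch and the now-unreachable value is caught by
default: llvm_unreachable. For readers unfamiliar with the idiom, here is a
self-contained sketch of the same pattern (illustrative option and enum names,
not BOLT code):

// Minimal sketch of remapping a deprecated cl::opt enum value via cl::callback.
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

enum SortType { ST_OLD, ST_NEW };

cl::opt<SortType> Sort(
    "sort", cl::desc("function sorting algorithm"),
    cl::values(clEnumValN(ST_OLD, "old", "deprecated alias for 'new'"),
               clEnumValN(ST_NEW, "new", "use the new algorithm")),
    cl::init(ST_NEW),
    cl::callback([](const SortType &Value) {
      if (Value == ST_OLD) {
        errs() << "warning: '-sort=old' is deprecated, using '-sort=new'\n";
        Sort = ST_NEW; // remap before anything reads the option
      }
    }));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  outs() << "selected: " << (Sort == ST_NEW ? "new" : "old") << "\n";
}

Running the sketch with -sort=old prints the warning and then behaves exactly
like -sort=new, mirroring what the patch does for hfsort+.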