[PATCH] D36351: [lld][ELF] Add profile guided section layout

Rafael Avila de Espindola via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 9 08:01:53 PST 2018


You are still missing:


+    if (FromSec && ToSec) {
+      uint64_t &V = Config->CallGraphProfile[std::make_pair(FromSec, ToSec)];
+      V += Count;
+    }

Cheers,
Rafael

Michael Spencer via Phabricator <reviews at reviews.llvm.org> writes:

> Bigcheese updated this revision to Diff 133550.
> Bigcheese added a comment.
>
> - Update to top of tree.
> - Add some comments.
>
>
> https://reviews.llvm.org/D36351
>
> Files:
>   ELF/CMakeLists.txt
>   ELF/CallGraphSort.cpp
>   ELF/CallGraphSort.h
>   ELF/Config.h
>   ELF/Driver.cpp
>   ELF/Options.td
>   ELF/Writer.cpp
>   test/ELF/cgprofile-txt.s
>
> Index: test/ELF/cgprofile-txt.s
> ===================================================================
> --- /dev/null
> +++ test/ELF/cgprofile-txt.s
> @@ -0,0 +1,106 @@
> +# REQUIRES: x86
> +
> +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
> +# RUN: ld.lld -e A %t -o %t2
> +# RUN: llvm-readobj -symbols %t2 | FileCheck %s --check-prefix=NOSORT
> +
> +# RUN: echo "A B 100" > %t.call_graph
> +# RUN: echo "A C 40" >> %t.call_graph
> +# RUN: echo "B C 30" >> %t.call_graph
> +# RUN: echo "C D 90" >> %t.call_graph
> +# RUN: echo "PP TS 100" >> %t.call_graph
> +# RUN: echo "_init2 _init 24567837" >> %t.call_graph
> +# RUN: echo "TS QC 9001" >> %t.call_graph
> +# RUN: ld.lld -e A %t --call-graph-ordering-file %t.call_graph -o %t2
> +# RUN: llvm-readobj -symbols %t2 | FileCheck %s
> +
> +    .section    .text.D,"ax", at progbits
> +D:
> +    retq
> +
> +    .section    .text.C,"ax", at progbits
> +    .globl  C
> +C:
> +    retq
> +
> +    .section    .text.B,"ax", at progbits
> +    .globl  B
> +B:
> +    retq
> +
> +    .section    .text.A,"ax", at progbits
> +    .globl  A
> +A:
> +    retq
> +
> +    .section    .ponies,"ax", at progbits,unique,1
> +    .globl TS
> +TS:
> +    retq
> +
> +    .section    .ponies,"ax", at progbits,unique,2
> +    .globl PP
> +PP:
> +    retq
> +
> +    .section    .other,"ax", at progbits,unique,1
> +    .globl QC
> +QC:
> +    retq
> +
> +    .section    .other,"ax", at progbits,unique,2
> +    .globl GB
> +GB:
> +    retq
> +
> +    .section    .init,"ax", at progbits,unique,1
> +    .globl _init
> +_init:
> +    retq
> +
> +    .section    .init,"ax", at progbits,unique,2
> +    .globl _init2
> +_init2:
> +    retq
> +
> +# CHECK:          Name: D
> +# CHECK-NEXT:     Value: 0x201003
> +# CHECK:          Name: A
> +# CHECK-NEXT:     Value: 0x201000
> +# CHECK:          Name: B
> +# CHECK-NEXT:     Value: 0x201001
> +# CHECK:          Name: C
> +# CHECK-NEXT:     Value: 0x201002
> +# CHECK:          Name: GB
> +# CHECK-NEXT:     Value: 0x201007
> +# CHECK:          Name: PP
> +# CHECK-NEXT:     Value: 0x201004
> +# CHECK:          Name: QC
> +# CHECK-NEXT:     Value: 0x201006
> +# CHECK:          Name: TS
> +# CHECK-NEXT:     Value: 0x201005
> +# CHECK:          Name: _init
> +# CHECK-NEXT:     Value: 0x201008
> +# CHECK:          Name: _init2
> +# CHECK-NEXT:     Value: 0x201009
> +
> +# NOSORT:          Name: D
> +# NOSORT-NEXT:     Value: 0x201000
> +# NOSORT:          Name: A
> +# NOSORT-NEXT:     Value: 0x201003
> +# NOSORT:          Name: B
> +# NOSORT-NEXT:     Value: 0x201002
> +# NOSORT:          Name: C
> +# NOSORT-NEXT:     Value: 0x201001
> +# NOSORT:          Name: GB
> +# NOSORT-NEXT:     Value: 0x201007
> +# NOSORT:          Name: PP
> +# NOSORT-NEXT:     Value: 0x201005
> +# NOSORT:          Name: QC
> +# NOSORT-NEXT:     Value: 0x201006
> +# NOSORT:          Name: TS
> +# NOSORT-NEXT:     Value: 0x201004
> +# NOSORT:          Name: _init
> +# NOSORT-NEXT:     Value: 0x201008
> +# NOSORT:          Name: _init2
> +# NOSORT-NEXT:     Value: 0x201009
> Index: ELF/Writer.cpp
> ===================================================================
> --- ELF/Writer.cpp
> +++ ELF/Writer.cpp
> @@ -9,6 +9,7 @@
>  
>  #include "Writer.h"
>  #include "AArch64ErrataFix.h"
> +#include "CallGraphSort.h"
>  #include "Config.h"
>  #include "Filesystem.h"
>  #include "LinkerScript.h"
> @@ -1058,6 +1059,17 @@
>  // If no layout was provided by linker script, we want to apply default
>  // sorting for special input sections. This also handles --symbol-ordering-file.
>  template <class ELFT> void Writer<ELFT>::sortInputSections() {
> +  // Use the rarely used option -call-graph-ordering-file to sort sections.
> +  if (!Config->CallGraphProfile.empty()) {
> +    DenseMap<const InputSectionBase *, int> OrderMap =
> +        computeCallGraphProfileOrder();
> +
> +    for (BaseCommand *Base : Script->SectionCommands)
> +      if (auto *Sec = dyn_cast<OutputSection>(Base))
> +        if (Sec->Live)
> +          Sec->sort([&](InputSectionBase *S) { return OrderMap.lookup(S); });
> +  }
> +
>    // Sort input sections by priority using the list provided
>    // by --symbol-ordering-file.
>    DenseMap<SectionBase *, int> Order = buildSectionOrder();
> Index: ELF/Options.td
> ===================================================================
> --- ELF/Options.td
> +++ ELF/Options.td
> @@ -66,6 +66,9 @@
>      "Only set DT_NEEDED for shared libraries if used",
>      "Always set DT_NEEDED for shared libraries">;
>  
> +def call_graph_ordering_file: S<"call-graph-ordering-file">,
> +  HelpText<"Layout sections to optimize the given callgraph">;
> +
>  // -chroot doesn't have a help text because it is an internal option.
>  def chroot: S<"chroot">;
>  
> Index: ELF/Driver.cpp
> ===================================================================
> --- ELF/Driver.cpp
> +++ ELF/Driver.cpp
> @@ -570,6 +570,31 @@
>    return {BuildIdKind::None, {}};
>  }
>  
> +static void readCallGraph(MemoryBufferRef MB) {
> +  // Build a map from symbol name to section
> +  DenseMap<StringRef, InputSectionBase *> SymbolSection;
> +  for (InputFile *File : ObjectFiles)
> +    for (Symbol *Sym : File->getSymbols())
> +      if (auto *D = dyn_cast<Defined>(Sym))
> +        if (auto *IS = dyn_cast_or_null<InputSectionBase>(D->Section))
> +          SymbolSection[D->getName()] = IS;
> +
> +  std::vector<StringRef> Lines = args::getLines(MB);
> +  for (StringRef L : Lines) {
> +    SmallVector<StringRef, 3> Fields;
> +    L.split(Fields, ' ');
> +    if (Fields.size() != 3)
> +      fatal("parse error");
> +    uint64_t Count;
> +    if (!to_integer(Fields[2], Count))
> +      fatal("parse error");
> +    InputSectionBase *FromSec = SymbolSection.lookup(Fields[0]);
> +    InputSectionBase *ToSec = SymbolSection.lookup(Fields[1]);
> +    if (FromSec && ToSec)
> +      Config->CallGraphProfile[std::make_pair(FromSec, ToSec)] = Count;
> +  }
> +}
> +
>  static bool getCompressDebugSections(opt::InputArgList &Args) {
>    StringRef S = Args.getLastArgValue(OPT_compress_debug_sections, "none");
>    if (S == "none")
> @@ -1097,6 +1122,10 @@
>    // Apply symbol renames for -wrap.
>    Symtab->applySymbolWrap();
>  
> +  if (auto *Arg = Args.getLastArg(OPT_call_graph_ordering_file))
> +    if (Optional<MemoryBufferRef> Buffer = readFile(Arg->getValue()))
> +      readCallGraph(*Buffer);
> +
>    // Now that we have a complete list of input files.
>    // Beyond this point, no new files are added.
>    // Aggregate all input sections into one place.
> Index: ELF/Config.h
> ===================================================================
> --- ELF/Config.h
> +++ ELF/Config.h
> @@ -25,6 +25,7 @@
>  namespace elf {
>  
>  class InputFile;
> +class InputSectionBase;
>  
>  enum ELFKind {
>    ELFNoneKind,
> @@ -104,6 +105,9 @@
>    std::vector<SymbolVersion> VersionScriptGlobals;
>    std::vector<SymbolVersion> VersionScriptLocals;
>    std::vector<uint8_t> BuildIdVector;
> +  llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
> +                  uint64_t>
> +      CallGraphProfile;
>    bool AllowMultipleDefinition;
>    bool AndroidPackDynRelocs = false;
>    bool ARMHasBlx = false;
> Index: ELF/CallGraphSort.h
> ===================================================================
> --- /dev/null
> +++ ELF/CallGraphSort.h
> @@ -0,0 +1,23 @@
> +//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
> +//
> +//                             The LLVM Linker
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLD_ELF_CALL_GRAPH_SORT_H
> +#define LLD_ELF_CALL_GRAPH_SORT_H
> +
> +#include "llvm/ADT/DenseMap.h"
> +
> +namespace lld {
> +namespace elf {
> +class InputSectionBase;
> +
> +llvm::DenseMap<const InputSectionBase *, int> computeCallGraphProfileOrder();
> +} // namespace elf
> +} // namespace lld
> +
> +#endif
> Index: ELF/CallGraphSort.cpp
> ===================================================================
> --- /dev/null
> +++ ELF/CallGraphSort.cpp
> @@ -0,0 +1,379 @@
> +//===- CallGraphSort.cpp --------------------------------------------------===//
> +//
> +//                             The LLVM Linker
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +///
> +/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
> +/// for Large-Scale Data-Center Applications
> +/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
> +///
> +/// The goal of this algorithm is to improve runtime performance of the final
> +/// executable by arranging code sections such that page table and i-cache
> +/// misses are minimized.
> +///
> +/// Definitions:
> +/// * Cluster
> +///   * An ordered list of input sections which are layed out as a unit. At the
> +///     beginning of the algorithm each input section has its own cluster and
> +///     the weight of the cluster is the sum of the weight of all incomming
> +///     edges.
> +/// * Call-Chain Clustering (Cウ) Heuristic
> +///   * Defines when and how clusters are combined. Pick the highest weight edge
> +///     from cluster _u_ to _v_ then move the sections in _v_ and append them to
> +///     _u_ unless the combined size would be larger than the page size.
> +/// * Density
> +///   * The weight of the cluster divided by the size of the cluster. This is a
> +///     proxy for the ammount of execution time spent per byte of the cluster.
> +///
> +/// It does so given a call graph profile by the following:
> +/// * Build a call graph from the profile
> +/// * While there are unresolved edges
> +///   * Find the edge with the highest weight
> +///   * Check if merging the two clusters would create a cluster larger than the
> +///     target page size
> +///   * If not, contract that edge putting the callee after the caller
> +/// * Sort remaining clusters by density
> +///
> +//===----------------------------------------------------------------------===//
> +
> +#include "CallGraphSort.h"
> +#include "OutputSections.h"
> +#include "SymbolTable.h"
> +#include "Symbols.h"
> +#include "Target.h"
> +
> +#include "llvm/Support/MathExtras.h"
> +
> +#include <queue>
> +#include <set>
> +#include <unordered_map>
> +
> +using namespace llvm;
> +using namespace lld;
> +using namespace lld::elf;
> +
> +namespace {
> +using NodeIndex = std::ptrdiff_t;
> +using EdgeIndex = std::ptrdiff_t;
> +
> +struct Edge;
> +
> +struct EdgePriorityCmp {
> +  std::vector<Edge> &Edges;
> +  bool operator()(EdgeIndex A, EdgeIndex B) const;
> +};
> +
> +using PriorityQueue = std::multiset<EdgeIndex, EdgePriorityCmp>;
> +
> +struct Node {
> +  Node() = default;
> +  Node(const InputSectionBase *IS);
> +  std::vector<const InputSectionBase *> Sections;
> +  std::vector<EdgeIndex> IncidentEdges;
> +  int64_t Size = 0;
> +  uint64_t Weight = 0;
> +};
> +
> +struct Edge {
> +  NodeIndex From;
> +  NodeIndex To;
> +  uint64_t Weight;
> +  PriorityQueue::iterator PriorityPos;
> +  bool operator==(const Edge Other) const;
> +  bool operator<(const Edge Other) const;
> +};
> +
> +bool EdgePriorityCmp::operator()(EdgeIndex A, EdgeIndex B) const {
> +  return Edges[A].Weight < Edges[B].Weight;
> +}
> +
> +struct EdgeDenseMapInfo {
> +  static Edge getEmptyKey() {
> +    return {DenseMapInfo<NodeIndex>::getEmptyKey(),
> +            DenseMapInfo<NodeIndex>::getEmptyKey(), 0,
> +            PriorityQueue::iterator()};
> +  }
> +  static Edge getTombstoneKey() {
> +    return {DenseMapInfo<NodeIndex>::getTombstoneKey(),
> +            DenseMapInfo<NodeIndex>::getTombstoneKey(), 0,
> +            PriorityQueue::iterator()};
> +  }
> +  static unsigned getHashValue(const Edge &Val) {
> +    return hash_combine(DenseMapInfo<NodeIndex>::getHashValue(Val.From),
> +                        DenseMapInfo<NodeIndex>::getHashValue(Val.To));
> +  }
> +  static bool isEqual(const Edge &LHS, const Edge &RHS) { return LHS == RHS; }
> +};
> +
> +class CallGraphSort {
> +public:
> +  CallGraphSort();
> +
> +  DenseMap<const InputSectionBase *, int> run();
> +
> +private:
> +  std::vector<Node> Nodes;
> +  std::vector<Edge> Edges;
> +
> +  PriorityQueue WorkQueue{EdgePriorityCmp{Edges}};
> +
> +  bool killEdge(EdgeIndex EI);
> +  void contractEdge(EdgeIndex CEI);
> +  void generateClusters();
> +};
> +} // end anonymous namespace
> +
> +Node::Node(const InputSectionBase *IS) {
> +  Sections.push_back(IS);
> +  Size = IS->getSize();
> +}
> +
> +bool Edge::operator==(const Edge Other) const {
> +  return From == Other.From && To == Other.To;
> +}
> +
> +bool Edge::operator<(const Edge Other) const {
> +  if (From != Other.From)
> +    return From < Other.From;
> +  return To < Other.To;
> +}
> +
> +static bool isKnownNonreorderableSection(const OutputSection *OS) {
> +  return llvm::StringSwitch<bool>(OS->Name)
> +      .Cases(".init", ".fini", ".init_array.", ".fini_array.", ".ctors.",
> +             ".dtors.", true)
> +      .Default(false);
> +}
> +
> +// Take the edge list in Config->CallGraphProfile, resolve symbol names to
> +// Symbols, and generate a graph between InputSections with the provided
> +// weights.
> +CallGraphSort::CallGraphSort() {
> +  MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
> +            uint64_t> &Profile = Config->CallGraphProfile;
> +  DenseMap<const InputSectionBase *, NodeIndex> SecToNode;
> +  DenseMap<Edge, EdgeIndex, EdgeDenseMapInfo> EdgeMap;
> +
> +  auto GetOrCreateNode = [&](const InputSectionBase *IS) -> NodeIndex {
> +    auto Res = SecToNode.insert(std::make_pair(IS, Nodes.size()));
> +    if (Res.second)
> +      Nodes.emplace_back(IS);
> +    return Res.first->second;
> +  };
> +
> +  // Create the graph.
> +  for (const auto &C : Profile) {
> +    const InputSectionBase *FromSB = C.first.first;
> +    const InputSectionBase *ToSB = C.first.second;
> +    uint64_t Weight = C.second;
> +
> +    if (Weight == 0)
> +      continue;
> +
> +    // Ignore edges with input sections from different output sections.  This is
> +    // done so that we don't form clusters which contain input sections which
> +    // can't be laid out adjacently.  This also prevents accidental reordering
> +    // of input sections due to getting assigned an order in the order map.
> +    if (FromSB->getOutputSection() != ToSB->getOutputSection())
> +      continue;
> +
> +    if (isKnownNonreorderableSection(FromSB->getOutputSection()))
> +      continue;
> +
> +    NodeIndex From = GetOrCreateNode(FromSB);
> +    NodeIndex To = GetOrCreateNode(ToSB);
> +
> +    Nodes[To].Weight = SaturatingAdd(Nodes[To].Weight, Weight);
> +
> +    // Reject self edges after adding node weight.  This ends up with the same
> +    // weight as if we had merged the nodes.
> +    if (From == To)
> +      continue;
> +
> +    Edge E{From, To, Weight, WorkQueue.end()};
> +
> +    // Add or increment an edge
> +    auto Res = EdgeMap.insert(std::make_pair(E, Edges.size()));
> +    EdgeIndex EI = Res.first->second;
> +    if (Res.second) {
> +      Edges.push_back(E);
> +      Nodes[From].IncidentEdges.push_back(EI);
> +      Nodes[To].IncidentEdges.push_back(EI);
> +    } else
> +      Edges[EI].Weight = SaturatingAdd(Edges[EI].Weight, Weight);
> +  }
> +}
> +
> +/// Like std::unique, but calls Merge on equal values. Merge is allowed
> +/// to modifiy its first argument.
> +///
> +/// Merge is a callable with signature
> +///   Merge(*declval<ForwardIt>(), *declval<ForwardIt>())
> +///
> +/// Example:
> +///
> +///   int a[] = {1, 2, 2, 3, 4, 5, 5};
> +///   auto end = merge_unique(std::begin(a), std::end(a),
> +///     [](int a, int b) { return a == b; },
> +///     [](int &a, int b) { a += b; });
> +///
> +///   for (auto i = a; i != end; ++i)
> +///     std::cout << *i << " ";
> +///
> +///   -- 1 4 3 4 10
> +template <class ForwardIt, class PredTy, class MergeTy>
> +static ForwardIt merge_unique(ForwardIt First, ForwardIt Last, PredTy Pred,
> +                              MergeTy Merge) {
> +  if (First == Last)
> +    return Last;
> +
> +  ForwardIt I = First;
> +  while (++I != Last) {
> +    if (Pred(*First, *I))
> +      Merge(*First, *I);
> +    else if (++First != I)
> +      *First = std::move(*I);
> +  }
> +  return ++First;
> +}
> +
> +/// Marks an edge as head and removes it from the work queue.
> +/// Returns true if the edge was killed, false if it was already dead.
> +bool CallGraphSort::killEdge(EdgeIndex EI) {
> +  Edge &E = Edges[EI];
> +  if (E.PriorityPos != WorkQueue.end()) {
> +    WorkQueue.erase(E.PriorityPos);
> +    E.PriorityPos = WorkQueue.end();
> +    return true;
> +  }
> +  return false;
> +}
> +
> +/// Remove edge \p CEI from the graph while simultaneously merging its two
> +/// incident vertices u and v. This merges any duplicate edges between u and v
> +/// by accumulating their weights.
> +void CallGraphSort::contractEdge(EdgeIndex CEI) {
> +  // Make a copy of the edge as the original will be marked killed while being
> +  // used.
> +  Edge CE = Edges[CEI];
> +  assert(CE.From != CE.To && "Got self edge!");
> +  std::vector<EdgeIndex> &FE = Nodes[CE.From].IncidentEdges;
> +
> +  // Remove the self edge from From.
> +  FE.erase(std::remove(FE.begin(), FE.end(), CEI));
> +  std::vector<EdgeIndex> &TE = Nodes[CE.To].IncidentEdges;
> +
> +  // Update all edges incident with To to reference From instead. Then if they
> +  // aren't self edges add them to From.
> +  for (EdgeIndex EI : TE) {
> +    Edge &E = Edges[EI];
> +    if (E.From == CE.To)
> +      E.From = CE.From;
> +    if (E.To == CE.To)
> +      E.To = CE.From;
> +    if (E.To == E.From) {
> +      killEdge(EI);
> +      continue;
> +    }
> +    FE.push_back(EI);
> +  }
> +
> +  // Free memory. Otherwise we end up with N^2 memory usage.
> +  std::vector<EdgeIndex>().swap(TE);
> +
> +  if (FE.empty())
> +    return;
> +
> +  // Sort edges so they can be merged. The stability of this sort doesn't matter
> +  // as equal edges will be merged in an order independent manner.
> +  std::sort(FE.begin(), FE.end(),
> +            [&](EdgeIndex AI, EdgeIndex BI) { return Edges[AI] < Edges[BI]; });
> +
> +  FE.erase(merge_unique(FE.begin(), FE.end(),
> +                        [&](EdgeIndex AI, EdgeIndex BI) {
> +                          return Edges[AI] == Edges[BI];
> +                        },
> +                        [&](EdgeIndex AI, EdgeIndex BI) {
> +                          Edge &A = Edges[AI];
> +                          Edge &B = Edges[BI];
> +                          killEdge(BI);
> +                          bool Restore = killEdge(AI);
> +                          A.Weight = SaturatingAdd(A.Weight, B.Weight);
> +                          if (Restore)
> +                            A.PriorityPos = WorkQueue.insert(AI);
> +                        }),
> +           FE.end());
> +}
> +
> +// Group InputSections into clusters using the Call-Chain Clustering heuristic
> +// then sort the clusters by density.
> +void CallGraphSort::generateClusters() {
> +  for (size_t I = 0; I < Edges.size(); ++I) {
> +    Edges[I].PriorityPos = WorkQueue.insert(I);
> +  }
> +
> +  // Collapse the graph.
> +  while (!WorkQueue.empty()) {
> +    PriorityQueue::const_iterator I = --WorkQueue.end();
> +    EdgeIndex MaxI = *I;
> +    const Edge MaxE = Edges[MaxI];
> +    killEdge(MaxI);
> +    // Merge the Nodes.
> +    Node &From = Nodes[MaxE.From];
> +    Node &To = Nodes[MaxE.To];
> +    if (From.Size + To.Size > Target->PageSize)
> +      continue;
> +    contractEdge(MaxI);
> +    From.Sections.insert(From.Sections.end(), To.Sections.begin(),
> +                         To.Sections.end());
> +    From.Size += To.Size;
> +    From.Weight = SaturatingAdd(From.Weight, To.Weight);
> +    To.Sections.clear();
> +    To.Size = 0;
> +    To.Weight = 0;
> +  }
> +
> +  // Remove empty or dead nodes.
> +  Nodes.erase(std::remove_if(Nodes.begin(), Nodes.end(),
> +                             [](const Node &N) {
> +                               return N.Size == 0 || N.Sections.empty();
> +                             }),
> +              Nodes.end());
> +
> +  // Sort by density. Invalidates all NodeIndexs.
> +  std::sort(Nodes.begin(), Nodes.end(), [](const Node &A, const Node &B) {
> +    return (APFloat(APFloat::IEEEdouble(), A.Weight) /
> +            APFloat(APFloat::IEEEdouble(), A.Size))
> +               .compare(APFloat(APFloat::IEEEdouble(), B.Weight) /
> +                        APFloat(APFloat::IEEEdouble(), B.Size)) ==
> +           APFloat::cmpLessThan;
> +  });
> +}
> +
> +DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
> +  generateClusters();
> +
> +  // Generate order.
> +  llvm::DenseMap<const InputSectionBase *, int> OrderMap;
> +  ssize_t CurOrder = 1;
> +
> +  for (const Node &N : Nodes)
> +    for (const InputSectionBase *IS : N.Sections)
> +      OrderMap[IS] = CurOrder++;
> +
> +  return OrderMap;
> +}
> +
> +// Sort sections by the profile data provided by -callgraph-profile-file
> +//
> +// This first builds a call graph based on the profile data then iteratively
> +// merges the hottest call edges as long as it would not create a cluster larger
> +// than the page size. All clusters are then sorted by a density metric to
> +// further improve locality.
> +DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
> +  return CallGraphSort().run();
> +}
> Index: ELF/CMakeLists.txt
> ===================================================================
> --- ELF/CMakeLists.txt
> +++ ELF/CMakeLists.txt
> @@ -19,6 +19,7 @@
>    Arch/SPARCV9.cpp
>    Arch/X86.cpp
>    Arch/X86_64.cpp
> +  CallGraphSort.cpp
>    Driver.cpp
>    DriverUtils.cpp
>    EhFrame.cpp


More information about the llvm-commits mailing list