[lld] 6db04b9 - [lld-macho] Port CallGraphSort from COFF/ELF

Nico Weber via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 12 07:57:54 PST 2022


Author: Leonard Grey
Date: 2022-01-12T10:47:04-05:00
New Revision: 6db04b97e6a2873cbf1252b7b6b7efedf3fceb4e

URL: https://github.com/llvm/llvm-project/commit/6db04b97e6a2873cbf1252b7b6b7efedf3fceb4e
DIFF: https://github.com/llvm/llvm-project/commit/6db04b97e6a2873cbf1252b7b6b7efedf3fceb4e.diff

LOG: [lld-macho] Port CallGraphSort from COFF/ELF

Depends on D112160

This adds the new options `--call-graph-profile-sort` (default),
`--no-call-graph-profile-sort` and `--print-symbol-order=`. If call graph
profile sorting is enabled, reads `__LLVM,__cg_profile` sections from object
files and uses the resulting graph to put callees and callers close to each
other in the final binary via the C3 clustering heuristic.

Differential Revision: https://reviews.llvm.org/D112164

Added: 
    lld/MachO/CallGraphSort.cpp
    lld/MachO/CallGraphSort.h
    lld/test/MachO/cgprofile-icf.s
    lld/test/MachO/cgprofile-obj.s
    lld/test/MachO/cgprofile-print.s

Modified: 
    lld/MachO/CMakeLists.txt
    lld/MachO/Config.h
    lld/MachO/Driver.cpp
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/InputSection.h
    lld/MachO/Options.td
    lld/MachO/Writer.cpp
    llvm/utils/gn/secondary/lld/MachO/BUILD.gn

Removed: 
    


################################################################################
diff  --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index abc9e473ef515..61f5c70005b9f 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -10,6 +10,7 @@ add_lld_library(lldMachO
   Arch/ARM64Common.cpp
   Arch/ARM64_32.cpp
   Arch/X86_64.cpp
+  CallGraphSort.cpp
   ConcatOutputSection.cpp
   Driver.cpp
   DriverUtils.cpp

diff  --git a/lld/MachO/CallGraphSort.cpp b/lld/MachO/CallGraphSort.cpp
new file mode 100644
index 0000000000000..7a0192ea691e0
--- /dev/null
+++ b/lld/MachO/CallGraphSort.cpp
@@ -0,0 +1,252 @@
+//===- CallGraphSort.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This is based on the ELF port, see ELF/CallGraphSort.cpp for the details
+/// about the algorithm.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CallGraphSort.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "Symbols.h"
+#include "Target.h"
+#include "lld/Common/ErrorHandler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/Support/TimeProfiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <numeric>
+
+using namespace llvm;
+using namespace lld;
+using namespace lld::macho;
+
+namespace {
+struct Edge {
+  int from;
+  uint64_t weight;
+};
+
+struct Cluster {
+  Cluster(int sec, size_t s) : next(sec), prev(sec), size(s) {}
+
+  double getDensity() const {
+    if (size == 0)
+      return 0;
+    return double(weight) / double(size);
+  }
+
+  int next;
+  int prev;
+  uint64_t size;
+  uint64_t weight = 0;
+  uint64_t initialWeight = 0;
+  Edge bestPred = {-1, 0};
+};
+
+class CallGraphSort {
+public:
+  CallGraphSort();
+
+  DenseMap<const InputSection *, size_t> run();
+
+private:
+  std::vector<Cluster> clusters;
+  std::vector<const InputSection *> sections;
+};
+// Maximum amount the combined cluster density can be worse than the original
+// cluster to consider merging.
+constexpr int MAX_DENSITY_DEGRADATION = 8;
+} // end anonymous namespace
+
+using SectionPair = std::pair<const InputSection *, const InputSection *>;
+
+// Take the edge list in config->callGraphProfile, resolve symbol names to
+// Symbols, and generate a graph between InputSections with the provided
+// weights.
+CallGraphSort::CallGraphSort() {
+  MapVector<SectionPair, uint64_t> &profile = config->callGraphProfile;
+  DenseMap<const InputSection *, int> secToCluster;
+
+  auto getOrCreateCluster = [&](const InputSection *isec) -> int {
+    auto res = secToCluster.try_emplace(isec, clusters.size());
+    if (res.second) {
+      sections.push_back(isec);
+      clusters.emplace_back(clusters.size(), isec->getSize());
+    }
+    return res.first->second;
+  };
+
+  // Create the graph
+  for (std::pair<SectionPair, uint64_t> &c : profile) {
+    const auto fromSec = c.first.first->canonical();
+    const auto toSec = c.first.second->canonical();
+    uint64_t weight = c.second;
+    // Ignore edges between input sections belonging to 
diff erent output
+    // sections.  This is done because otherwise we would end up with clusters
+    // containing input sections that can't actually be placed adjacently in the
+    // output.  This messes with the cluster size and density calculations.  We
+    // would also end up moving input sections in other output sections without
+    // moving them closer to what calls them.
+    if (fromSec->parent != toSec->parent)
+      continue;
+
+    int from = getOrCreateCluster(fromSec);
+    int to = getOrCreateCluster(toSec);
+
+    clusters[to].weight += weight;
+
+    if (from == to)
+      continue;
+
+    // Remember the best edge.
+    Cluster &toC = clusters[to];
+    if (toC.bestPred.from == -1 || toC.bestPred.weight < weight) {
+      toC.bestPred.from = from;
+      toC.bestPred.weight = weight;
+    }
+  }
+  for (Cluster &c : clusters)
+    c.initialWeight = c.weight;
+}
+
+// It's bad to merge clusters which would degrade the density too much.
+static bool isNewDensityBad(Cluster &a, Cluster &b) {
+  double newDensity = double(a.weight + b.weight) / double(a.size + b.size);
+  return newDensity < a.getDensity() / MAX_DENSITY_DEGRADATION;
+}
+
+// Find the leader of V's belonged cluster (represented as an equivalence
+// class). We apply union-find path-halving technique (simple to implement) in
+// the meantime as it decreases depths and the time complexity.
+static int getLeader(std::vector<int> &leaders, int v) {
+  while (leaders[v] != v) {
+    leaders[v] = leaders[leaders[v]];
+    v = leaders[v];
+  }
+  return v;
+}
+
+static void mergeClusters(std::vector<Cluster> &cs, Cluster &into, int intoIdx,
+                          Cluster &from, int fromIdx) {
+  int tail1 = into.prev, tail2 = from.prev;
+  into.prev = tail2;
+  cs[tail2].next = intoIdx;
+  from.prev = tail1;
+  cs[tail1].next = fromIdx;
+  into.size += from.size;
+  into.weight += from.weight;
+  from.size = 0;
+  from.weight = 0;
+}
+
+// Group InputSections into clusters using the Call-Chain Clustering heuristic
+// then sort the clusters by density.
+DenseMap<const InputSection *, size_t> CallGraphSort::run() {
+  const uint64_t maxClusterSize = target->getPageSize();
+
+  // Cluster indices sorted by density.
+  std::vector<int> sorted(clusters.size());
+  // For union-find.
+  std::vector<int> leaders(clusters.size());
+
+  std::iota(leaders.begin(), leaders.end(), 0);
+  std::iota(sorted.begin(), sorted.end(), 0);
+
+  llvm::stable_sort(sorted, [&](int a, int b) {
+    return clusters[a].getDensity() > clusters[b].getDensity();
+  });
+
+  for (int l : sorted) {
+    // The cluster index is the same as the index of its leader here because
+    // clusters[L] has not been merged into another cluster yet.
+    Cluster &c = clusters[l];
+
+    // Don't consider merging if the edge is unlikely.
+    if (c.bestPred.from == -1 || c.bestPred.weight * 10 <= c.initialWeight)
+      continue;
+
+    int predL = getLeader(leaders, c.bestPred.from);
+    // Already in the same cluster.
+    if (l == predL)
+      continue;
+
+    Cluster *predC = &clusters[predL];
+    if (c.size + predC->size > maxClusterSize)
+      continue;
+
+    if (isNewDensityBad(*predC, c))
+      continue;
+
+    leaders[l] = predL;
+    mergeClusters(clusters, *predC, predL, c, l);
+  }
+  // Sort remaining non-empty clusters by density.
+  sorted.clear();
+  for (int i = 0, e = (int)clusters.size(); i != e; ++i)
+    if (clusters[i].size > 0)
+      sorted.push_back(i);
+  llvm::stable_sort(sorted, [&](int a, int b) {
+    return clusters[a].getDensity() > clusters[b].getDensity();
+  });
+
+  DenseMap<const InputSection *, size_t> orderMap;
+
+  // Sections will be sorted by decreasing order. Absent sections will have
+  // priority 0 and be placed at the end of sections.
+  // NB: This is opposite from COFF/ELF to be compatible with the existing
+  // order-file code.
+  int curOrder = clusters.size();
+  for (int leader : sorted) {
+    for (int i = leader;;) {
+      orderMap[sections[i]] = curOrder--;
+      i = clusters[i].next;
+      if (i == leader)
+        break;
+    }
+  }
+  if (!config->printSymbolOrder.empty()) {
+    std::error_code ec;
+    raw_fd_ostream os(config->printSymbolOrder, ec, sys::fs::OF_None);
+    if (ec) {
+      error("cannot open " + config->printSymbolOrder + ": " + ec.message());
+      return orderMap;
+    }
+    // Print the symbols ordered by C3, in the order of decreasing curOrder
+    // Instead of sorting all the orderMap, just repeat the loops above.
+    for (int leader : sorted)
+      for (int i = leader;;) {
+        const InputSection *isec = sections[i];
+        // Search all the symbols in the file of the section
+        // and find out a Defined symbol with name that is within the
+        // section.
+        for (Symbol *sym : isec->getFile()->symbols) {
+          if (auto *d = dyn_cast_or_null<Defined>(sym)) {
+            if (d->isec == isec)
+              os << sym->getName() << "\n";
+          }
+        }
+        i = clusters[i].next;
+        if (i == leader)
+          break;
+      }
+  }
+
+  return orderMap;
+}
+
+// Sort sections by the profile data provided by __LLVM,__cg_profile sections.
+//
+// This first builds a call graph based on the profile data then merges sections
+// according to the C³ heuristic. All clusters are then sorted by a density
+// metric to further improve locality.
+DenseMap<const InputSection *, size_t> macho::computeCallGraphProfileOrder() {
+  TimeTraceScope timeScope("Call graph profile sort");
+  return CallGraphSort().run();
+}

diff  --git a/lld/MachO/CallGraphSort.h b/lld/MachO/CallGraphSort.h
new file mode 100644
index 0000000000000..034f54a260899
--- /dev/null
+++ b/lld/MachO/CallGraphSort.h
@@ -0,0 +1,22 @@
+//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_CALL_GRAPH_SORT_H
+#define LLD_MACHO_CALL_GRAPH_SORT_H
+
+#include "InputSection.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace lld {
+namespace macho {
+
+llvm::DenseMap<const InputSection *, size_t> computeCallGraphProfileOrder();
+} // namespace macho
+} // namespace lld
+
+#endif

diff  --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 8c7f16c3d5533..7a1c15472f74a 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -27,6 +28,7 @@
 namespace lld {
 namespace macho {
 
+class InputSection;
 class Symbol;
 struct SymbolPriorityEntry;
 
@@ -169,6 +171,12 @@ struct Configuration {
   std::vector<SegmentProtection> segmentProtections;
 
   llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
+  llvm::MapVector<std::pair<const InputSection *, const InputSection *>,
+                  uint64_t>
+      callGraphProfile;
+  bool callGraphProfileSort = false;
+  llvm::StringRef printSymbolOrder;
+
   SectionRenameMap sectionRenameMap;
   SegmentRenameMap segmentRenameMap;
 

diff  --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index e1371dbd72985..92946eb41e1a6 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1057,6 +1057,25 @@ static void gatherInputSections() {
   assert(inputOrder <= UnspecifiedInputOrder);
 }
 
+static void extractCallGraphProfile() {
+  TimeTraceScope timeScope("Extract call graph profile");
+  for (const InputFile *file : inputFiles) {
+    auto *obj = dyn_cast_or_null<ObjFile>(file);
+    if (!obj)
+      continue;
+    for (const CallGraphEntry &entry : obj->callGraph) {
+      assert(entry.fromIndex < obj->symbols.size() &&
+             entry.toIndex < obj->symbols.size());
+      auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]);
+      auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]);
+
+      if (!fromSym || !toSym)
+        continue;
+      config->callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
+    }
+  }
+}
+
 static void foldIdenticalLiterals() {
   // We always create a cStringSection, regardless of whether dedupLiterals is
   // true. If it isn't, we simply create a non-deduplicating CStringSection.
@@ -1266,6 +1285,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
                           config->icfLevel != ICFLevel::none;
   config->warnDylibInstallName = args.hasFlag(
       OPT_warn_dylib_install_name, OPT_no_warn_dylib_install_name, false);
+  config->callGraphProfileSort = args.hasFlag(
+      OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
+  config->printSymbolOrder = args.getLastArgValue(OPT_print_symbol_order);
 
   // FIXME: Add a commandline flag for this too.
   config->zeroModTime = getenv("ZERO_AR_DATE");
@@ -1458,8 +1480,10 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
     replaceCommonSymbols();
 
     StringRef orderFile = args.getLastArgValue(OPT_order_file);
-    if (!orderFile.empty())
+    if (!orderFile.empty()) {
       parseOrderFile(orderFile);
+      config->callGraphProfileSort = false;
+    }
 
     referenceStubBinder();
 
@@ -1509,6 +1533,8 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
     }
 
     gatherInputSections();
+    if (config->callGraphProfileSort)
+      extractCallGraphProfile();
 
     if (config->deadStrip)
       markLive();

diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 74bd463706c2e..cfe3c41cb1602 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -63,10 +63,12 @@
 #include "llvm/ADT/iterator.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/LTO/LTO.h"
+#include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TarWriter.h"
+#include "llvm/Support/TimeProfiler.h"
 #include "llvm/TextAPI/Architecture.h"
 #include "llvm/TextAPI/InterfaceFile.h"
 
@@ -325,6 +327,25 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
       if (name == section_names::compactUnwind)
         compactUnwindSection = &sections.back();
     } else if (segname == segment_names::llvm) {
+      if (name == "__cg_profile" && config->callGraphProfileSort) {
+        TimeTraceScope timeScope("Parsing call graph section");
+        BinaryStreamReader reader(data, support::little);
+        while (!reader.empty()) {
+          uint32_t fromIndex, toIndex;
+          uint64_t count;
+          if (Error err = reader.readInteger(fromIndex))
+            fatal(toString(this) + ": Expected 32-bit integer");
+          if (Error err = reader.readInteger(toIndex))
+            fatal(toString(this) + ": Expected 32-bit integer");
+          if (Error err = reader.readInteger(count))
+            fatal(toString(this) + ": Expected 64-bit integer");
+          callGraph.emplace_back();
+          CallGraphEntry &entry = callGraph.back();
+          entry.fromIndex = fromIndex;
+          entry.toIndex = toIndex;
+          entry.count = count;
+        }
+      }
       // ld64 does not appear to emit contents from sections within the __LLVM
       // segment. Symbols within those sections point to bitcode metadata
       // instead of actual symbols. Global symbols within those sections could

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index 36da70011170c..a29e688c58d1d 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -65,6 +65,16 @@ struct Section {
   Section(uint64_t addr) : address(addr){};
 };
 
+// Represents a call graph profile edge.
+struct CallGraphEntry {
+  // The index of the caller in the symbol table.
+  uint32_t fromIndex;
+  // The index of the callee in the symbol table.
+  uint32_t toIndex;
+  // Number of calls from callee to caller in the profile.
+  uint64_t count;
+};
+
 class InputFile {
 public:
   enum Kind {
@@ -115,6 +125,7 @@ class ObjFile final : public InputFile {
   llvm::DWARFUnit *compileUnit = nullptr;
   const uint32_t modTime;
   std::vector<ConcatInputSection *> debugSections;
+  std::vector<CallGraphEntry> callGraph;
 
 private:
   Section *compactUnwindSection = nullptr;

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index 58f794227b617..5535e871c539d 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -53,6 +53,7 @@ class InputSection {
   virtual bool isLive(uint64_t off) const = 0;
   virtual void markLive(uint64_t off) = 0;
   virtual InputSection *canonical() { return this; }
+  virtual const InputSection *canonical() const { return this; }
 
   OutputSection *parent = nullptr;
 
@@ -124,6 +125,9 @@ class ConcatInputSection final : public InputSection {
   ConcatInputSection *canonical() override {
     return replacement ? replacement : this;
   }
+  const InputSection *canonical() const override {
+    return replacement ? replacement : this;
+  }
 
   static bool classof(const InputSection *isec) {
     return isec->kind() == ConcatKind;

diff  --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 7cd4d01a0a5a6..b4b9ca6582eec 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -79,6 +79,15 @@ def no_warn_dylib_install_name: Joined<["--"], "no-warn-dylib-install-name">,
 def warn_dylib_install_name: Joined<["--"], "warn-dylib-install-name">,
     HelpText<"Warn on -install-name if -dylib is not passed">,
     Group<grp_lld>;
+def call_graph_profile_sort: Flag<["--"], "call-graph-profile-sort">,
+    HelpText<"Reorder sections with call graph profile (default)">,
+    Group<grp_lld>;
+def no_call_graph_profile_sort : Flag<["--"], "no-call-graph-profile-sort">,
+    HelpText<"Do not reorder sections with call graph profile">,
+    Group<grp_lld>;
+def print_symbol_order: Joined<["--"], "print-symbol-order=">,
+    HelpText<"Print a symbol order specified by --call-graph-profile-sort into the specified file">,
+    Group<grp_lld>;
 
 // This is a complete Options.td compiled from Apple's ld(1) manpage
 // dated 2018-03-07 and cross checked with ld64 source code in repo

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c633dbb027726..efa0c9d90cb40 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Writer.h"
+#include "CallGraphSort.h"
 #include "ConcatOutputSection.h"
 #include "Config.h"
 #include "InputFiles.h"
@@ -865,6 +866,8 @@ static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
 // Each section gets assigned the priority of the highest-priority symbol it
 // contains.
 static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
+  if (config->callGraphProfileSort)
+    return computeCallGraphProfileOrder();
   DenseMap<const InputSection *, size_t> sectionPriorities;
 
   if (config->priorities.empty())

diff  --git a/lld/test/MachO/cgprofile-icf.s b/lld/test/MachO/cgprofile-icf.s
new file mode 100644
index 0000000000000..3d34b9336790a
--- /dev/null
+++ b/lld/test/MachO/cgprofile-icf.s
@@ -0,0 +1,46 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t
+
+# RUN: %lld -e A %t -o %t.out --icf=all
+# RUN: llvm-nm --numeric-sort %t.out | FileCheck %s
+# RUN: %lld -e A %t -o %t2.out
+# RUN: llvm-nm --numeric-sort %t2.out | FileCheck %s --check-prefix=NOICF
+
+.text
+    .globl  D
+D:
+    mov $60, %rax
+    retq
+
+    .globl  C
+C:
+    mov $60, %rax
+    retq
+
+    .globl  B
+B:
+    mov $2, %rax
+    retq
+
+    .globl  A
+A:
+    mov $42, %rax
+    retq
+
+.cg_profile A, B, 100
+.cg_profile A, C,  40
+.cg_profile C, D,  61
+
+.subsections_via_symbols
+
+# CHECK:      0000000100000290 T A
+# CHECK-NEXT: 0000000100000298 T C
+# CHECK-NEXT: 0000000100000298 T D
+# CHECK-NEXT: 00000001000002a0 T B
+
+# NOICF:      0000000100000290 T A
+# NOICF-NEXT: 0000000100000298 T B
+# NOICF-NEXT: 00000001000002a0 T C
+# NOICF-NEXT: 00000001000002a8 T D
+

diff  --git a/lld/test/MachO/cgprofile-obj.s b/lld/test/MachO/cgprofile-obj.s
new file mode 100644
index 0000000000000..97aaf2de8c18d
--- /dev/null
+++ b/lld/test/MachO/cgprofile-obj.s
@@ -0,0 +1,42 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: %lld -lSystem -e A -o %t.out %t.o
+# RUN: llvm-nm --numeric-sort %t.out | FileCheck %s
+# RUN: %lld --no-call-graph-profile-sort -lSystem -e A -o %t.out %t.o
+# RUN: llvm-nm --numeric-sort %t.out | FileCheck %s --check-prefix=NO-CG
+
+.text
+
+D:
+  retq
+.globl C
+C:
+  retq
+.globl B
+B:
+  retq
+.globl A
+A:
+Aa:
+  retq
+.subsections_via_symbols
+
+
+
+    .cg_profile A, B, 10
+    .cg_profile A, B, 10
+    .cg_profile Aa, B, 80
+    .cg_profile A, C, 40
+    .cg_profile B, C, 30
+    .cg_profile C, D, 90
+
+# CHECK: 00000001000002c8 T A
+# CHECK: 00000001000002c9 T B
+# CHECK: 00000001000002ca T C
+# CHECK: 00000001000002cb t D
+
+# NO-CG: 00000001000002c8 t D
+# NO-CG: 00000001000002c9 T C
+# NO-CG: 00000001000002ca T B
+# NO-CG: 00000001000002cb T A

diff  --git a/lld/test/MachO/cgprofile-print.s b/lld/test/MachO/cgprofile-print.s
new file mode 100644
index 0000000000000..9435891695c9d
--- /dev/null
+++ b/lld/test/MachO/cgprofile-print.s
@@ -0,0 +1,34 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t
+# RUN: %lld -e A %t -o %t2  --print-symbol-order=%t3
+# RUN: FileCheck %s --input-file %t3
+
+# CHECK: B
+# CHECK-NEXT: C
+# CHECK-NEXT: D
+# CHECK-NEXT: A
+
+.text
+.globl  A
+A:
+ nop
+
+.globl  B
+B:
+ nop
+
+.globl  C
+C:
+ nop
+
+.globl  D
+D:
+ nop
+
+.subsections_via_symbols
+
+.cg_profile A, B, 5
+.cg_profile B, C, 50
+.cg_profile C, D, 40
+.cg_profile D, B, 10

diff  --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 086a71a5b9481..4d8a050dcd8c6 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -27,6 +27,7 @@ static_library("MachO") {
     "Arch/ARM64Common.cpp",
     "Arch/ARM64_32.cpp",
     "Arch/X86_64.cpp",
+    "CallGraphSort.cpp",
     "ConcatOutputSection.cpp",
     "Driver.cpp",
     "DriverUtils.cpp",


        


More information about the llvm-commits mailing list