[llvm] [BOLT][NFCI] Use heuristic for matching split global functions (PR #90429)

Amir Ayupov via llvm-commits llvm-commits at lists.llvm.org
Wed May 8 13:59:26 PDT 2024


https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/90429

>From b7457cc79e1dcd44cfe1448b0a5f0abe3c66f398 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov at fb.com>
Date: Sun, 28 Apr 2024 21:04:55 -0700
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?=
 =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.5

[skip ci]
---
 bolt/docs/BAT.md                              |   9 +-
 .../bolt/Profile/BoltAddressTranslation.h     |   6 +-
 bolt/include/bolt/Rewrite/RewriteInstance.h   |   3 +
 bolt/include/bolt/Utils/NameResolver.h        |  17 +-
 bolt/lib/Profile/BoltAddressTranslation.cpp   |  86 +++++------
 bolt/lib/Rewrite/RewriteInstance.cpp          | 145 ++++++++++++++----
 bolt/test/X86/bolt-address-translation.test   |   2 +-
 bolt/test/X86/cdsplit-symbol-names.s          |  15 +-
 bolt/test/X86/fragment-lite.s                 |  54 +++++--
 llvm/include/llvm/Object/ELFObjectFile.h      |   8 +
 10 files changed, 251 insertions(+), 94 deletions(-)

diff --git a/bolt/docs/BAT.md b/bolt/docs/BAT.md
index 7ffb5d7c00816..f23ef1abf8761 100644
--- a/bolt/docs/BAT.md
+++ b/bolt/docs/BAT.md
@@ -81,10 +81,9 @@ Hot indices are delta encoded, implicitly starting at zero.
 | `FuncHash` | 8b | Function hash for input function | Hot |
 | `NumBlocks` | ULEB128 | Number of basic blocks in the original function | Hot |
 | `NumSecEntryPoints` | ULEB128 | Number of secondary entry points in the original function | Hot |
-| `ColdInputSkew` | ULEB128 | Skew to apply to all input offsets | Cold |
 | `NumEntries` | ULEB128 | Number of address translation entries for a function | Both |
-| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Both |
-| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | If `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit | Both |
+| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Hot |
+| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | If `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit | Hot |
 
 Function header is followed by *Address Translation Table* with `NumEntries`
 total entries, and *Secondary Entry Points* table with `NumSecEntryPoints`
@@ -100,8 +99,8 @@ entry is encoded. Input offsets implicitly start at zero.
 | `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
 | `BBIdx`  | Optional, Delta, ULEB128 | Basic block index in input binary | BB |
 
-The table omits the first `EqualElems` input offsets where the input offset
-equals output offset.
+For hot fragments, the table omits the first `EqualElems` input offsets
+where the input offset equals the output offset.
 
 `BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
 (branch or call instruction). If not set, it signifies a control flow target
diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h
index 68b993ee363cc..eef05e8a0e681 100644
--- a/bolt/include/bolt/Profile/BoltAddressTranslation.h
+++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h
@@ -149,9 +149,9 @@ class BoltAddressTranslation {
   /// entries in function address translation map.
   APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);
 
-  /// Calculate the number of equal offsets (output = input - skew) in the
-  /// beginning of the function.
-  size_t getNumEqualOffsets(const MapTy &Map, uint32_t Skew) const;
+  /// Calculate the number of equal offsets (output = input) at the beginning
+  /// of the function.
+  size_t getNumEqualOffsets(const MapTy &Map) const;
 
   std::map<uint64_t, MapTy> Maps;
 
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index f4bffba96b1d4..d37e6f5ed8595 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -494,6 +494,9 @@ class RewriteInstance {
   /// Store all non-zero symbols in this map for a quick address lookup.
   std::map<uint64_t, llvm::object::SymbolRef> FileSymRefs;
 
+  /// FILE symbols used for disambiguating split function parents.
+  std::vector<ELFSymbolRef> FileSymbols;
+
   std::unique_ptr<DWARFRewriter> DebugInfoRewriter;
 
   std::unique_ptr<BoltAddressTranslation> BAT;
diff --git a/bolt/include/bolt/Utils/NameResolver.h b/bolt/include/bolt/Utils/NameResolver.h
index 2e3ac20a532d7..ccffa5633245c 100644
--- a/bolt/include/bolt/Utils/NameResolver.h
+++ b/bolt/include/bolt/Utils/NameResolver.h
@@ -28,10 +28,23 @@ class NameResolver {
   static constexpr char Sep = '/';
 
 public:
-  /// Return unique version of the \p Name in the form "Name<Sep><Number>".
+  /// Return the number of uniquified versions of a given \p Name.
+  uint64_t getUniquifiedNameCount(StringRef Name) const {
+    if (Counters.contains(Name))
+      return Counters.at(Name);
+    return 0;
+  }
+
+  /// Return unique version of the \p Name in the form "Name<Sep><ID>".
+  std::string getUniqueName(StringRef Name, const uint64_t ID) const {
+    return (Name + Twine(Sep) + Twine(ID)).str();
+  }
+
+  /// Register new version of \p Name and return unique version in the form
+  /// "Name<Sep><Number>".
   std::string uniquify(StringRef Name) {
     const uint64_t ID = ++Counters[Name];
-    return (Name + Twine(Sep) + Twine(ID)).str();
+    return getUniqueName(Name, ID);
   }
 
   /// For uniquified \p Name, return the original form (that may no longer be
diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp
index 7cfb9c132c2c6..0141ce189acda 100644
--- a/bolt/lib/Profile/BoltAddressTranslation.cpp
+++ b/bolt/lib/Profile/BoltAddressTranslation.cpp
@@ -153,13 +153,12 @@ APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
   return BitMask;
 }
 
-size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map,
-                                                  uint32_t Skew) const {
+size_t BoltAddressTranslation::getNumEqualOffsets(const MapTy &Map) const {
   size_t EqualOffsets = 0;
   for (const std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
     const uint32_t OutputOffset = KeyVal.first;
     const uint32_t InputOffset = KeyVal.second >> 1;
-    if (OutputOffset == InputOffset - Skew)
+    if (OutputOffset == InputOffset)
       ++EqualOffsets;
     else
       break;
@@ -197,17 +196,12 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
         SecondaryEntryPointsMap.count(Address)
             ? SecondaryEntryPointsMap[Address].size()
             : 0;
-    uint32_t Skew = 0;
     if (Cold) {
       auto HotEntryIt = Maps.find(ColdPartSource[Address]);
       assert(HotEntryIt != Maps.end());
       size_t HotIndex = std::distance(Maps.begin(), HotEntryIt);
       encodeULEB128(HotIndex - PrevIndex, OS);
       PrevIndex = HotIndex;
-      // Skew of all input offsets for cold fragments is simply the first input
-      // offset.
-      Skew = Map.begin()->second >> 1;
-      encodeULEB128(Skew, OS);
     } else {
       // Function hash
       size_t BFHash = getBFHash(HotInputAddress);
@@ -223,21 +217,24 @@ void BoltAddressTranslation::writeMaps(std::map<uint64_t, MapTy> &Maps,
                         << '\n');
     }
     encodeULEB128(NumEntries, OS);
-    // Encode the number of equal offsets (output = input - skew) in the
-    // beginning of the function. Only encode one offset in these cases.
-    const size_t EqualElems = getNumEqualOffsets(Map, Skew);
-    encodeULEB128(EqualElems, OS);
-    if (EqualElems) {
-      const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
-      APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
-      OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
-               BranchEntriesBytes);
-      LLVM_DEBUG({
-        dbgs() << "BranchEntries: ";
-        SmallString<8> BitMaskStr;
-        BranchEntries.toString(BitMaskStr, 2, false);
-        dbgs() << BitMaskStr << '\n';
-      });
+    // For hot fragments only: encode the number of equal offsets
+    // (output = input) in the beginning of the function. Only encode one offset
+    // in these cases.
+    const size_t EqualElems = Cold ? 0 : getNumEqualOffsets(Map);
+    if (!Cold) {
+      encodeULEB128(EqualElems, OS);
+      if (EqualElems) {
+        const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
+        APInt BranchEntries = calculateBranchEntriesBitMask(Map, EqualElems);
+        OS.write(reinterpret_cast<const char *>(BranchEntries.getRawData()),
+                 BranchEntriesBytes);
+        LLVM_DEBUG({
+          dbgs() << "BranchEntries: ";
+          SmallString<8> BitMaskStr;
+          BranchEntries.toString(BitMaskStr, 2, false);
+          dbgs() << BitMaskStr << '\n';
+        });
+      }
     }
     const BBHashMapTy &BBHashMap = getBBHashMap(HotInputAddress);
     size_t Index = 0;
@@ -318,12 +315,10 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
     uint64_t HotAddress = Cold ? 0 : Address;
     PrevAddress = Address;
     uint32_t SecondaryEntryPoints = 0;
-    uint64_t ColdInputSkew = 0;
     if (Cold) {
       HotIndex += DE.getULEB128(&Offset, &Err);
       HotAddress = HotFuncs[HotIndex];
       ColdPartSource.emplace(Address, HotAddress);
-      ColdInputSkew = DE.getULEB128(&Offset, &Err);
     } else {
       HotFuncs.push_back(Address);
       // Function hash
@@ -344,25 +339,28 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
                             getULEB128Size(SecondaryEntryPoints)));
     }
     const uint32_t NumEntries = DE.getULEB128(&Offset, &Err);
-    // Equal offsets.
-    const size_t EqualElems = DE.getULEB128(&Offset, &Err);
+    // Equal offsets, hot fragments only.
+    size_t EqualElems = 0;
     APInt BEBitMask;
-    LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n", EqualElems,
-                                 getULEB128Size(EqualElems)));
-    if (EqualElems) {
-      const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
-      BEBitMask = APInt(alignTo(EqualElems, 8), 0);
-      LoadIntFromMemory(
-          BEBitMask,
-          reinterpret_cast<const uint8_t *>(
-              DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
-          BranchEntriesBytes);
-      LLVM_DEBUG({
-        dbgs() << "BEBitMask: ";
-        SmallString<8> BitMaskStr;
-        BEBitMask.toString(BitMaskStr, 2, false);
-        dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
-      });
+    if (!Cold) {
+      EqualElems = DE.getULEB128(&Offset, &Err);
+      LLVM_DEBUG(dbgs() << formatv("Equal offsets: {0}, {1} bytes\n",
+                                   EqualElems, getULEB128Size(EqualElems)));
+      if (EqualElems) {
+        const size_t BranchEntriesBytes = alignTo(EqualElems, 8) / 8;
+        BEBitMask = APInt(alignTo(EqualElems, 8), 0);
+        LoadIntFromMemory(
+            BEBitMask,
+            reinterpret_cast<const uint8_t *>(
+                DE.getBytes(&Offset, BranchEntriesBytes, &Err).data()),
+            BranchEntriesBytes);
+        LLVM_DEBUG({
+          dbgs() << "BEBitMask: ";
+          SmallString<8> BitMaskStr;
+          BEBitMask.toString(BitMaskStr, 2, false);
+          dbgs() << BitMaskStr << ", " << BranchEntriesBytes << " bytes\n";
+        });
+      }
     }
     MapTy Map;
 
@@ -377,7 +375,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
       PrevAddress = OutputAddress;
       int64_t InputDelta = 0;
       if (J < EqualElems) {
-        InputOffset = ((OutputOffset + ColdInputSkew) << 1) | BEBitMask[J];
+        InputOffset = (OutputOffset << 1) | BEBitMask[J];
       } else {
         InputDelta = DE.getSLEB128(&Offset, &Err);
         InputOffset += InputDelta;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 065260936e70a..8eb2e5a9d9120 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -840,6 +840,7 @@ void RewriteInstance::discoverFileObjects() {
       continue;
 
     if (cantFail(Symbol.getType()) == SymbolRef::ST_File) {
+      FileSymbols.emplace_back(Symbol);
       StringRef Name =
           cantFail(std::move(NameOrError), "cannot get symbol name for file");
       // Ignore Clang LTO artificial FILE symbol as it is not always generated,
@@ -1340,6 +1341,7 @@ void RewriteInstance::discoverFileObjects() {
   }
 
   registerFragments();
+  FileSymbols.clear();
 }
 
 Error RewriteInstance::discoverRtFiniAddress() {
@@ -1417,50 +1419,139 @@ void RewriteInstance::registerFragments() {
   if (!BC->HasSplitFunctions)
     return;
 
+  // Process fragments with ambiguous parents separately as they are typically a
+  // vanishing minority of cases and require expensive symbol table lookups.
+  std::vector<std::pair<StringRef, BinaryFunction *>> AmbiguousFragments;
   for (auto &BFI : BC->getBinaryFunctions()) {
     BinaryFunction &Function = BFI.second;
     if (!Function.isFragment())
       continue;
-    unsigned ParentsFound = 0;
     for (StringRef Name : Function.getNames()) {
-      StringRef BaseName, Suffix;
-      std::tie(BaseName, Suffix) = Name.split('/');
+      StringRef BaseName = NR.restore(Name);
+      const bool IsGlobal = BaseName == Name;
       const size_t ColdSuffixPos = BaseName.find(".cold");
       if (ColdSuffixPos == StringRef::npos)
         continue;
-      // For cold function with local (foo.cold/1) symbol, prefer a parent with
-      // local symbol as well (foo/1) over global symbol (foo).
-      std::string ParentName = BaseName.substr(0, ColdSuffixPos).str();
+      StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
       const BinaryData *BD = BC->getBinaryDataByName(ParentName);
-      if (Suffix != "") {
-        ParentName.append(Twine("/", Suffix).str());
-        const BinaryData *BDLocal = BC->getBinaryDataByName(ParentName);
-        if (BDLocal || !BD)
-          BD = BDLocal;
-      }
-      if (!BD) {
-        if (opts::Verbosity >= 1)
-          BC->outs() << "BOLT-INFO: parent function not found for " << Name
-                     << "\n";
+      const uint64_t NumPossibleLocalParents =
+          NR.getUniquifiedNameCount(ParentName);
+      // The most common case: single local parent fragment.
+      if (!BD && NumPossibleLocalParents == 1) {
+        BD = BC->getBinaryDataByName(NR.getUniqueName(ParentName, 1));
+      } else if (BD && (!NumPossibleLocalParents || IsGlobal)) {
+        // Global parent and either no local candidates (second most common), or
+        // the fragment is global as well (uncommon).
+      } else {
+        // Any other case: need to disambiguate using FILE symbols.
+        AmbiguousFragments.emplace_back(ParentName, &Function);
         continue;
       }
-      const uint64_t Address = BD->getAddress();
-      BinaryFunction *BF = BC->getBinaryFunctionAtAddress(Address);
-      if (!BF) {
-        if (opts::Verbosity >= 1)
-          BC->outs() << formatv(
-              "BOLT-INFO: parent function not found at {0:x}\n", Address);
-        continue;
+      if (BD) {
+        BinaryFunction *BF = BC->getFunctionForSymbol(BD->getSymbol());
+        if (BF) {
+          BC->registerFragment(Function, *BF);
+          continue;
+        }
       }
-      BC->registerFragment(Function, *BF);
-      ++ParentsFound;
-    }
-    if (!ParentsFound) {
       BC->errs() << "BOLT-ERROR: parent function not found for " << Function
                  << '\n';
       exit(1);
     }
   }
+
+  if (AmbiguousFragments.empty())
+    return;
+
+  if (!BC->hasSymbolsWithFileName()) {
+    BC->errs() << "BOLT-ERROR: input file has split functions but does not "
+                  "have FILE symbols. If the binary was stripped, preserve "
+                  "FILE symbols with --keep-file-symbols strip option";
+    exit(1);
+  }
+
+  // The first global symbol is identified by the symbol table sh_info value.
+  // It serves as the stopping point for the local symbol search.
+  auto *ELF64LEFile = cast<ELF64LEObjectFile>(InputFile);
+  const ELFFile<ELF64LE> &Obj = ELF64LEFile->getELFFile();
+  auto *SymTab = llvm::find_if(cantFail(Obj.sections()), [](const auto &Sec) {
+    return Sec.sh_type == ELF::SHT_SYMTAB;
+  });
+  assert(SymTab);
+  if (!SymTab->sh_info) {
+    BC->errs() << "BOLT-ERROR: malformed SYMTAB sh_info\n";
+    exit(1);
+  }
+  ELFSymbolRef FirstGlobal = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);
+
+  for (auto &[ParentName, BF] : AmbiguousFragments) {
+    const uint64_t Address = BF->getAddress();
+
+    // Get the fragment's own symbol.
+    const auto SymIt = FileSymRefs.find(Address);
+    if (SymIt == FileSymRefs.end()) {
+      BC->errs()
+          << "BOLT-ERROR: symbol lookup failed for function at address 0x"
+          << Twine::utohexstr(Address) << '\n';
+      exit(1);
+    }
+
+    // Find the containing FILE symbol.
+    ELFSymbolRef Symbol = SymIt->second;
+    auto FSI = llvm::upper_bound(FileSymbols, Symbol);
+    if (FSI == FileSymbols.begin()) {
+      BC->errs() << "BOLT-ERROR: owning FILE symbol not found for symbol "
+                 << cantFail(Symbol.getName()) << '\n';
+      exit(1);
+    }
+
+    ELFSymbolRef StopSymbol = FirstGlobal;
+    if (FSI != FileSymbols.end())
+      StopSymbol = *FSI;
+
+    uint64_t ParentAddress{0};
+
+    // BOLT split fragment symbols are emitted just before the main function
+    // symbol.
+    for (ELFSymbolRef NextSymbol = Symbol; NextSymbol < StopSymbol;
+         NextSymbol.moveNext()) {
+      Expected<StringRef> NameOrError = Symbol.getName();
+      if (!NameOrError)
+        break;
+      StringRef Name = *NameOrError;
+      if (Name == ParentName) {
+        ParentAddress = cantFail(NextSymbol.getValue());
+        goto registerParent;
+      }
+      if (Name.starts_with(ParentName))
+        // With multi-way splitting, there are multiple fragments with different
+        // suffixes. Parent follows the last fragment.
+        continue;
+      break;
+    }
+
+    // Iterate over local file symbols and check symbol names to match parent.
+    for (ELFSymbolRef Symbol(FSI[-1]); Symbol < StopSymbol; Symbol.moveNext()) {
+      if (cantFail(Symbol.getName()) == ParentName) {
+        ParentAddress = cantFail(Symbol.getAddress());
+        break;
+      }
+    }
+
+registerParent:
+    // No local parent is found, use global parent function.
+    if (!ParentAddress)
+      if (BinaryData *ParentBD = BC->getBinaryDataByName(ParentName))
+        ParentAddress = ParentBD->getAddress();
+
+    if (BinaryFunction *ParentBF =
+            BC->getBinaryFunctionAtAddress(ParentAddress)) {
+      BC->registerFragment(*BF, *ParentBF);
+      continue;
+    }
+    BC->errs() << "BOLT-ERROR: parent function not found for " << *BF << '\n';
+    exit(1);
+  }
 }
 
 void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress,
diff --git a/bolt/test/X86/bolt-address-translation.test b/bolt/test/X86/bolt-address-translation.test
index e6b21c14077b4..63234b4c1d218 100644
--- a/bolt/test/X86/bolt-address-translation.test
+++ b/bolt/test/X86/bolt-address-translation.test
@@ -37,7 +37,7 @@
 # CHECK:      BOLT: 3 out of 7 functions were overwritten.
 # CHECK:      BOLT-INFO: Wrote 6 BAT maps
 # CHECK:      BOLT-INFO: Wrote 3 function and 58 basic block hashes
-# CHECK:      BOLT-INFO: BAT section size (bytes): 928
+# CHECK:      BOLT-INFO: BAT section size (bytes): 924
 #
 # usqrt mappings (hot part). We match against any key (left side containing
 # the bolted binary offsets) because BOLT may change where it puts instructions
diff --git a/bolt/test/X86/cdsplit-symbol-names.s b/bolt/test/X86/cdsplit-symbol-names.s
index e53863e22246d..1d3fa91936af0 100644
--- a/bolt/test/X86/cdsplit-symbol-names.s
+++ b/bolt/test/X86/cdsplit-symbol-names.s
@@ -7,7 +7,7 @@
 # RUN: llvm-strip --strip-unneeded %t.o
 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-strategy=cdsplit \
-# RUN:         --call-scale=2 --data=%t.fdata --reorder-blocks=ext-tsp
+# RUN:   --call-scale=2 --data=%t.fdata --reorder-blocks=ext-tsp --enable-bat
 # RUN: llvm-objdump --syms %t.bolt | FileCheck %s --check-prefix=CHECK-SYMS-WARM
 
 # CHECK-SYMS-WARM: 0000000000000000 l df *ABS* 0000000000000000 bolt-pseudo.o
@@ -16,8 +16,19 @@
 # CHECK-SYMS-WARM: .text.cold
 # CHECK-SYMS-WARM-SAME: dummy.cold
 
+# RUN: link_fdata %s %t.bolt %t.preagg PREAGG
+# PREAGG: B X:0 #chain.warm# 1 0
+# RUN: perf2bolt %t.bolt -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml -v=1 \
+# RUN:   | FileCheck %s --check-prefix=CHECK-REGISTER
+
+# CHECK-REGISTER: BOLT-INFO: marking chain.warm/1(*2) as a fragment of chain/2(*2)
+
         .text
-        .globl  chain
+        .type   chain, @function
+chain:
+        ret
+        .size   chain, .-chain
+
         .type   chain, @function
 chain:
         pushq   %rbp
diff --git a/bolt/test/X86/fragment-lite.s b/bolt/test/X86/fragment-lite.s
index 97069bf8096e1..32d1f5a98b64a 100644
--- a/bolt/test/X86/fragment-lite.s
+++ b/bolt/test/X86/fragment-lite.s
@@ -3,35 +3,42 @@
 # RUN: split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz.s -o %t.baz.o
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/baz2.s -o %t.baz2.o
 # RUN: link_fdata %s %t.o %t.main.fdata
 # RUN: link_fdata %s %t.baz.o %t.baz.fdata
-# RUN: merge-fdata %t.main.fdata %t.baz.fdata > %t.fdata
-# RUN: %clang %cflags %t.o %t.baz.o -o %t.exe -Wl,-q
+# RUN: link_fdata %s %t.baz2.o %t.baz2.fdata
+# RUN: merge-fdata %t.main.fdata %t.baz.fdata %t.baz2.fdata > %t.fdata
+# RUN: %clang %cflags %t.o %t.baz.o %t.baz2.o -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -o %t.out --lite=1 --data %t.fdata -v=1 -print-cfg \
 # RUN:   2>&1 | FileCheck %s
 
 # CHECK: BOLT-INFO: processing main.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing foo.cold.1/1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing bar.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing foo.cold.1/1(*2) as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing bar.cold.1/1(*2) as a sibling of non-ignored function
 # CHECK: BOLT-INFO: processing baz.cold.1 as a sibling of non-ignored function
-# CHECK: BOLT-INFO: processing baz.cold.1/1 as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing baz.cold.1/1(*2) as a sibling of non-ignored function
+# CHECK: BOLT-INFO: processing baz.cold.1/2(*2) as a sibling of non-ignored function
 
 # CHECK: Binary Function "main.cold.1" after building cfg
 # CHECK: Parent : main
 
-# CHECK: Binary Function "foo.cold.1/1" after building cfg
+# CHECK: Binary Function "foo.cold.1/1(*2)" after building cfg
 # CHECK: Parent : foo
 
-# CHECK: Binary Function "bar.cold.1/1" after building cfg
-# CHECK: Parent : bar/1
+# CHECK: Binary Function "bar.cold.1/1(*2)" after building cfg
+# CHECK: Parent : bar/1(*2)
 
 # CHECK: Binary Function "baz.cold.1" after building cfg
 # CHECK: Parent : baz{{$}}
 
-# CHECK: Binary Function "baz.cold.1/1" after building cfg
-# CHECK: Parent : baz/1
+# CHECK: Binary Function "baz.cold.1/1(*2)" after building cfg
+# CHECK: Parent : baz/1(*2)
+
+# CHECK: Binary Function "baz.cold.1/2(*2)" after building cfg
+# CHECK: Parent : baz/2(*2)
 
 #--- main.s
+.file "main.s"
   .globl main
   .type main, %function
 main:
@@ -126,6 +133,7 @@ baz.cold.1:
 .size baz.cold.1, .-baz.cold.1
 
 #--- baz.s
+.file "baz.s"
   .local baz
   .type baz, %function
 baz:
@@ -149,3 +157,29 @@ baz.cold.1:
   retq
   .cfi_endproc
 .size baz.cold.1, .-baz.cold.1
+
+#--- baz2.s
+.file "baz2.s"
+  .local baz
+  .type baz, %function
+baz:
+  .cfi_startproc
+# FDATA: 0 [unknown] 0 1 baz/2 0 1 0
+  cmpl	$0x0, %eax
+  je	baz.cold.1
+  retq
+  .cfi_endproc
+.size baz, .-baz
+
+  .section .text.cold
+  .local baz.cold.1
+  .type baz.cold.1, %function
+baz.cold.1:
+  .cfi_startproc
+  pushq	%rbp
+  movq	%rsp, %rbp
+  movl	$0x0, %eax
+  popq	%rbp
+  retq
+  .cfi_endproc
+.size baz.cold.1, .-baz.cold.1
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 4494d9b96189b..8cc09e7fd7d55 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -199,6 +199,14 @@ class ELFSymbolRef : public SymbolRef {
   }
 };
 
+inline bool operator<(const ELFSymbolRef &A, const ELFSymbolRef &B) {
+  const DataRefImpl &DRIA = A.getRawDataRefImpl();
+  const DataRefImpl &DRIB = B.getRawDataRefImpl();
+  if (DRIA.d.a == DRIB.d.a)
+    return DRIA.d.b < DRIB.d.b;
+  return DRIA.d.a < DRIB.d.a;
+}
+
 class elf_symbol_iterator : public symbol_iterator {
 public:
   elf_symbol_iterator(const basic_symbol_iterator &B)



More information about the llvm-commits mailing list