[lld] [lld][macho] Strip .__uniq. and .llvm. hashes in -order_file (PR #140670)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 21 11:13:05 PDT 2025


https://github.com/SharonXSharon updated https://github.com/llvm/llvm-project/pull/140670

>From a1d230aa577a16f55152f4516b332417310528a9 Mon Sep 17 00:00:00 2001
From: Sharon Xu <sharonxu at fb.com>
Date: Mon, 19 May 2025 17:39:06 -0700
Subject: [PATCH 1/2] [lld][macho] Strip .__uniq. and .llvm. hashes when doing
 symbol matching in -order_file

---
 lld/MachO/SectionPriorities.cpp          |  10 ++-
 lld/MachO/SectionPriorities.h            |   5 ++
 lld/test/MachO/order-file-strip-hashes.s | 101 +++++++++++++++++++++++
 3 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 lld/test/MachO/order-file-strip-hashes.s

diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 7a4a5d8465f64..213623b338472 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -245,12 +245,18 @@ DenseMap<const InputSection *, int> CallGraphSort::run() {
   return orderMap;
 }
 
+StringRef macho::PriorityBuilder::getRootSymbol(StringRef Name) {
+  auto [P0, S0] = Name.rsplit(".llvm.");
+  auto [P1, S1] = P0.rsplit(".__uniq.");
+  return P1;
+}
+
 std::optional<int>
 macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
   if (sym->isAbsolute())
     return std::nullopt;
 
-  auto it = priorities.find(sym->getName());
+  auto it = priorities.find(getRootSymbol(sym->getName()));
   if (it == priorities.end())
     return std::nullopt;
   const SymbolPriorityEntry &entry = it->second;
@@ -330,7 +336,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
         break;
       }
     }
-    symbol = line.trim();
+    symbol = getRootSymbol(line.trim());
 
     if (!symbol.empty()) {
       SymbolPriorityEntry &entry = priorities[symbol];
diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h
index 44fb101990c51..0bbf238c2c116 100644
--- a/lld/MachO/SectionPriorities.h
+++ b/lld/MachO/SectionPriorities.h
@@ -69,6 +69,11 @@ class PriorityBuilder {
   std::optional<int> getSymbolPriority(const Defined *sym);
   llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
   llvm::MapVector<SectionPair, uint64_t> callGraphProfile;
+  /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
+  /// "yyyy" are numbers that could change between builds. We need to use the
+  /// root symbol name before this suffix so these symbols can be matched with
+  /// profiles which may have different suffixes.
+  llvm::StringRef getRootSymbol(llvm::StringRef Name);
 };
 
 extern PriorityBuilder priorityBuilder;
diff --git a/lld/test/MachO/order-file-strip-hashes.s b/lld/test/MachO/order-file-strip-hashes.s
new file mode 100644
index 0000000000000..d7e21371ad9ca
--- /dev/null
+++ b/lld/test/MachO/order-file-strip-hashes.s
@@ -0,0 +1,101 @@
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s
+
+
+# .text
+# CHECK: A
+# CHECK: B
+# CHECK: C
+# .section __DATA,__objc_const
+# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
+# CHECK: _ALPHABETIC_SORT_FIRST
+# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
+# CHECK: _OBJC_$_CATEGORY_SOME_$_FOLDED
+# CHECK: _OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2
+# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat2
+# .section __DATA,__objc_data
+# CHECK: _OBJC_CLASS_$_Baz
+# CHECK: _OBJC_CLASS_$_Bar
+# CHECK: _OBJC_CLASS_$_Foo
+# CHECK: _OBJC_CLASS_$_Baz2
+
+	
+#--- a.s
+.text
+.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+
+_main:
+  ret
+A:
+  ret
+F:
+  add w0, w0, #3
+  bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
+  add w0, w0, #2
+  bl  A
+  ret
+D:
+  add w0, w0, #2
+  bl B
+  ret
+B:
+  add w0, w0, #1
+  bl  A
+  ret
+E:
+  add w0, w0, #2
+  bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+
+.section __DATA,__objc_const
+# Test multiple symbols at the same address, which will be sorted alphabetically by symbol name.
+_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2:
+  .quad 789
+
+_OBJC_$_CATEGORY_SOME_$_FOLDED:
+_OBJC_$_CATEGORY_Foo_$_Cat1:
+_ALPHABETIC_SORT_FIRST:
+ .quad 123
+
+_OBJC_$_CATEGORY_Foo_$_Cat2:
+ .quad 222
+
+_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1:
+  .quad 456
+
+.section __DATA,__objc_data
+_OBJC_CLASS_$_Foo:
+ .quad 123
+
+_OBJC_CLASS_$_Bar.llvm.1234:
+ .quad 456
+
+_OBJC_CLASS_$_Baz:
+ .quad 789
+
+_OBJC_CLASS_$_Baz2:
+ .quad 999
+
+.section __DATA,__objc_classrefs
+.quad _OBJC_CLASS_$_Foo
+.quad _OBJC_CLASS_$_Bar.llvm.1234
+.quad _OBJC_CLASS_$_Baz
+
+.subsections_via_symbols
+
+
+#--- ord-1
+# change order, partially covered
+A
+B
+C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+_OBJC_CLASS_$_Baz
+_OBJC_CLASS_$_Bar.__uniq.12345
+_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789
+_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
+_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567

>From e607206a397fdf866bbcbae4ff1ac2b072f987e9 Mon Sep 17 00:00:00 2001
From: Sharon Xu <sharonxu at fb.com>
Date: Wed, 21 May 2025 10:45:14 -0700
Subject: [PATCH 2/2] create a new lld/include/lld/Common/Utils.h

---
 lld/Common/CMakeLists.txt                     |  1 +
 lld/Common/Utils.cpp                          | 20 ++++++++++++
 lld/ELF/BPSectionOrderer.cpp                  |  3 +-
 lld/MachO/BPSectionOrderer.cpp                |  2 +-
 lld/MachO/SectionPriorities.cpp               | 11 ++-----
 lld/MachO/SectionPriorities.h                 |  5 ---
 .../lld/Common/BPSectionOrdererBase.inc       | 20 +++---------
 lld/include/lld/Common/Utils.h                | 28 +++++++++++++++++
 lld/test/MachO/order-file-strip-hashes.s      | 31 +++++++------------
 9 files changed, 71 insertions(+), 50 deletions(-)
 create mode 100644 lld/Common/Utils.cpp
 create mode 100644 lld/include/lld/Common/Utils.h

diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index 4f503d04f7844..a9e8d72fb5ecc 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -34,6 +34,7 @@ add_lld_library(lldCommon
   Strings.cpp
   TargetOptionsCommandFlags.cpp
   Timer.cpp
+  Utils.cpp
   VCSVersion.inc
   Version.cpp
 
diff --git a/lld/Common/Utils.cpp b/lld/Common/Utils.cpp
new file mode 100644
index 0000000000000..55d25fde9879e
--- /dev/null
+++ b/lld/Common/Utils.cpp
@@ -0,0 +1,20 @@
+//===- Utils.cpp ------------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This file defines utility functions that can be shared across archs.
+//===----------------------------------------------------------------------===//
+
+#include "lld/Common/Utils.h"
+
+using namespace llvm;
+using namespace lld;
+
+StringRef lld::utils::getRootSymbol(StringRef name) {
+  name.consume_back(".Tgm");
+  auto [P0, S0] = name.rsplit(".llvm.");
+  auto [P1, S1] = P0.rsplit(".__uniq.");
+  return P1;
+}
diff --git a/lld/ELF/BPSectionOrderer.cpp b/lld/ELF/BPSectionOrderer.cpp
index 793176c7725a3..f464b1d4518a4 100644
--- a/lld/ELF/BPSectionOrderer.cpp
+++ b/lld/ELF/BPSectionOrderer.cpp
@@ -81,7 +81,8 @@ DenseMap<const InputSectionBase *, int> elf::runBalancedPartitioning(
     if (!sec || sec->size == 0 || !sec->isLive() || sec->repl != sec ||
         !orderer.secToSym.try_emplace(sec, d).second)
       return;
-    rootSymbolToSectionIdxs[CachedHashStringRef(getRootSymbol(sym.getName()))]
+    rootSymbolToSectionIdxs[CachedHashStringRef(
+                                lld::utils::getRootSymbol(sym.getName()))]
         .insert(sections.size());
     sections.emplace_back(sec);
   };
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index ca273f7f798dc..268fb89502a88 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -124,7 +124,7 @@ DenseMap<const InputSection *, int> lld::macho::runBalancedPartitioning(
         size_t idx = sections.size();
         sections.emplace_back(isec);
         for (auto *sym : BPOrdererMachO::getSymbols(*isec)) {
-          auto rootName = getRootSymbol(sym->getName());
+          auto rootName = lld::utils::getRootSymbol(sym->getName());
           rootSymbolToSectionIdxs[CachedHashStringRef(rootName)].insert(idx);
           if (auto linkageName =
                   BPOrdererMachO::getResolvedLinkageName(rootName))
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 213623b338472..5faedd9b790a5 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -21,6 +21,7 @@
 #include "lld/Common/Args.h"
 #include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/Path.h"
@@ -245,18 +246,12 @@ DenseMap<const InputSection *, int> CallGraphSort::run() {
   return orderMap;
 }
 
-StringRef macho::PriorityBuilder::getRootSymbol(StringRef Name) {
-  auto [P0, S0] = Name.rsplit(".llvm.");
-  auto [P1, S1] = P0.rsplit(".__uniq.");
-  return P1;
-}
-
 std::optional<int>
 macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
   if (sym->isAbsolute())
     return std::nullopt;
 
-  auto it = priorities.find(getRootSymbol(sym->getName()));
+  auto it = priorities.find(utils::getRootSymbol(sym->getName()));
   if (it == priorities.end())
     return std::nullopt;
   const SymbolPriorityEntry &entry = it->second;
@@ -336,7 +331,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
         break;
       }
     }
-    symbol = getRootSymbol(line.trim());
+    symbol = utils::getRootSymbol(line.trim());
 
     if (!symbol.empty()) {
       SymbolPriorityEntry &entry = priorities[symbol];
diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h
index 0bbf238c2c116..44fb101990c51 100644
--- a/lld/MachO/SectionPriorities.h
+++ b/lld/MachO/SectionPriorities.h
@@ -69,11 +69,6 @@ class PriorityBuilder {
   std::optional<int> getSymbolPriority(const Defined *sym);
   llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
   llvm::MapVector<SectionPair, uint64_t> callGraphProfile;
-  /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
-  /// "yyyy" are numbers that could change between builds. We need to use the
-  /// root symbol name before this suffix so these symbols can be matched with
-  /// profiles which may have different suffixes.
-  llvm::StringRef getRootSymbol(llvm::StringRef Name);
 };
 
 extern PriorityBuilder priorityBuilder;
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index 51dfb6471644a..bb2e55af1eb35 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -20,6 +20,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Utils.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
@@ -147,19 +148,6 @@ static SmallVector<std::pair<unsigned, UtilityNodes>> getUnsForCompression(
   return sectionUns;
 }
 
-/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
-/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
-/// the global merge functions suffix
-/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
-/// name before this suffix so these symbols can be matched with profiles which
-/// may have different suffixes.
-inline StringRef getRootSymbol(StringRef name) {
-  name.consume_back(".Tgm");
-  auto [P0, S0] = name.rsplit(".llvm.");
-  auto [P1, S1] = P0.rsplit(".__uniq.");
-  return P1;
-}
-
 template <class D>
 auto BPOrderer<D>::computeOrder(
     StringRef profilePath, bool forFunctionCompression, bool forDataCompression,
@@ -197,7 +185,7 @@ auto BPOrderer<D>::computeOrder(
       for (size_t timestamp = 0; timestamp < trace.size(); timestamp++) {
         auto [_, parsedFuncName] = getParsedIRPGOName(
             reader->getSymtab().getFuncOrVarName(trace[timestamp]));
-        parsedFuncName = getRootSymbol(parsedFuncName);
+        parsedFuncName = lld::utils::getRootSymbol(parsedFuncName);
 
         auto sectionIdxsIt =
             rootSymbolToSectionIdxs.find(CachedHashStringRef(parsedFuncName));
@@ -375,7 +363,7 @@ auto BPOrderer<D>::computeOrder(
           // 4?
           uint64_t lastPage = endAddress / pageSize;
           StringRef rootSymbol = D::getSymName(*sym);
-          rootSymbol = getRootSymbol(rootSymbol);
+          rootSymbol = lld::utils::getRootSymbol(rootSymbol);
           symbolToPageNumbers.try_emplace(rootSymbol, firstPage, lastPage);
           if (auto resolvedLinkageName = D::getResolvedLinkageName(rootSymbol))
             symbolToPageNumbers.try_emplace(resolvedLinkageName.value(),
@@ -393,7 +381,7 @@ auto BPOrderer<D>::computeOrder(
           auto traceId = trace.FunctionNameRefs[step];
           auto [Filename, ParsedFuncName] =
               getParsedIRPGOName(reader->getSymtab().getFuncOrVarName(traceId));
-          ParsedFuncName = getRootSymbol(ParsedFuncName);
+          ParsedFuncName = lld::utils::getRootSymbol(ParsedFuncName);
           auto it = symbolToPageNumbers.find(ParsedFuncName);
           if (it != symbolToPageNumbers.end()) {
             auto &[firstPage, lastPage] = it->getValue();
diff --git a/lld/include/lld/Common/Utils.h b/lld/include/lld/Common/Utils.h
new file mode 100644
index 0000000000000..867b82eb34683
--- /dev/null
+++ b/lld/include/lld/Common/Utils.h
@@ -0,0 +1,28 @@
+//===- Utils.h ------------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// This file declares utility functions that can be shared across archs.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_UTILS_H
+#define LLD_UTILS_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace lld {
+namespace utils {
+
+/// Symbols can be appended with "(.__uniq.xxxx)?(.llvm.yyyy)?(.Tgm)?" where
+/// "xxxx" and "yyyy" are numbers that could change between builds, and .Tgm is
+/// the global merge functions suffix
+/// (see GlobalMergeFunc::MergingInstanceSuffix). We need to use the root symbol
+/// name before this suffix so these symbols can be matched with profiles which
+/// may have different suffixes.
+llvm::StringRef getRootSymbol(llvm::StringRef Name);
+} // namespace utils
+} // namespace lld
+
+#endif
diff --git a/lld/test/MachO/order-file-strip-hashes.s b/lld/test/MachO/order-file-strip-hashes.s
index d7e21371ad9ca..ee0f876bb67ef 100644
--- a/lld/test/MachO/order-file-strip-hashes.s
+++ b/lld/test/MachO/order-file-strip-hashes.s
@@ -3,25 +3,6 @@
 
 # RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1
 # RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s
-
-
-# .text
-# CHECK: A
-# CHECK: B
-# CHECK: C
-# .section __DATA,__objc_const
-# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
-# CHECK: _ALPHABETIC_SORT_FIRST
-# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
-# CHECK: _OBJC_$_CATEGORY_SOME_$_FOLDED
-# CHECK: _OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2
-# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat2
-# .section __DATA,__objc_data
-# CHECK: _OBJC_CLASS_$_Baz
-# CHECK: _OBJC_CLASS_$_Bar
-# CHECK: _OBJC_CLASS_$_Foo
-# CHECK: _OBJC_CLASS_$_Baz2
-
 	
 #--- a.s
 .text
@@ -99,3 +80,15 @@ _OBJC_CLASS_$_Bar.__uniq.12345
 _OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789
 _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
 _OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567
+
+# .text
+# CHECK: A
+# CHECK: B
+# CHECK: C
+# .section __DATA,__objc_const
+# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
+# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
+# .section __DATA,__objc_data
+# CHECK: _OBJC_CLASS_$_Baz
+# CHECK: _OBJC_CLASS_$_Bar
+# CHECK: _OBJC_CLASS_$_Foo



More information about the llvm-commits mailing list