[lld] [lld][macho] Strip .__uniq. and .llvm. hashes in -order_file (PR #140670)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 19 20:50:00 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: None (SharonXSharon)

<details>
<summary>Changes</summary>

```
/// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
/// "yyyy" are numbers that could change between builds. We need to use the root
/// symbol name before this suffix so these symbols can be matched with profiles
/// which may have different suffixes.
```
This mirrors what the BP section orderer already does: https://github.com/llvm/llvm-project/blob/main/lld/MachO/BPSectionOrderer.cpp#L127

This patch strips these suffixes both when parsing the order file and when looking up a symbol's priority, so that symbols still match when their hash suffixes differ.


---
Full diff: https://github.com/llvm/llvm-project/pull/140670.diff


3 Files Affected:

- (modified) lld/MachO/SectionPriorities.cpp (+8-2) 
- (modified) lld/MachO/SectionPriorities.h (+5) 
- (added) lld/test/MachO/order-file-strip-hashes.s (+101) 


``````````diff
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 7a4a5d8465f64..213623b338472 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -245,12 +245,18 @@ DenseMap<const InputSection *, int> CallGraphSort::run() {
   return orderMap;
 }
 
+StringRef macho::PriorityBuilder::getRootSymbol(StringRef Name) {
+  auto [P0, S0] = Name.rsplit(".llvm.");
+  auto [P1, S1] = P0.rsplit(".__uniq.");
+  return P1;
+}
+
 std::optional<int>
 macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
   if (sym->isAbsolute())
     return std::nullopt;
 
-  auto it = priorities.find(sym->getName());
+  auto it = priorities.find(getRootSymbol(sym->getName()));
   if (it == priorities.end())
     return std::nullopt;
   const SymbolPriorityEntry &entry = it->second;
@@ -330,7 +336,7 @@ void macho::PriorityBuilder::parseOrderFile(StringRef path) {
         break;
       }
     }
-    symbol = line.trim();
+    symbol = getRootSymbol(line.trim());
 
     if (!symbol.empty()) {
       SymbolPriorityEntry &entry = priorities[symbol];
diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h
index 44fb101990c51..0bbf238c2c116 100644
--- a/lld/MachO/SectionPriorities.h
+++ b/lld/MachO/SectionPriorities.h
@@ -69,6 +69,11 @@ class PriorityBuilder {
   std::optional<int> getSymbolPriority(const Defined *sym);
   llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities;
   llvm::MapVector<SectionPair, uint64_t> callGraphProfile;
+  /// Symbols can be appended with "(.__uniq.xxxx)?.llvm.yyyy" where "xxxx" and
+  /// "yyyy" are numbers that could change between builds. We need to use the
+  /// root symbol name before this suffix so these symbols can be matched with
+  /// profiles which may have different suffixes.
+  llvm::StringRef getRootSymbol(llvm::StringRef Name);
 };
 
 extern PriorityBuilder priorityBuilder;
diff --git a/lld/test/MachO/order-file-strip-hashes.s b/lld/test/MachO/order-file-strip-hashes.s
new file mode 100644
index 0000000000000..d7e21371ad9ca
--- /dev/null
+++ b/lld/test/MachO/order-file-strip-hashes.s
@@ -0,0 +1,101 @@
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o
+
+# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1
+# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s
+
+
+# .text
+# CHECK: A
+# CHECK: B
+# CHECK: C
+# .section __DATA,__objc_const
+# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
+# CHECK: _ALPHABETIC_SORT_FIRST
+# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1
+# CHECK: _OBJC_$_CATEGORY_SOME_$_FOLDED
+# CHECK: _OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2
+# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat2
+# .section __DATA,__objc_data
+# CHECK: _OBJC_CLASS_$_Baz
+# CHECK: _OBJC_CLASS_$_Bar
+# CHECK: _OBJC_CLASS_$_Foo
+# CHECK: _OBJC_CLASS_$_Baz2
+
+	
+#--- a.s
+.text
+.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+
+_main:
+  ret
+A:
+  ret
+F:
+  add w0, w0, #3
+  bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222:
+  add w0, w0, #2
+  bl  A
+  ret
+D:
+  add w0, w0, #2
+  bl B
+  ret
+B:
+  add w0, w0, #1
+  bl  A
+  ret
+E:
+  add w0, w0, #2
+  bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222
+  ret
+
+.section __DATA,__objc_const
+# Test multiple symbols at the same address, which will be sorted alphabetically by symbol name.
+_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2:
+  .quad 789
+
+_OBJC_$_CATEGORY_SOME_$_FOLDED:
+_OBJC_$_CATEGORY_Foo_$_Cat1:
+_ALPHABETIC_SORT_FIRST:
+ .quad 123
+
+_OBJC_$_CATEGORY_Foo_$_Cat2:
+ .quad 222
+
+_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1:
+  .quad 456
+
+.section __DATA,__objc_data
+_OBJC_CLASS_$_Foo:
+ .quad 123
+
+_OBJC_CLASS_$_Bar.llvm.1234:
+ .quad 456
+
+_OBJC_CLASS_$_Baz:
+ .quad 789
+
+_OBJC_CLASS_$_Baz2:
+ .quad 999
+
+.section __DATA,__objc_classrefs
+.quad _OBJC_CLASS_$_Foo
+.quad _OBJC_CLASS_$_Bar.llvm.1234
+.quad _OBJC_CLASS_$_Baz
+
+.subsections_via_symbols
+
+
+#--- ord-1
+# change order, partially covered
+A
+B
+C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666
+_OBJC_CLASS_$_Baz
+_OBJC_CLASS_$_Bar.__uniq.12345
+_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789
+_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1
+_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567

``````````

</details>


https://github.com/llvm/llvm-project/pull/140670


More information about the llvm-commits mailing list