[lld] [lld-macho] Fix category merging category map non-determinism (PR #91159)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 6 05:33:30 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: None (alx32)

<details>
<summary>Changes</summary>

Currently in `ObjcCategoryMerger::doMerge` we create merged categories via `for (auto &entry : categoryMap) [...]`. The issue is that `categoryMap` is a `DenseMap` of pointers to symbols. Since pointers in memory change from run to run, the iteration through the map will also be arbitrary. This will cause categories to be emitted in random order. This violates linker determinism. 

To fix this, we instead sort categories by the same order that they showed up in the `inputSections`. This way determinism is preserved. 

---
Full diff: https://github.com/llvm/llvm-project/pull/91159.diff


1 Files Affected:

- (modified) lld/MachO/ObjC.cpp (+33-12) 


``````````diff
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 4760fffebe3b30..a48baec9064f12 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -341,10 +341,13 @@ class ObjcCategoryMerger {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
+    uint32_t inputIndex = 0;
 
     bool wasMerged = false;
   };
 
+  typedef std::vector<InfoInputCategory> CategoryGroup;
+
   // To write new (merged) categories or classes, we will try make limited
   // assumptions about the alignment and the sections the various class/category
   // info are stored in and . So we'll just reuse the same sections and
@@ -416,8 +419,7 @@ class ObjcCategoryMerger {
 
 private:
   void collectAndValidateCategoriesData();
-  void
-  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+  void mergeCategoriesIntoSingleCategory(CategoryGroup &categories);
 
   void eraseISec(ConcatInputSection *isec);
   void eraseMergedCategories();
@@ -476,8 +478,8 @@ class ObjcCategoryMerger {
 
   InfoCategoryWriter infoCategoryWriter;
   std::vector<ConcatInputSection *> &allInputSections;
-  // Map of base class Symbol to list of InfoInputCategory's for it
-  DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+  // Info for all input categories, grouped by base class
+  std::vector<CategoryGroup> categoryGroups;
 
   // Normally, the binary data comes from the input files, but since we're
   // generating binary data ourselves, we use the below array to store it in.
@@ -1043,6 +1045,12 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
 }
 
 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
+  // Make category merging deterministic by using a counter for found categories
+  uint32_t inputCategoryIndex = 0;
+  // Map of base class Symbol to list of InfoInputCategory's for it. We use this
+  // for fast lookup of categories to their base class. Later this info will be
+  // moved into 'categoryGroups' member.
+  DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
   for (InputSection *sec : allInputSections) {
     if (sec->getName() != section_names::objcCatList)
       continue;
@@ -1072,12 +1080,25 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
       assert(classSym && "Category does not have a valid base class");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off,
+                                     inputCategoryIndex++};
       categoryMap[classSym].push_back(catInputInfo);
 
       collectCategoryWriterInfoFromCategory(catInputInfo);
     }
   }
+
+  // Move categoryMap into categoryGroups and sort by the first category's
+  // inputIndex. This way we can be sure that category merging will be
+  // deterministic across linker runs.
+  categoryGroups.reserve(categoryMap.size());
+  for (auto &mapEntry : categoryMap)
+    categoryGroups.push_back(mapEntry.second);
+
+  std::sort(categoryGroups.begin(), categoryGroups.end(),
+            [](const CategoryGroup &a, const CategoryGroup &b) {
+              return a[0].inputIndex < b[0].inputIndex;
+            });
 }
 
 // In the input we have multiple __objc_catlist InputSection, each of which may
@@ -1153,8 +1174,8 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   // Map of InputSection to a set of offsets of the categories that were merged
   std::map<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
 
-  for (auto &mapEntry : categoryMap) {
-    for (InfoInputCategory &catInfo : mapEntry.second) {
+  for (auto &catGroup : categoryGroups) {
+    for (InfoInputCategory &catInfo : catGroup) {
       if (catInfo.wasMerged) {
         eraseISec(catInfo.catListIsec);
         catListToErasedOffsets[catInfo.catListIsec].insert(
@@ -1169,8 +1190,8 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   generateCatListForNonErasedCategories(catListToErasedOffsets);
 
   // Erase the old method lists & names of the categories that were merged
-  for (auto &mapEntry : categoryMap) {
-    for (InfoInputCategory &catInfo : mapEntry.second) {
+  for (auto &catgroup : categoryGroups) {
+    for (InfoInputCategory &catInfo : catgroup) {
       if (!catInfo.wasMerged)
         continue;
 
@@ -1193,10 +1214,10 @@ void ObjcCategoryMerger::eraseMergedCategories() {
 void ObjcCategoryMerger::doMerge() {
   collectAndValidateCategoriesData();
 
-  for (auto &entry : categoryMap)
-    if (entry.second.size() > 1)
+  for (auto &catGroup : categoryGroups)
+    if (catGroup.size() > 1)
       // Merge all categories into a new, single category
-      mergeCategoriesIntoSingleCategory(entry.second);
+      mergeCategoriesIntoSingleCategory(catGroup);
 
   // Erase all categories that were merged
   eraseMergedCategories();

``````````

</details>


https://github.com/llvm/llvm-project/pull/91159


More information about the llvm-commits mailing list