[lld] [lld-macho] Fix category merging category map non-determinism (PR #91159)

via llvm-commits llvm-commits at lists.llvm.org
Sun May 5 19:31:56 PDT 2024


https://github.com/alx32 created https://github.com/llvm/llvm-project/pull/91159

Currently in `ObjcCategoryMerger::doMerge` we create merged categories via `for (auto &entry : categoryMap) [...]`. The issue is that `categoryMap` is a `DenseMap` of pointers to symbols. Since pointers in memory change from run to run, the iteration through the map will also be arbitrary. This will cause categories to be emitted in random order. This violates linker determinism. 

To fix this, we instead sort categories by the same order that they showed up in the `inputSections`. This way determinism is preserved. 

>From 2ac1d784b57719a525ade4a29c8042f6ba974773 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Sun, 5 May 2024 19:11:28 -0700
Subject: [PATCH] [lld-macho] Fix category merging category map non-determinism

---
 lld/MachO/ObjC.cpp | 48 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 4760fffebe3b30..8acda0104cbf42 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -341,10 +341,13 @@ class ObjcCategoryMerger {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
+    uint32_t inputIndex = 0;
 
     bool wasMerged = false;
   };
 
+  typedef std::vector<InfoInputCategory> CategoryGroup;
+
   // To write new (merged) categories or classes, we will try make limited
   // assumptions about the alignment and the sections the various class/category
   // info are stored in and . So we'll just reuse the same sections and
@@ -416,8 +419,7 @@ class ObjcCategoryMerger {
 
 private:
   void collectAndValidateCategoriesData();
-  void
-  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+  void mergeCategoriesIntoSingleCategory(CategoryGroup &categories);
 
   void eraseISec(ConcatInputSection *isec);
   void eraseMergedCategories();
@@ -476,8 +478,9 @@ class ObjcCategoryMerger {
 
   InfoCategoryWriter infoCategoryWriter;
   std::vector<ConcatInputSection *> &allInputSections;
-  // Map of base class Symbol to list of InfoInputCategory's for it
-  DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+  // We'll use this to keep track of which category groups (categories with the
+  // same base class)
+  std::vector<CategoryGroup> categoryGroups;
 
   // Normally, the binary data comes from the input files, but since we're
   // generating binary data ourselves, we use the below array to store it in.
@@ -1043,6 +1046,14 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
 }
 
 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
+
+  // A counter for the cateogries found in the input, used to make category
+  // merging deterministic.
+  uint32_t inputCategoryIndex = 0;
+  // Map of base class Symbol to list of InfoInputCategory's for it. We use this
+  // for fast lookup of categories to their base class. Later this info will be
+  // moved into categoryGroups.
+  DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
   for (InputSection *sec : allInputSections) {
     if (sec->getName() != section_names::objcCatList)
       continue;
@@ -1072,12 +1083,25 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
       assert(classSym && "Category does not have a valid base class");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off,
+                                     inputCategoryIndex++};
       categoryMap[classSym].push_back(catInputInfo);
 
       collectCategoryWriterInfoFromCategory(catInputInfo);
     }
   }
+
+  // Move categoryMap into  categoryGroups and sort by the first category's
+  // inputIndex. This way we can be sure that category merging will be
+  // deterministic across linker runs.
+  categoryGroups.reserve(categoryMap.size());
+  for (auto &mapEntry : categoryMap)
+    categoryGroups.push_back(mapEntry.second);
+
+  std::sort(categoryGroups.begin(), categoryGroups.end(),
+            [](const CategoryGroup &a, const CategoryGroup &b) {
+              return a[0].inputIndex < b[0].inputIndex;
+            });
 }
 
 // In the input we have multiple __objc_catlist InputSection, each of which may
@@ -1153,8 +1177,8 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   // Map of InputSection to a set of offsets of the categories that were merged
   std::map<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
 
-  for (auto &mapEntry : categoryMap) {
-    for (InfoInputCategory &catInfo : mapEntry.second) {
+  for (auto &catGroup : categoryGroups) {
+    for (InfoInputCategory &catInfo : catGroup) {
       if (catInfo.wasMerged) {
         eraseISec(catInfo.catListIsec);
         catListToErasedOffsets[catInfo.catListIsec].insert(
@@ -1169,8 +1193,8 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   generateCatListForNonErasedCategories(catListToErasedOffsets);
 
   // Erase the old method lists & names of the categories that were merged
-  for (auto &mapEntry : categoryMap) {
-    for (InfoInputCategory &catInfo : mapEntry.second) {
+  for (auto &catgroup : categoryGroups) {
+    for (InfoInputCategory &catInfo : catgroup) {
       if (!catInfo.wasMerged)
         continue;
 
@@ -1193,10 +1217,10 @@ void ObjcCategoryMerger::eraseMergedCategories() {
 void ObjcCategoryMerger::doMerge() {
   collectAndValidateCategoriesData();
 
-  for (auto &entry : categoryMap)
-    if (entry.second.size() > 1)
+  for (auto &catGroup : categoryGroups)
+    if (catGroup.size() > 1)
       // Merge all categories into a new, single category
-      mergeCategoriesIntoSingleCategory(entry.second);
+      mergeCategoriesIntoSingleCategory(catGroup);
 
   // Erase all categories that were merged
   eraseMergedCategories();



More information about the llvm-commits mailing list