[lld] [lld-macho] Fix category merging category map non-determinism (PR #91159)

Leandro Lupori via llvm-commits llvm-commits at lists.llvm.org
Mon May 6 09:30:33 PDT 2024


https://github.com/luporl updated https://github.com/llvm/llvm-project/pull/91159

>From 8add44db1e16bd8abf7fbf126b7efb9b50346e72 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Sun, 5 May 2024 19:11:28 -0700
Subject: [PATCH 1/2] [lld-macho] Fix category merging category map
 non-determinism

---
 lld/MachO/ObjC.cpp | 45 +++++++++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 12 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 4760fffebe3b30..a48baec9064f12 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -341,10 +341,13 @@ class ObjcCategoryMerger {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
+    uint32_t inputIndex = 0;
 
     bool wasMerged = false;
   };
 
+  typedef std::vector<InfoInputCategory> CategoryGroup;
+
   // To write new (merged) categories or classes, we will try make limited
   // assumptions about the alignment and the sections the various class/category
   // info are stored in and . So we'll just reuse the same sections and
@@ -416,8 +419,7 @@ class ObjcCategoryMerger {
 
 private:
   void collectAndValidateCategoriesData();
-  void
-  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
+  void mergeCategoriesIntoSingleCategory(CategoryGroup &categories);
 
   void eraseISec(ConcatInputSection *isec);
   void eraseMergedCategories();
@@ -476,8 +478,8 @@ class ObjcCategoryMerger {
 
   InfoCategoryWriter infoCategoryWriter;
   std::vector<ConcatInputSection *> &allInputSections;
-  // Map of base class Symbol to list of InfoInputCategory's for it
-  DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
+  // Info for all input categories, grouped by base class
+  std::vector<CategoryGroup> categoryGroups;
 
   // Normally, the binary data comes from the input files, but since we're
   // generating binary data ourselves, we use the below array to store it in.
@@ -1043,6 +1045,12 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
 }
 
 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
+  // Make category merging deterministic by using a counter for found categories
+  uint32_t inputCategoryIndex = 0;
+  // Map of base class Symbol to list of InfoInputCategory's for it. We use this
+  // for fast lookup of categories to their base class. Later this info will be
+  // moved into 'categoryGroups' member.
+  DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
   for (InputSection *sec : allInputSections) {
     if (sec->getName() != section_names::objcCatList)
       continue;
@@ -1072,12 +1080,25 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
       assert(classSym && "Category does not have a valid base class");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off,
+                                     inputCategoryIndex++};
       categoryMap[classSym].push_back(catInputInfo);
 
       collectCategoryWriterInfoFromCategory(catInputInfo);
     }
   }
+
+  // Move categoryMap into categoryGroups and sort by the first category's
+  // inputIndex. This way we can be sure that category merging will be
+  // deterministic across linker runs.
+  categoryGroups.reserve(categoryMap.size());
+  for (auto &mapEntry : categoryMap)
+    categoryGroups.push_back(mapEntry.second);
+
+  std::sort(categoryGroups.begin(), categoryGroups.end(),
+            [](const CategoryGroup &a, const CategoryGroup &b) {
+              return a[0].inputIndex < b[0].inputIndex;
+            });
 }
 
 // In the input we have multiple __objc_catlist InputSection, each of which may
@@ -1153,8 +1174,8 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   // Map of InputSection to a set of offsets of the categories that were merged
   std::map<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
 
-  for (auto &mapEntry : categoryMap) {
-    for (InfoInputCategory &catInfo : mapEntry.second) {
+  for (auto &catGroup : categoryGroups) {
+    for (InfoInputCategory &catInfo : catGroup) {
       if (catInfo.wasMerged) {
         eraseISec(catInfo.catListIsec);
         catListToErasedOffsets[catInfo.catListIsec].insert(
@@ -1169,8 +1190,8 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   generateCatListForNonErasedCategories(catListToErasedOffsets);
 
   // Erase the old method lists & names of the categories that were merged
-  for (auto &mapEntry : categoryMap) {
-    for (InfoInputCategory &catInfo : mapEntry.second) {
+  for (auto &catgroup : categoryGroups) {
+    for (InfoInputCategory &catInfo : catgroup) {
       if (!catInfo.wasMerged)
         continue;
 
@@ -1193,10 +1214,10 @@ void ObjcCategoryMerger::eraseMergedCategories() {
 void ObjcCategoryMerger::doMerge() {
   collectAndValidateCategoriesData();
 
-  for (auto &entry : categoryMap)
-    if (entry.second.size() > 1)
+  for (auto &catGroup : categoryGroups)
+    if (catGroup.size() > 1)
       // Merge all categories into a new, single category
-      mergeCategoriesIntoSingleCategory(entry.second);
+      mergeCategoriesIntoSingleCategory(catGroup);
 
   // Erase all categories that were merged
   eraseMergedCategories();

>From 5c49bd0e71cc06f669c947008d570abee6c11b0b Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Mon, 6 May 2024 09:30:01 -0700
Subject: [PATCH 2/2] Address Feedback nr.1

---
 lld/MachO/ObjC.cpp | 51 ++++++++++++++--------------------------------
 1 file changed, 15 insertions(+), 36 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index a48baec9064f12..0f286c34398fef 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -341,13 +341,10 @@ class ObjcCategoryMerger {
     ConcatInputSection *catListIsec;
     ConcatInputSection *catBodyIsec;
     uint32_t offCatListIsec = 0;
-    uint32_t inputIndex = 0;
 
     bool wasMerged = false;
   };
 
-  typedef std::vector<InfoInputCategory> CategoryGroup;
-
   // To write new (merged) categories or classes, we will try make limited
   // assumptions about the alignment and the sections the various class/category
   // info are stored in and . So we'll just reuse the same sections and
@@ -419,13 +416,14 @@ class ObjcCategoryMerger {
 
 private:
   void collectAndValidateCategoriesData();
-  void mergeCategoriesIntoSingleCategory(CategoryGroup &categories);
+  void
+  mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
 
   void eraseISec(ConcatInputSection *isec);
   void eraseMergedCategories();
 
   void generateCatListForNonErasedCategories(
-      std::map<ConcatInputSection *, std::set<uint64_t>>
+      MapVector<ConcatInputSection *, std::set<uint64_t>>
           catListToErasedOffsets);
   void collectSectionWriteInfoFromIsec(const InputSection *isec,
                                        InfoWriteSection &catWriteInfo);
@@ -478,8 +476,8 @@ class ObjcCategoryMerger {
 
   InfoCategoryWriter infoCategoryWriter;
   std::vector<ConcatInputSection *> &allInputSections;
-  // Info for all input categories, grouped by base class
-  std::vector<CategoryGroup> categoryGroups;
+  // Map of base class Symbol to list of InfoInputCategory's for it
+  MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
 
   // Normally, the binary data comes from the input files, but since we're
   // generating binary data ourselves, we use the below array to store it in.
@@ -1045,12 +1043,6 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
 }
 
 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
-  // Make category merging deterministic by using a counter for found categories
-  uint32_t inputCategoryIndex = 0;
-  // Map of base class Symbol to list of InfoInputCategory's for it. We use this
-  // for fast lookup of categories to their base class. Later this info will be
-  // moved into 'categoryGroups' member.
-  DenseMap<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
   for (InputSection *sec : allInputSections) {
     if (sec->getName() != section_names::objcCatList)
       continue;
@@ -1080,25 +1072,12 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
       assert(classSym && "Category does not have a valid base class");
 
-      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off,
-                                     inputCategoryIndex++};
+      InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off};
       categoryMap[classSym].push_back(catInputInfo);
 
       collectCategoryWriterInfoFromCategory(catInputInfo);
     }
   }
-
-  // Move categoryMap into categoryGroups and sort by the first category's
-  // inputIndex. This way we can be sure that category merging will be
-  // deterministic across linker runs.
-  categoryGroups.reserve(categoryMap.size());
-  for (auto &mapEntry : categoryMap)
-    categoryGroups.push_back(mapEntry.second);
-
-  std::sort(categoryGroups.begin(), categoryGroups.end(),
-            [](const CategoryGroup &a, const CategoryGroup &b) {
-              return a[0].inputIndex < b[0].inputIndex;
-            });
 }
 
 // In the input we have multiple __objc_catlist InputSection, each of which may
@@ -1107,7 +1086,7 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
 // (not erased). For these not erased categories, we generate new __objc_catlist
 // entries since the parent __objc_catlist entry will be erased
 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
-    const std::map<ConcatInputSection *, std::set<uint64_t>>
+    const MapVector<ConcatInputSection *, std::set<uint64_t>>
         catListToErasedOffsets) {
 
   // Go through all offsets of all __objc_catlist's that we process and if there
@@ -1172,10 +1151,10 @@ void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
 // them.
 void ObjcCategoryMerger::eraseMergedCategories() {
   // Map of InputSection to a set of offsets of the categories that were merged
-  std::map<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
+  MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
 
-  for (auto &catGroup : categoryGroups) {
-    for (InfoInputCategory &catInfo : catGroup) {
+  for (auto &mapEntry : categoryMap) {
+    for (InfoInputCategory &catInfo : mapEntry.second) {
       if (catInfo.wasMerged) {
         eraseISec(catInfo.catListIsec);
         catListToErasedOffsets[catInfo.catListIsec].insert(
@@ -1190,8 +1169,8 @@ void ObjcCategoryMerger::eraseMergedCategories() {
   generateCatListForNonErasedCategories(catListToErasedOffsets);
 
   // Erase the old method lists & names of the categories that were merged
-  for (auto &catgroup : categoryGroups) {
-    for (InfoInputCategory &catInfo : catgroup) {
+  for (auto &mapEntry : categoryMap) {
+    for (InfoInputCategory &catInfo : mapEntry.second) {
       if (!catInfo.wasMerged)
         continue;
 
@@ -1214,10 +1193,10 @@ void ObjcCategoryMerger::eraseMergedCategories() {
 void ObjcCategoryMerger::doMerge() {
   collectAndValidateCategoriesData();
 
-  for (auto &catGroup : categoryGroups)
-    if (catGroup.size() > 1)
+  for (auto &entry : categoryMap)
+    if (entry.second.size() > 1)
       // Merge all categories into a new, single category
-      mergeCategoriesIntoSingleCategory(catGroup);
+      mergeCategoriesIntoSingleCategory(entry.second);
 
   // Erase all categories that were merged
   eraseMergedCategories();



More information about the llvm-commits mailing list