[llvm] 9b5b305 - Temporarily revert "[ThinLTO] Re-order modules for optimal multi-threaded processing"

Fri Oct 9 15:19:08 PDT 2020

Author: Jordan Rupprecht
Date: 2020-10-09T14:36:20-07:00
New Revision: 9b5b3050237db3642ed7ab1bdb3ffa2202511b99

URL: https://github.com/llvm/llvm-project/commit/9b5b3050237db3642ed7ab1bdb3ffa2202511b99
DIFF: https://github.com/llvm/llvm-project/commit/9b5b3050237db3642ed7ab1bdb3ffa2202511b99.diff

LOG: Temporarily revert "[ThinLTO] Re-order modules for optimal multi-threaded processing"

This reverts commit 6537004913f3009d896bc30856698e7d22199ba7. It is causing test failures internally. While a few of the cases turned out to be bad user code (relying on a specific order of static initialization across translation units), some cases are less clear. Reverting temporarily; Teresa is going to follow up with more details.

Added: 
    

Modified: 
    llvm/include/llvm/LTO/LTO.h
    llvm/lib/LTO/LTO.cpp
    llvm/lib/LTO/ThinLTOCodeGenerator.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index a47f0cc0c3c0..93456c0ae7ae 100644

--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -91,10 +91,6 @@ setupLLVMOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename,
 Expected<std::unique_ptr<ToolOutputFile>>
 setupStatsFile(StringRef StatsFilename);
 
-/// Produces a container ordering for optimal multi-threaded processing. Returns
-/// ordered indices to elements in the input array.
-std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
-
 class LTO;
 struct SymbolResolution;
 class ThinBackendProc;

diff  --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 4fbb3ad5bb4a..6230216aa446 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1443,21 +1443,15 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
   auto &ModuleMap =
       ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
 
-  std::vector<BitcodeModule *> ModulesVec;
-  ModulesVec.reserve(ModuleMap.size());
-  for (auto &Mod : ModuleMap)
-    ModulesVec.push_back(&Mod.second);
-  std::vector<int> ModulesOrdering = generateModulesOrdering(ModulesVec);
-
   // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
   // module and parallel code generation partitions.
-  for (auto IndexCount : ModulesOrdering) {
-    auto &Mod = *(ModuleMap.begin() + IndexCount);
-    if (Error E = BackendProc->start(
-            RegularLTO.ParallelCodeGenParallelismLevel + IndexCount, Mod.second,
-            ImportLists[Mod.first], ExportLists[Mod.first],
-            ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+  unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
+  for (auto &Mod : ModuleMap) {
+    if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
+                                     ExportLists[Mod.first],
+                                     ResolvedODR[Mod.first], ThinLTO.ModuleMap))
       return E;
+    ++Task;
   }
 
   return BackendProc->wait();
@@ -1501,18 +1495,3 @@ lto::setupStatsFile(StringRef StatsFilename) {
   StatsFile->keep();
   return std::move(StatsFile);
 }
-
-// Compute the ordering we will process the inputs: the rough heuristic here
-// is to sort them per size so that the largest module get schedule as soon as
-// possible. This is purely a compile-time optimization.
-std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
-  std::vector<int> ModulesOrdering;
-  ModulesOrdering.resize(R.size());
-  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
-  llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
-    auto LSize = R[LeftIndex]->getBuffer().size();
-    auto RSize = R[RightIndex]->getBuffer().size();
-    return LSize > RSize;
-  });
-  return ModulesOrdering;
-}

diff  --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 3f71487951c8..14dae848b362 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1054,11 +1054,19 @@ void ThinLTOCodeGenerator::run() {
     ModuleToDefinedGVSummaries[ModuleIdentifier];
   }
 
-  std::vector<BitcodeModule *> ModulesVec;
-  ModulesVec.reserve(Modules.size());
-  for (auto &Mod : Modules)
-    ModulesVec.push_back(&Mod->getSingleBitcodeModule());
-  std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
+  // Compute the ordering we will process the inputs: the rough heuristic here
+  // is to sort them per size so that the largest module get schedule as soon as
+  // possible. This is purely a compile-time optimization.
+  std::vector<int> ModulesOrdering;
+  ModulesOrdering.resize(Modules.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+  llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+    auto LSize =
+        Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
+    auto RSize =
+        Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
+    return LSize > RSize;
+  });
 
   // Parallel optimizer + codegen
   {