[PATCH] D87966: [ThinLTO] Re-order modules for optimal multi-threaded processing

Tue Sep 22 08:26:20 PDT 2020

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
aganea marked an inline comment as done.
Closed by commit rG6537004913f3: [ThinLTO] Re-order modules for optimal multi-threaded processing (authored by aganea).

Changed prior to commit:
  https://reviews.llvm.org/D87966?vs=293455&id=293462#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D87966/new/

https://reviews.llvm.org/D87966

Files:
  llvm/include/llvm/LTO/LTO.h
  llvm/lib/LTO/LTO.cpp
  llvm/lib/LTO/ThinLTOCodeGenerator.cpp


Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp
===================================================================

--- llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1054,19 +1054,11 @@
     ModuleToDefinedGVSummaries[ModuleIdentifier];
   }
 
-  // Compute the ordering we will process the inputs: the rough heuristic here
-  // is to sort them per size so that the largest module get schedule as soon as
-  // possible. This is purely a compile-time optimization.
-  std::vector<int> ModulesOrdering;
-  ModulesOrdering.resize(Modules.size());
-  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
-  llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
-    auto LSize =
-        Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
-    auto RSize =
-        Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
-    return LSize > RSize;
-  });
+  std::vector<BitcodeModule *> ModulesVec;
+  ModulesVec.reserve(Modules.size());
+  for (auto &Mod : Modules)
+    ModulesVec.push_back(&Mod->getSingleBitcodeModule());
+  std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
 
   // Parallel optimizer + codegen
   {
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -1443,15 +1443,21 @@
   auto &ModuleMap =
       ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
 
+  std::vector<BitcodeModule *> ModulesVec;
+  ModulesVec.reserve(ModuleMap.size());
+  for (auto &Mod : ModuleMap)
+    ModulesVec.push_back(&Mod.second);
+  std::vector<int> ModulesOrdering = generateModulesOrdering(ModulesVec);
+
   // Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
   // module and parallel code generation partitions.
-  unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
-  for (auto &Mod : ModuleMap) {
-    if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
-                                     ExportLists[Mod.first],
-                                     ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+  for (auto IndexCount : ModulesOrdering) {
+    auto &Mod = *(ModuleMap.begin() + IndexCount);
+    if (Error E = BackendProc->start(
+            RegularLTO.ParallelCodeGenParallelismLevel + IndexCount, Mod.second,
+            ImportLists[Mod.first], ExportLists[Mod.first],
+            ResolvedODR[Mod.first], ThinLTO.ModuleMap))
       return E;
-    ++Task;
   }
 
   return BackendProc->wait();
@@ -1495,3 +1501,18 @@
   StatsFile->keep();
   return std::move(StatsFile);
 }
+
+// Compute the ordering we will process the inputs: the rough heuristic here
+// is to sort them per size so that the largest module get schedule as soon as
+// possible. This is purely a compile-time optimization.
+std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
+  std::vector<int> ModulesOrdering;
+  ModulesOrdering.resize(R.size());
+  std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+  llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+    auto LSize = R[LeftIndex]->getBuffer().size();
+    auto RSize = R[RightIndex]->getBuffer().size();
+    return LSize > RSize;
+  });
+  return ModulesOrdering;
+}
Index: llvm/include/llvm/LTO/LTO.h
===================================================================
--- llvm/include/llvm/LTO/LTO.h
+++ llvm/include/llvm/LTO/LTO.h
@@ -91,6 +91,10 @@
 Expected<std::unique_ptr<ToolOutputFile>>
 setupStatsFile(StringRef StatsFilename);
 
+/// Produces a container ordering for optimal multi-threaded processing. Returns
+/// ordered indices to elements in the input array.
+std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
+
 class LTO;
 struct SymbolResolution;
 class ThinBackendProc;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D87966.293462.patch
Type: text/x-patch
Size: 3889 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200922/8a199fef/attachment.bin>