[PATCH] D87966: [ThinLTO] Re-order modules for optimal multi-threaded processing
Alexandre Ganea via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 22 08:26:20 PDT 2020
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
aganea marked an inline comment as done.
Closed by commit rG6537004913f3: [ThinLTO] Re-order modules for optimal multi-threaded processing (authored by aganea).
Changed prior to commit:
https://reviews.llvm.org/D87966?vs=293455&id=293462#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D87966/new/
https://reviews.llvm.org/D87966
Files:
llvm/include/llvm/LTO/LTO.h
llvm/lib/LTO/LTO.cpp
llvm/lib/LTO/ThinLTOCodeGenerator.cpp
Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp
===================================================================
--- llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1054,19 +1054,11 @@
ModuleToDefinedGVSummaries[ModuleIdentifier];
}
- // Compute the ordering we will process the inputs: the rough heuristic here
- // is to sort them per size so that the largest module get schedule as soon as
- // possible. This is purely a compile-time optimization.
- std::vector<int> ModulesOrdering;
- ModulesOrdering.resize(Modules.size());
- std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
- llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
- auto LSize =
- Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
- auto RSize =
- Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
- return LSize > RSize;
- });
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(Modules.size());
+ for (auto &Mod : Modules)
+ ModulesVec.push_back(&Mod->getSingleBitcodeModule());
+ std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
// Parallel optimizer + codegen
{
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -1443,15 +1443,21 @@
auto &ModuleMap =
ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(ModuleMap.size());
+ for (auto &Mod : ModuleMap)
+ ModulesVec.push_back(&Mod.second);
+ std::vector<int> ModulesOrdering = generateModulesOrdering(ModulesVec);
+
// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
// module and parallel code generation partitions.
- unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
- for (auto &Mod : ModuleMap) {
- if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
- ExportLists[Mod.first],
- ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+ for (auto IndexCount : ModulesOrdering) {
+ auto &Mod = *(ModuleMap.begin() + IndexCount);
+ if (Error E = BackendProc->start(
+ RegularLTO.ParallelCodeGenParallelismLevel + IndexCount, Mod.second,
+ ImportLists[Mod.first], ExportLists[Mod.first],
+ ResolvedODR[Mod.first], ThinLTO.ModuleMap))
return E;
- ++Task;
}
return BackendProc->wait();
@@ -1495,3 +1501,18 @@
StatsFile->keep();
return std::move(StatsFile);
}
+
+// Compute the order in which the inputs will be processed. The rough heuristic
+// here is to sort them by size so that the largest modules get scheduled as
+// soon as possible. This is purely a compile-time optimization.
+std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
+ std::vector<int> ModulesOrdering;
+ ModulesOrdering.resize(R.size());
+ std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+ llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+ auto LSize = R[LeftIndex]->getBuffer().size();
+ auto RSize = R[RightIndex]->getBuffer().size();
+ return LSize > RSize;
+ });
+ return ModulesOrdering;
+}
Index: llvm/include/llvm/LTO/LTO.h
===================================================================
--- llvm/include/llvm/LTO/LTO.h
+++ llvm/include/llvm/LTO/LTO.h
@@ -91,6 +91,10 @@
Expected<std::unique_ptr<ToolOutputFile>>
setupStatsFile(StringRef StatsFilename);
+/// Produces a container ordering for optimal multi-threaded processing. Returns
+/// ordered indices to elements in the input array.
+std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
+
class LTO;
struct SymbolResolution;
class ThinBackendProc;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D87966.293462.patch
Type: text/x-patch
Size: 3889 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200922/8a199fef/attachment.bin>
More information about the llvm-commits
mailing list