[PATCH] D87966: [ThinLTO] Re-order modules for optimal multi-threaded processing
Alexandre Ganea via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 22 07:55:02 PDT 2020
aganea updated this revision to Diff 293455.
aganea edited the summary of this revision.
aganea added a comment.
Works as intended, thanks @tejohnson!
All tests pass. Please have another look.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D87966/new/
https://reviews.llvm.org/D87966
Files:
llvm/include/llvm/LTO/LTO.h
llvm/lib/LTO/LTO.cpp
llvm/lib/LTO/ThinLTOCodeGenerator.cpp
Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp
===================================================================
--- llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1054,19 +1054,11 @@
ModuleToDefinedGVSummaries[ModuleIdentifier];
}
- // Compute the ordering we will process the inputs: the rough heuristic here
- // is to sort them per size so that the largest module get schedule as soon as
- // possible. This is purely a compile-time optimization.
- std::vector<int> ModulesOrdering;
- ModulesOrdering.resize(Modules.size());
- std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
- llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
- auto LSize =
- Modules[LeftIndex]->getSingleBitcodeModule().getBuffer().size();
- auto RSize =
- Modules[RightIndex]->getSingleBitcodeModule().getBuffer().size();
- return LSize > RSize;
- });
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(Modules.size());
+ for (auto &Mod : Modules)
+ ModulesVec.push_back(&Mod->getSingleBitcodeModule());
+ std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
// Parallel optimizer + codegen
{
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -1443,15 +1443,21 @@
auto &ModuleMap =
ThinLTO.ModulesToCompile ? *ThinLTO.ModulesToCompile : ThinLTO.ModuleMap;
+ std::vector<BitcodeModule *> ModulesVec;
+ ModulesVec.reserve(ModuleMap.size());
+ for (auto &Mod : ModuleMap)
+ ModulesVec.push_back(&Mod.second);
+ std::vector<int> ModulesOrdering = generateModulesOrdering(ModulesVec);
+
// Tasks 0 through ParallelCodeGenParallelismLevel-1 are reserved for combined
// module and parallel code generation partitions.
- unsigned Task = RegularLTO.ParallelCodeGenParallelismLevel;
- for (auto &Mod : ModuleMap) {
- if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
- ExportLists[Mod.first],
- ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+ for (auto IndexCount : ModulesOrdering) {
+ auto &Mod = *(ModuleMap.begin() + IndexCount);
+ if (Error E = BackendProc->start(
+ RegularLTO.ParallelCodeGenParallelismLevel + IndexCount, Mod.second,
+ ImportLists[Mod.first], ExportLists[Mod.first],
+ ResolvedODR[Mod.first], ThinLTO.ModuleMap))
return E;
- ++Task;
}
return BackendProc->wait();
@@ -1495,3 +1501,18 @@
StatsFile->keep();
return std::move(StatsFile);
}
+
+// Compute the order in which we will process the inputs: the rough heuristic
+// here is to sort them by size so that the largest modules get scheduled as
+// soon as possible. This is purely a compile-time optimization.
+std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
+ std::vector<int> ModulesOrdering;
+ ModulesOrdering.resize(R.size());
+ std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
+ llvm::sort(ModulesOrdering, [&](int LeftIndex, int RightIndex) {
+ auto LSize = R[LeftIndex]->getBuffer().size();
+ auto RSize = R[RightIndex]->getBuffer().size();
+ return LSize > RSize;
+ });
+ return ModulesOrdering;
+}
Index: llvm/include/llvm/LTO/LTO.h
===================================================================
--- llvm/include/llvm/LTO/LTO.h
+++ llvm/include/llvm/LTO/LTO.h
@@ -91,6 +91,10 @@
Expected<std::unique_ptr<ToolOutputFile>>
setupStatsFile(StringRef StatsFilename);
+/// Produce an ordering of the input modules for optimal multi-threaded
+/// processing. Returns indices of elements in the input array.
+std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
+
class LTO;
struct SymbolResolution;
class ThinBackendProc;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D87966.293455.patch
Type: text/x-patch
Size: 3884 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200922/d96a2d7e/attachment.bin>
More information about the llvm-commits
mailing list