[llvm] c02e8f7 - [llvm][transforms] Add a new algorithm to SplitModule (#95941)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 3 12:26:24 PDT 2024
Author: Ilia Sergachev
Date: 2024-07-03T21:26:21+02:00
New Revision: c02e8f762a410e55581866c43636efcd6504c1bd
URL: https://github.com/llvm/llvm-project/commit/c02e8f762a410e55581866c43636efcd6504c1bd
DIFF: https://github.com/llvm/llvm-project/commit/c02e8f762a410e55581866c43636efcd6504c1bd.diff
LOG: [llvm][transforms] Add a new algorithm to SplitModule (#95941)
The new round-robin algorithm overrides the hash-based distribution of
functions to modules. It achieves a more even number of functions per
module when the number of functions is close to the number of requested
modules. It's not in use by default and is available under a new flag.
Added:
llvm/test/tools/llvm-split/name-hash-based-distribution.ll
llvm/test/tools/llvm-split/round-robin.ll
Modified:
llvm/include/llvm/Transforms/Utils/SplitModule.h
llvm/lib/Transforms/Utils/SplitModule.cpp
llvm/tools/llvm-split/llvm-split.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/SplitModule.h b/llvm/include/llvm/Transforms/Utils/SplitModule.h
index a5450738060a84..e7e8ee6279ace3 100644
--- a/llvm/include/llvm/Transforms/Utils/SplitModule.h
+++ b/llvm/include/llvm/Transforms/Utils/SplitModule.h
@@ -24,6 +24,9 @@ class Module;
/// Splits the module M into N linkable partitions. The function ModuleCallback
/// is called N times passing each individual partition as the MPart argument.
+/// PreserveLocals: Split without externalizing locals.
+/// RoundRobin: Use round-robin distribution of functions to modules instead
+/// of the default name-hash-based one.
///
/// FIXME: This function does not deal with the somewhat subtle symbol
/// visibility issues around module splitting, including (but not limited to):
@@ -35,7 +38,7 @@ class Module;
void SplitModule(
Module &M, unsigned N,
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
- bool PreserveLocals = false);
+ bool PreserveLocals = false, bool RoundRobin = false);
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/SplitModule.cpp b/llvm/lib/Transforms/Utils/SplitModule.cpp
index 55db3737a1c099..a30afadf0365de 100644
--- a/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -55,6 +55,18 @@ using ClusterMapType = EquivalenceClasses<const GlobalValue *>;
using ComdatMembersType = DenseMap<const Comdat *, const GlobalValue *>;
using ClusterIDMapType = DenseMap<const GlobalValue *, unsigned>;
+bool compareClusters(const std::pair<unsigned, unsigned> &A,
+ const std::pair<unsigned, unsigned> &B) {
+ if (A.second || B.second)
+ return A.second > B.second;
+ return A.first > B.first;
+}
+
+using BalancingQueueType =
+ std::priority_queue<std::pair<unsigned, unsigned>,
+ std::vector<std::pair<unsigned, unsigned>>,
+ decltype(compareClusters) *>;
+
} // end anonymous namespace
static void addNonConstUser(ClusterMapType &GVtoClusterMap,
@@ -154,18 +166,7 @@ static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap,
// Assigned all GVs to merged clusters while balancing number of objects in
// each.
- auto CompareClusters = [](const std::pair<unsigned, unsigned> &a,
- const std::pair<unsigned, unsigned> &b) {
- if (a.second || b.second)
- return a.second > b.second;
- else
- return a.first > b.first;
- };
-
- std::priority_queue<std::pair<unsigned, unsigned>,
- std::vector<std::pair<unsigned, unsigned>>,
- decltype(CompareClusters)>
- BalancingQueue(CompareClusters);
+ BalancingQueueType BalancingQueue(compareClusters);
// Pre-populate priority queue with N slot blanks.
for (unsigned i = 0; i < N; ++i)
BalancingQueue.push(std::make_pair(i, 0));
@@ -254,7 +255,7 @@ static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
void llvm::SplitModule(
Module &M, unsigned N,
function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback,
- bool PreserveLocals) {
+ bool PreserveLocals, bool RoundRobin) {
if (!PreserveLocals) {
for (Function &F : M)
externalize(&F);
@@ -271,6 +272,41 @@ void llvm::SplitModule(
ClusterIDMapType ClusterIDMap;
findPartitions(M, ClusterIDMap, N);
+ // Find functions not mapped to modules in ClusterIDMap and count functions
+ // per module. Map unmapped functions using round-robin so that they skip
+ // being distributed by isInPartition() based on function name hashes below.
+ // This provides better uniformity of distribution of functions to modules
+ // in some cases - for example when the number of functions equals to N.
+ if (RoundRobin) {
+ DenseMap<unsigned, unsigned> ModuleFunctionCount;
+ SmallVector<const GlobalValue *> UnmappedFunctions;
+ for (const auto &F : M.functions()) {
+ if (F.isDeclaration() ||
+ F.getLinkage() != GlobalValue::LinkageTypes::ExternalLinkage)
+ continue;
+ auto It = ClusterIDMap.find(&F);
+ if (It == ClusterIDMap.end())
+ UnmappedFunctions.push_back(&F);
+ else
+ ++ModuleFunctionCount[It->second];
+ }
+ BalancingQueueType BalancingQueue(compareClusters);
+ for (unsigned I = 0; I < N; ++I) {
+ if (auto It = ModuleFunctionCount.find(I);
+ It != ModuleFunctionCount.end())
+ BalancingQueue.push(*It);
+ else
+ BalancingQueue.push({I, 0});
+ }
+ for (const auto *const F : UnmappedFunctions) {
+ const unsigned I = BalancingQueue.top().first;
+ const unsigned Count = BalancingQueue.top().second;
+ BalancingQueue.pop();
+ ClusterIDMap.insert({F, I});
+ BalancingQueue.push({I, Count + 1});
+ }
+ }
+
// FIXME: We should be able to reuse M as the last partition instead of
// cloning it. Note that the callers at the moment expect the module to
// be preserved, so will need some adjustments as well.
diff --git a/llvm/test/tools/llvm-split/name-hash-based-distribution.ll b/llvm/test/tools/llvm-split/name-hash-based-distribution.ll
new file mode 100644
index 00000000000000..b0e2d5eba12f96
--- /dev/null
+++ b/llvm/test/tools/llvm-split/name-hash-based-distribution.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-split -o %t %s -j 2
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0-NOT: define
+; CHECK0: define void @D
+; CHECK0-NOT: define
+
+; CHECK1-NOT: define
+; CHECK1: define void @A
+; CHECK1: define void @B
+; CHECK1: define void @C
+; CHECK1-NOT: define
+
+define void @A() {
+ ret void
+}
+
+define void @B() {
+ ret void
+}
+
+define void @C() {
+ ret void
+}
+
+define void @D() {
+ ret void
+}
diff --git a/llvm/test/tools/llvm-split/round-robin.ll b/llvm/test/tools/llvm-split/round-robin.ll
new file mode 100644
index 00000000000000..385c8968887104
--- /dev/null
+++ b/llvm/test/tools/llvm-split/round-robin.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-split -o %t %s -j 2 -round-robin
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
+
+; CHECK0-NOT: define
+; CHECK0: declare extern_weak void @e
+; CHECK0: define void @A
+; CHECK0: define void @C
+; CHECK0-NOT: define
+
+; CHECK1-NOT: define
+; CHECK1: declare extern_weak void @e
+; CHECK1: define void @B
+; CHECK1: define void @D
+; CHECK1-NOT: define
+
+declare extern_weak void @e(...)
+
+define void @A() {
+ ret void
+}
+
+define void @B() {
+ ret void
+}
+
+define void @C() {
+ ret void
+}
+
+define void @D() {
+ ret void
+}
diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp
index 39a89cb1d2e75c..c456403e6bc68d 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -53,6 +53,12 @@ static cl::opt<bool>
cl::desc("Split without externalizing locals"),
cl::cat(SplitCategory));
+static cl::opt<bool>
+ RoundRobin("round-robin", cl::Prefix, cl::init(false),
+ cl::desc("Use round-robin distribution of functions to "
+ "modules instead of the default name-hash-based one"),
+ cl::cat(SplitCategory));
+
static cl::opt<std::string>
MTriple("mtriple",
cl::desc("Target triple. When present, a TargetMachine is created "
@@ -122,6 +128,9 @@ int main(int argc, char **argv) {
errs() << "warning: -preserve-locals has no effect when using "
"TargetMachine::splitModule\n";
}
+ if (RoundRobin)
+ errs() << "warning: -round-robin has no effect when using "
+ "TargetMachine::splitModule\n";
if (TM->splitModule(*M, NumOutputs, HandleModulePart))
return 0;
@@ -131,6 +140,6 @@ int main(int argc, char **argv) {
"splitModule implementation\n";
}
- SplitModule(*M, NumOutputs, HandleModulePart, PreserveLocals);
+ SplitModule(*M, NumOutputs, HandleModulePart, PreserveLocals, RoundRobin);
return 0;
}
More information about the llvm-commits mailing list