[llvm] [AMDGPU][SplitModule] Do not create empty modules (PR #135761)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 15 01:41:17 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
Skip creating a module if no function is going to be imported.
Also includes a change so that if the first partition is empty (which can happen),
we import globals with non-local linkage into the first non-empty partition, instead
of always importing them into P0.
I thought we'd need to change users of the SplitModule callback so they can deal with fewer modules
than the number requested, but no. We already return only 1 module in some cases and
it seems to be handled just fine.
Fixes SWDEV-523146
---
Full diff: https://github.com/llvm/llvm-project/pull/135761.diff
6 Files Affected:
- (modified) llvm/include/llvm/Target/TargetMachine.h (+3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp (+14-1)
- (modified) llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll (+4-8)
- (modified) llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll (+10-12)
- (modified) llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll (+10-12)
- (added) llvm/test/tools/llvm-split/AMDGPU/preserve-globals.ll (+21)
``````````diff
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 5a16c9cafcd7a..566e7dba6792b 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -451,6 +451,9 @@ class TargetMachine {
/// Entry point for module splitting. Targets can implement custom module
/// splitting logic, mainly used by LTO for --lto-partitions.
///
+ /// On success, this guarantees that between 1 and \p NumParts modules were
+ /// created and passed to \p ModuleCallBack.
+ ///
/// \returns `true` if the module was split, `false` otherwise. When `false`
/// is returned, it is assumed that \p ModuleCallback has never been called
/// and \p M has not been modified.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index dd3bec774ec67..b7b9814b93427 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -1478,6 +1478,10 @@ static void splitAMDGPUModule(
<< "' - Partition summaries will not be printed\n";
}
+ // One module will import all GlobalValues that are not Functions
+ // and are not subject to conservative import.
+ bool ImportAllGVs = true;
+
for (unsigned PID = 0; PID < NumParts; ++PID) {
SplitModuleTimer SMT2("modules_creation",
"creating modules for each partition");
@@ -1487,6 +1491,13 @@ static void splitAMDGPUModule(
for (unsigned NodeID : (*Proposal)[PID].set_bits())
FnsInPart.insert(&SG.getNode(NodeID).getFunction());
+ // Don't create empty module, except for PID 0 because
+ if (FnsInPart.empty()) {
+ LLVM_DEBUG(dbgs() << "[split] P" << PID
+ << " is empty, not creating module\n");
+ continue;
+ }
+
ValueToValueMapTy VMap;
CostType PartCost = 0;
std::unique_ptr<Module> MPart(
@@ -1501,9 +1512,11 @@ static void splitAMDGPUModule(
}
// Everything else goes in the first partition.
- return needsConservativeImport(GV) || PID == 0;
+ return needsConservativeImport(GV) || ImportAllGVs;
}));
+ ImportAllGVs = false;
+
// FIXME: Aliases aren't seen often, and their handling isn't perfect so
// bugs are possible.
diff --git a/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll b/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll
index 708b5a006be60..0f20a73b1928e 100644
--- a/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll
+++ b/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll
@@ -1,7 +1,6 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
-; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; 3 kernels:
; - A does a direct call to HelperA
@@ -11,14 +10,11 @@
; The helper functions will get externalized, so C/A will end up
; in the same partition.
-; P0 is empty.
-; CHECK0: declare
+; CHECK0: define amdgpu_kernel void @B(ptr %dst)
-; CHECK1: define amdgpu_kernel void @B(ptr %dst)
-
-; CHECK2: define hidden void @HelperA()
-; CHECK2: define amdgpu_kernel void @A()
-; CHECK2: define amdgpu_kernel void @C()
+; CHECK1: define hidden void @HelperA()
+; CHECK1: define amdgpu_kernel void @A()
+; CHECK1: define amdgpu_kernel void @C()
define internal void @HelperA() {
ret void
diff --git a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
index 839688e7feb8b..567275686fb9f 100644
--- a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
+++ b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
@@ -1,7 +1,6 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
-; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
@@ -15,19 +14,18 @@
; Also check w/o large kernels processing to verify they are indeed handled
; differently.
-; P0 is empty
-; CHECK0: declare
+; Only two partitions created for the first command.
-; CHECK1: define internal void @HelperC()
-; CHECK1: define weak_odr amdgpu_kernel void @C
+; CHECK0: define internal void @HelperC()
+; CHECK0: define weak_odr amdgpu_kernel void @C
-; CHECK2: define internal void @large2()
-; CHECK2: define internal void @large1()
-; CHECK2: define internal void @large0()
-; CHECK2: define internal void @HelperA()
-; CHECK2: define internal void @HelperB()
-; CHECK2: define amdgpu_kernel void @A
-; CHECK2: define weak_odr amdgpu_kernel void @B
+; CHECK1: define internal void @large2()
+; CHECK1: define internal void @large1()
+; CHECK1: define internal void @large0()
+; CHECK1: define internal void @HelperA()
+; CHECK1: define internal void @HelperB()
+; CHECK1: define amdgpu_kernel void @A
+; CHECK1: define weak_odr amdgpu_kernel void @B
; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
; NOLARGEKERNELS-CHECK0: define weak_odr amdgpu_kernel void @C
diff --git a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll
index 807fb2e5f33ce..35133d20c4e07 100644
--- a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll
+++ b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll
@@ -1,7 +1,6 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
-; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
@@ -15,19 +14,18 @@
; Also check w/o large kernels processing to verify they are indeed handled
; differently.
-; P0 is empty
-; CHECK0: declare
+; Only 2 partitions for the first command.
-; CHECK1: define internal void @HelperC()
-; CHECK1: define amdgpu_kernel void @C
+; CHECK0: define internal void @HelperC()
+; CHECK0: define amdgpu_kernel void @C
-; CHECK2: define internal void @large2()
-; CHECK2: define internal void @large1()
-; CHECK2: define internal void @large0()
-; CHECK2: define internal void @HelperA()
-; CHECK2: define internal void @HelperB()
-; CHECK2: define amdgpu_kernel void @A
-; CHECK2: define amdgpu_kernel void @B
+; CHECK1: define internal void @large2()
+; CHECK1: define internal void @large1()
+; CHECK1: define internal void @large0()
+; CHECK1: define internal void @HelperA()
+; CHECK1: define internal void @HelperB()
+; CHECK1: define amdgpu_kernel void @A
+; CHECK1: define amdgpu_kernel void @B
; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
; NOLARGEKERNELS-CHECK0: define amdgpu_kernel void @C
diff --git a/llvm/test/tools/llvm-split/AMDGPU/preserve-globals.ll b/llvm/test/tools/llvm-split/AMDGPU/preserve-globals.ll
new file mode 100644
index 0000000000000..091010edd6be3
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/preserve-globals.ll
@@ -0,0 +1,21 @@
+; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
+
+; Only 2 out of 3 partitions are created, check the external global is preserved in the first partition.
+
+; CHECK0: @foobar = linkonce_odr global i64 52
+; CHECK0: define amdgpu_kernel void @B
+
+; CHECK1-NOT: @foobar = linkonce_odr global i64 52
+; CHECK1: define amdgpu_kernel void @A
+
+@foobar = linkonce_odr global i64 52
+
+define amdgpu_kernel void @A() {
+ ret void
+}
+
+define amdgpu_kernel void @B() {
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/135761
More information about the llvm-commits
mailing list