[llvm] 83ad90d - [AMDGPU] Fix module split's assumption on kernels
Siu Chi Chan via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 14:56:42 PST 2024
Author: Siu Chi Chan
Date: 2024-12-06T22:56:31Z
New Revision: 83ad90d851f9e32a51d56193125ab596cc3636b6
URL: https://github.com/llvm/llvm-project/commit/83ad90d851f9e32a51d56193125ab596cc3636b6
DIFF: https://github.com/llvm/llvm-project/commit/83ad90d851f9e32a51d56193125ab596cc3636b6.diff
LOG: [AMDGPU] Fix module split's assumption on kernels
Module split assumes that a kernel function must have an external
linkage; however, that isn't the case. For example, a static kernel
function will have a weak_odr linkage.
Change-Id: I1e5dee0de1fd866b365f4090a574e1b2961f8dca
Added:
llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 3332ed9c006e54..65aee2f70104b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -158,13 +158,12 @@ static auto formatRatioOf(CostType Num, CostType Dem) {
/// Non-copyable functions cannot be cloned into multiple partitions, and only
/// one copy of the function can be present across all partitions.
///
-/// External functions fall into this category. If we were to clone them, we
-/// would end up with multiple symbol definitions and a very unhappy linker.
+/// Kernel functions and external functions fall into this category. If we were
+/// to clone them, we would end up with multiple symbol definitions and a very
+/// unhappy linker.
static bool isNonCopyable(const Function &F) {
- assert(AMDGPU::isEntryFunctionCC(F.getCallingConv())
- ? F.hasExternalLinkage()
- : true && "Kernel w/o external linkage?");
- return F.hasExternalLinkage() || !F.isDefinitionExact();
+ return F.hasExternalLinkage() || !F.isDefinitionExact() ||
+ AMDGPU::isEntryFunctionCC(F.getCallingConv());
}
/// If \p GV has local linkage, make it external + hidden.
diff --git a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
new file mode 100644
index 00000000000000..839688e7feb8bb
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
+
+; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
+; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 --implicit-check-not=define %s
+
+; 2 kernels (A/B) are large and share all their dependencies.
+; They should go in the same partition, the remaining kernel should
+; go somewhere else, and one partition should be empty.
+;
+; Also check w/o large kernels processing to verify they are indeed handled
+; differently.
+
+; P0 is empty
+; CHECK0: declare
+
+; CHECK1: define internal void @HelperC()
+; CHECK1: define weak_odr amdgpu_kernel void @C
+
+; CHECK2: define internal void @large2()
+; CHECK2: define internal void @large1()
+; CHECK2: define internal void @large0()
+; CHECK2: define internal void @HelperA()
+; CHECK2: define internal void @HelperB()
+; CHECK2: define amdgpu_kernel void @A
+; CHECK2: define weak_odr amdgpu_kernel void @B
+
+; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
+; NOLARGEKERNELS-CHECK0: define weak_odr amdgpu_kernel void @C
+
+; NOLARGEKERNELS-CHECK1: define internal void @large2()
+; NOLARGEKERNELS-CHECK1: define internal void @large1()
+; NOLARGEKERNELS-CHECK1: define internal void @large0()
+; NOLARGEKERNELS-CHECK1: define internal void @HelperB()
+; NOLARGEKERNELS-CHECK1: define weak_odr amdgpu_kernel void @B
+
+; NOLARGEKERNELS-CHECK2: define internal void @large2()
+; NOLARGEKERNELS-CHECK2: define internal void @large1()
+; NOLARGEKERNELS-CHECK2: define internal void @large0()
+; NOLARGEKERNELS-CHECK2: define internal void @HelperA()
+; NOLARGEKERNELS-CHECK2: define amdgpu_kernel void @A
+
+
+define internal void @large2() {
+ store volatile i32 42, ptr null
+ call void @large2()
+ ret void
+}
+
+define internal void @large1() {
+ call void @large1()
+ call void @large2()
+ ret void
+}
+
+define internal void @large0() {
+ call void @large0()
+ call void @large1()
+ call void @large2()
+ ret void
+}
+
+define internal void @HelperA() {
+ call void @large0()
+ ret void
+}
+
+define internal void @HelperB() {
+ call void @large0()
+ ret void
+}
+
+define amdgpu_kernel void @A() {
+ call void @HelperA()
+ ret void
+}
+
+define weak_odr amdgpu_kernel void @B() {
+ call void @HelperB()
+ ret void
+}
+
+define internal void @HelperC() {
+ ret void
+}
+
+define weak_odr amdgpu_kernel void @C() {
+ call void @HelperC()
+ ret void
+}
More information about the llvm-commits mailing list