[llvm] [AMDGPU] Fix module split's assumption on kernels (PR #116280)
Siu Chi Chan via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 29 07:56:50 PST 2024
https://github.com/scchan updated https://github.com/llvm/llvm-project/pull/116280
>From 9ba9fd2da5507ae97835b2ded98ef1d3bacbd292 Mon Sep 17 00:00:00 2001
From: Siu Chi Chan <siuchi.chan at amd.com>
Date: Thu, 14 Nov 2024 01:01:44 +0000
Subject: [PATCH] [AMDGPU] Fix module split's assumption on kernels
Module split assumes that a kernel function must have an external
linkage; however, that isn't the case. For example, a static kernel
function will have a weak_odr linkage
Change-Id: I1e5dee0de1fd866b365f4090a574e1b2961f8dca
---
llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp | 11 +--
.../AMDGPU/large-kernels-merging-weak_odr.ll | 94 +++++++++++++++++++
2 files changed, 99 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index 3332ed9c006e54..09e7c5389cdbf9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -158,13 +158,12 @@ static auto formatRatioOf(CostType Num, CostType Dem) {
/// Non-copyable functions cannot be cloned into multiple partitions, and only
/// one copy of the function can be present across all partitions.
///
-/// External functions fall into this category. If we were to clone them, we
-/// would end up with multiple symbol definitions and a very unhappy linker.
+/// Kernel functions and external functions fall into this category. If we were
+/// to clone them, we would end up with multiple symbol definitions and a very
+/// unhappy linker.
static bool isNonCopyable(const Function &F) {
- assert(AMDGPU::isEntryFunctionCC(F.getCallingConv())
- ? F.hasExternalLinkage()
- : true && "Kernel w/o external linkage?");
- return F.hasExternalLinkage() || !F.isDefinitionExact();
+ return AMDGPU::isEntryFunctionCC(F.getCallingConv()) ||
+ F.hasExternalLinkage() || !F.isDefinitionExact();
}
/// If \p GV has local linkage, make it external + hidden.
diff --git a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
new file mode 100644
index 00000000000000..839688e7feb8bb
--- /dev/null
+++ b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
@@ -0,0 +1,94 @@
+; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
+; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
+
+; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
+; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 --implicit-check-not=define %s
+; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 --implicit-check-not=define %s
+
+; 2 kernels (A/B) are large and share all their dependencies.
+; They should go in the same partition, the remaining kernel should
+; go somewhere else, and one partition should be empty.
+;
+; Also check w/o large kernels processing to verify they are indeed handled
+; differently.
+
+; P0 is empty
+; CHECK0: declare
+
+; CHECK1: define internal void @HelperC()
+; CHECK1: define weak_odr amdgpu_kernel void @C
+
+; CHECK2: define internal void @large2()
+; CHECK2: define internal void @large1()
+; CHECK2: define internal void @large0()
+; CHECK2: define internal void @HelperA()
+; CHECK2: define internal void @HelperB()
+; CHECK2: define amdgpu_kernel void @A
+; CHECK2: define weak_odr amdgpu_kernel void @B
+
+; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
+; NOLARGEKERNELS-CHECK0: define weak_odr amdgpu_kernel void @C
+
+; NOLARGEKERNELS-CHECK1: define internal void @large2()
+; NOLARGEKERNELS-CHECK1: define internal void @large1()
+; NOLARGEKERNELS-CHECK1: define internal void @large0()
+; NOLARGEKERNELS-CHECK1: define internal void @HelperB()
+; NOLARGEKERNELS-CHECK1: define weak_odr amdgpu_kernel void @B
+
+; NOLARGEKERNELS-CHECK2: define internal void @large2()
+; NOLARGEKERNELS-CHECK2: define internal void @large1()
+; NOLARGEKERNELS-CHECK2: define internal void @large0()
+; NOLARGEKERNELS-CHECK2: define internal void @HelperA()
+; NOLARGEKERNELS-CHECK2: define amdgpu_kernel void @A
+
+
+define internal void @large2() {
+ store volatile i32 42, ptr null
+ call void @large2()
+ ret void
+}
+
+define internal void @large1() {
+ call void @large1()
+ call void @large2()
+ ret void
+}
+
+define internal void @large0() {
+ call void @large0()
+ call void @large1()
+ call void @large2()
+ ret void
+}
+
+define internal void @HelperA() {
+ call void @large0()
+ ret void
+}
+
+define internal void @HelperB() {
+ call void @large0()
+ ret void
+}
+
+define amdgpu_kernel void @A() {
+ call void @HelperA()
+ ret void
+}
+
+define weak_odr amdgpu_kernel void @B() {
+ call void @HelperB()
+ ret void
+}
+
+define internal void @HelperC() {
+ ret void
+}
+
+define weak_odr amdgpu_kernel void @C() {
+ call void @HelperC()
+ ret void
+}
More information about the llvm-commits
mailing list