[llvm] [IPO] Do not merge kernel functions (PR #174254)
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 2 16:06:19 PST 2026
https://github.com/Flakebi created https://github.com/llvm/llvm-project/pull/174254
Kernels cannot be called, so we cannot introduce calls to them in MergeFunctions.
The test uses `--implicit-check-not=call` to make sure that only the functions with the default (C) calling convention get merged.
Fixes #39579 and #173355
>From 753a5544b03756c8b2373ed5afc1c685dc654ffc Mon Sep 17 00:00:00 2001
From: Flakebi <flakebi at t-online.de>
Date: Sat, 3 Jan 2026 00:28:58 +0100
Subject: [PATCH] [IPO] Do not merge kernel functions
Kernels cannot be called, so we cannot introduce calls to them in
MergeFunctions.
---
llvm/lib/Transforms/IPO/MergeFunctions.cpp | 5 ++
.../MergeFunc/merge-calling-conv.ll | 57 +++++++++++++++++++
2 files changed, 62 insertions(+)
create mode 100644 llvm/test/Transforms/MergeFunc/merge-calling-conv.ll
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 0faa36a495acc..664bc49764756 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -697,6 +697,11 @@ static bool canCreateThunkFor(Function *F) {
if (F->isVarArg())
return false;
+ if (F->getCallingConv() == CallingConv::SPIR_KERNEL ||
+ F->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ F->getCallingConv() == CallingConv::PTX_Kernel)
+ return false;
+
// Don't merge tiny functions using a thunk, since it can just end up
// making the function larger.
if (F->size() == 1) {
diff --git a/llvm/test/Transforms/MergeFunc/merge-calling-conv.ll b/llvm/test/Transforms/MergeFunc/merge-calling-conv.ll
new file mode 100644
index 0000000000000..e7aff026d433a
--- /dev/null
+++ b/llvm/test/Transforms/MergeFunc/merge-calling-conv.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -passes=mergefunc < %s | FileCheck --implicit-check-not=call %s
+
+; Check that no calls are generated for certain calling conventions
+
+@debug = global i32 0
+
+@llvm.compiler.used = appending global [4 x ptr] [ptr @normal, ptr @amdgpu_kernel, ptr @ptx_kernel, ptr @spir_kernel], section "llvm.metadata"
+
+; CHECK: call void @as_normal
+
+define internal void @normal(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 0
+ store i32 %b, ptr @debug
+ ret void
+}
+
+define void @as_normal(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 0
+ store i32 %b, ptr @debug
+ ret void
+}
+
+define internal amdgpu_kernel void @amdgpu_kernel(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 1
+ store i32 %b, ptr @debug
+ ret void
+}
+
+define amdgpu_kernel void @as_amdgpu_kernel(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 1
+ store i32 %b, ptr @debug
+ ret void
+}
+
+define internal ptx_kernel void @ptx_kernel(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 2
+ store i32 %b, ptr @debug
+ ret void
+}
+
+define ptx_kernel void @as_ptx_kernel(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 2
+ store i32 %b, ptr @debug
+ ret void
+}
+
+define internal spir_kernel void @spir_kernel(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 3
+ store i32 %b, ptr @debug
+ ret void
+}
+
+define spir_kernel void @as_spir_kernel(i32 %a) unnamed_addr {
+ %b = xor i32 %a, 3
+ store i32 %b, ptr @debug
+ ret void
+}
More information about the llvm-commits
mailing list