[clang] [OpenCL][NVPTX] Don't set calling convention for OpenCL kernel (PR #170170)
Hongyu Chen via cfe-commits
cfe-commits at lists.llvm.org
Tue Dec 2 10:48:44 PST 2025
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/170170
>From def58994c7e783e50260be3eba888f100956797d Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 2 Dec 2025 00:42:10 +0800
Subject: [PATCH 1/8] precommit tests
---
clang/test/CodeGenOpenCL/ptx-calls.cl | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl
index ae187173b1730..0aa7024aa44bf 100644
--- a/clang/test/CodeGenOpenCL/ptx-calls.cl
+++ b/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -1,11 +1,22 @@
-// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O1 -o - | FileCheck %s
+// CHECK-LABEL: define dso_local void @device_function(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret void
+//
void device_function() {
}
-// CHECK-LABEL: define{{.*}} void @device_function()
+// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META7]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: unreachable
+//
__kernel void kernel_function() {
device_function();
}
-// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function()
-// CHECK: call void @device_function()
+//.
+// CHECK: [[META7]] = !{}
+//.
>From 284f9f7dd2c2275566d7de4e9c51d67cb9a66911 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 2 Dec 2025 00:43:44 +0800
Subject: [PATCH 2/8] [OpenCL][NVPTX] Don't set calling convention for OpenCL
kernel
---
clang/lib/CodeGen/Targets/NVPTX.cpp | 4 +++-
clang/test/CodeGenOpenCL/ptx-calls.cl | 2 +-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index f6715861d91bc..5afef658c840b 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -277,7 +277,9 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}
// Attach kernel metadata directly if compiling for NVPTX.
- if (FD->hasAttr<DeviceKernelAttr>())
+ // NOTE: Don't set kernel calling convention for handled OpenCL kernel,
+ // otherwise the stub version of kernel would be incorrect.
+ if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
F->setCallingConv(getDeviceKernelCallingConv());
}
diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl
index 0aa7024aa44bf..d5e27fce426a7 100644
--- a/clang/test/CodeGenOpenCL/ptx-calls.cl
+++ b/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -12,7 +12,7 @@ void device_function() {
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META7]] {
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: unreachable
+// CHECK-NEXT: ret void
//
__kernel void kernel_function() {
device_function();
>From 65787d0993ac4ba1bbdc56fffa961fd7764848ce Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 2 Dec 2025 01:15:36 +0800
Subject: [PATCH 3/8] update test
---
clang/test/CodeGenOpenCL/reflect.cl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl
index 4abb40aa3ed50..a69e338641167 100644
--- a/clang/test/CodeGenOpenCL/reflect.cl
+++ b/clang/test/CodeGenOpenCL/reflect.cl
@@ -26,7 +26,7 @@ __kernel void kernel_function(__global int *i) {
// CHECK-NEXT: ret void
//
//
-// CHECK-LABEL: define dso_local ptx_kernel void @__clang_ocl_kern_imp_kernel_function(
+// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_kernel_function(
// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4
>From e39968bfc20520a91220023ff32217f5d6619b73 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 2 Dec 2025 11:33:05 +0800
Subject: [PATCH 4/8] use mangled name
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 3 ++-
clang/lib/CodeGen/Targets/NVPTX.cpp | 7 +++---
clang/test/CodeGenOpenCL/ptx-calls.cl | 31 +++++++++++++++++----------
3 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index e4ad078dab197..f1a9f7dc94aa9 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -441,7 +441,8 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (FD) {
setFunctionDeclAttributes(FD, F, M);
- if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
+ if (FD->hasAttr<DeviceKernelAttr>() &&
+ !GV->getName().starts_with("__clang_ocl_kern_imp_"))
F->setCallingConv(getDeviceKernelCallingConv());
}
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 5afef658c840b..f7b885dbf7b16 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/Signals.h"
using namespace clang;
using namespace clang::CodeGen;
@@ -277,9 +278,9 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}
// Attach kernel metadata directly if compiling for NVPTX.
- // NOTE: Don't set kernel calling convention for handled OpenCL kernel,
- // otherwise the stub version of kernel would be incorrect.
- if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
+ // NOTE: Don't set kernel calling convention for OpenCL kernel stub.
+ if (FD->hasAttr<DeviceKernelAttr>() &&
+ !GV->getName().starts_with("__clang_ocl_kern_imp_"))
F->setCallingConv(getDeviceKernelCallingConv());
}
diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl
index d5e27fce426a7..17c25ee78ef45 100644
--- a/clang/test/CodeGenOpenCL/ptx-calls.cl
+++ b/clang/test/CodeGenOpenCL/ptx-calls.cl
@@ -1,22 +1,31 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O1 -o - | FileCheck %s
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 6
+// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -emit-llvm -O0 -o - | FileCheck %s
+void device_function() {
+}
+
+__kernel void kernel_function() {
+ device_function();
+}
// CHECK-LABEL: define dso_local void @device_function(
-// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: ret void
//
-void device_function() {
-}
-
+//
// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function(
-// CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual [[META7]] !kernel_arg_type [[META7]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META7]] {
+// CHECK-SAME: ) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META3]] !kernel_arg_base_type [[META3]] !kernel_arg_type_qual [[META3]] {
// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @__clang_ocl_kern_imp_kernel_function() #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_kernel_function(
+// CHECK-SAME: ) #[[ATTR0]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META3]] !kernel_arg_type [[META3]] !kernel_arg_base_type [[META3]] !kernel_arg_type_qual [[META3]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: call void @device_function() #[[ATTR2]]
// CHECK-NEXT: ret void
//
-__kernel void kernel_function() {
- device_function();
-}
//.
-// CHECK: [[META7]] = !{}
+// CHECK: [[META3]] = !{}
//.
>From 15d3fb485395b8d6c564e182d98e9de5a45b24eb Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 2 Dec 2025 11:38:49 +0800
Subject: [PATCH 5/8] remove debug
---
clang/lib/CodeGen/Targets/NVPTX.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index f7b885dbf7b16..129026bb5fa04 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -12,7 +12,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/Support/Signals.h"
using namespace clang;
using namespace clang::CodeGen;
>From 725e8d97923431e32e49ef1590ffe2fe90fdc0a3 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Tue, 2 Dec 2025 21:30:11 +0800
Subject: [PATCH 6/8] don't set CC in setTargetAttributes
---
clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 +-----
clang/lib/CodeGen/Targets/NVPTX.cpp | 5 -----
2 files changed, 1 insertion(+), 10 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f1a9f7dc94aa9..0ab6c753b8bad 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -439,12 +439,8 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
- if (FD) {
+ if (FD)
setFunctionDeclAttributes(FD, F, M);
- if (FD->hasAttr<DeviceKernelAttr>() &&
- !GV->getName().starts_with("__clang_ocl_kern_imp_"))
- F->setCallingConv(getDeviceKernelCallingConv());
- }
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
}
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 129026bb5fa04..ba2acd821c704 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -276,11 +276,6 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
M.handleCUDALaunchBoundsAttr(F, Attr);
}
}
- // Attach kernel metadata directly if compiling for NVPTX.
- // NOTE: Don't set kernel calling convention for OpenCL kernel stub.
- if (FD->hasAttr<DeviceKernelAttr>() &&
- !GV->getName().starts_with("__clang_ocl_kern_imp_"))
- F->setCallingConv(getDeviceKernelCallingConv());
}
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
>From 5aa093394392a64396fc0235596a9aa6f10186aa Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 3 Dec 2025 02:44:08 +0800
Subject: [PATCH 7/8] handle [[clang::xxx_kernel]]
---
clang/lib/CodeGen/Targets/SPIR.cpp | 21 ---------------------
clang/lib/Sema/SemaType.cpp | 5 ++++-
2 files changed, 4 insertions(+), 22 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 1a8c85d8871ec..ccc35a22d9938 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -77,8 +77,6 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T,
QualType QT) const override;
- void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
@@ -292,22 +290,6 @@ CommonSPIRTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::ConstantPointerNull::get(NPT), PT);
}
-void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
- const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
- if (M.getLangOpts().OpenCL || GV->isDeclaration())
- return;
-
- const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
- if (!FD)
- return;
-
- llvm::Function *F = dyn_cast<llvm::Function>(GV);
- assert(F && "Expected GlobalValue to be a Function");
-
- if (FD->hasAttr<DeviceKernelAttr>())
- F->setCallingConv(getDeviceKernelCallingConv());
-}
-
LangAS
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
@@ -342,9 +324,6 @@ void SPIRVTargetCodeGenInfo::setTargetAttributes(
llvm::Function *F = dyn_cast<llvm::Function>(GV);
assert(F && "Expected GlobalValue to be a Function");
- if (FD->hasAttr<DeviceKernelAttr>())
- F->setCallingConv(getDeviceKernelCallingConv());
-
if (!M.getLangOpts().HIP ||
M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
return;
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index eb8b1352d1be1..de87395b27405 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -3796,8 +3796,10 @@ static CallingConv getCCForDeclaratorChunk(
}
}
}
+
for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
- D.getDeclSpec().getAttributes(), D.getAttributes())) {
+ D.getDeclSpec().getAttributes(), D.getAttributes(),
+ D.getDeclarationAttributes())) {
if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
CC = CC_DeviceKernel;
break;
@@ -7843,6 +7845,7 @@ static bool handleArmStateAttribute(Sema &S,
/// indicate that the attribute was handled, false if it wasn't.
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type, CUDAFunctionTarget CFT) {
+
Sema &S = state.getSema();
FunctionTypeUnwrapper unwrapped(S, type);
>From aa697883cb5f1966c3277e7b26b54ded37d95b62 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 3 Dec 2025 02:48:26 +0800
Subject: [PATCH 8/8] format
---
clang/lib/Sema/SemaType.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index de87395b27405..b643fcc76cd99 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -7845,7 +7845,6 @@ static bool handleArmStateAttribute(Sema &S,
/// indicate that the attribute was handled, false if it wasn't.
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type, CUDAFunctionTarget CFT) {
-
Sema &S = state.getSema();
FunctionTypeUnwrapper unwrapped(S, type);
More information about the cfe-commits
mailing list