[clang] [clang] Fix inconsistencies with the device_kernel attr on different targets (PR #161905)
Nick Sarnie via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 3 14:00:09 PDT 2025
https://github.com/sarnex created https://github.com/llvm/llvm-project/pull/161905
The original [change](https://github.com/intel/llvm/pull/14114) unifying the device kernel attributes had some inexplicable behavior, such as `amdgpu_kernel` resulting in a function ending up with the `spir_kernel` CC but `nvptx_kernel` not doing the same.
For the target-specific spellings (`nvptx_kernel` and `amdgpu_kernel`), we warn and ignore if the spelling doesn't match the target.
Also we make sure the any valid spelling actually applies the CC. This worked for `NVPTX` already but was missing for `SPIR-V` and `AMDGPU`.
We actually don't need to do that, if we change `amdgpu_kernel` to apply the calling convention the same way that NVPTX does (setting the CC on the function in `TargetCodeGenInfo::setTargetAttribute`), we can remove all handling of `device_kernel` as a type attribute.
THese issues were reported [here](https://github.com/llvm/llvm-project/issues/161077) and [here](https://github.com/llvm/llvm-project/pull/161349).
Closes: https://github.com/llvm/llvm-project/issues/161077
>From cff89261fdf1e367bd5865f5014d4d9c115d19a7 Mon Sep 17 00:00:00 2001
From: "Sarnie, Nick" <nick.sarnie at intel.com>
Date: Fri, 3 Oct 2025 12:33:38 -0700
Subject: [PATCH] [clang] Fix inconsistencies with the device_kernel attr on
different targets
Signed-off-by: Sarnie, Nick <nick.sarnie at intel.com>
---
clang/docs/ReleaseNotes.rst | 1 +
clang/include/clang/Basic/Attr.td | 2 +-
clang/lib/AST/TypePrinter.cpp | 3 --
clang/lib/Basic/Targets/NVPTX.h | 2 +-
clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 ++--
clang/lib/CodeGen/Targets/SPIR.cpp | 33 ++++++++++++++++++--
clang/lib/Sema/SemaDeclAttr.cpp | 36 ++++++++++++++++++++--
clang/lib/Sema/SemaType.cpp | 18 ++---------
clang/test/Sema/callingconv-devicekernel.c | 24 +++++++++++++++
clang/test/Sema/callingconv.c | 4 +++
10 files changed, 100 insertions(+), 29 deletions(-)
create mode 100644 clang/test/Sema/callingconv-devicekernel.c
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d2e5bd284d350..9b57949112c22 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -401,6 +401,7 @@ Bug Fixes to Attribute Support
- Using ``[[gnu::cleanup(some_func)]]`` where some_func is annotated with
``[[gnu::error("some error")]]`` now correctly triggers an error. (#GH146520)
- Fix a crash when the function name is empty in the `swift_name` attribute. (#GH157075)
+- Fixes crashes or missing diagnostics with the `device_kernel` attribute. (#GHTODO)
Bug Fixes to C++ Support
^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 3c697ed8dd882..6ec0eac529245 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1599,7 +1599,7 @@ def CUDAShared : InheritableAttr {
}
def : MutualExclusions<[CUDAConstant, CUDAShared, HIPManaged]>;
-def DeviceKernel : DeclOrTypeAttr {
+def DeviceKernel : InheritableAttr {
let Spellings = [Clang<"device_kernel">, Clang<"sycl_kernel">,
Clang<"nvptx_kernel">, Clang<"amdgpu_kernel">,
CustomKeyword<"__kernel">, CustomKeyword<"kernel">];
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index 66a1b684ec68b..568d56ab0b911 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -2147,9 +2147,6 @@ void TypePrinter::printAttributedAfter(const AttributedType *T,
}
case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break;
case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break;
- case attr::DeviceKernel:
- OS << T->getAttr()->getSpelling();
- break;
case attr::IntelOclBicc:
OS << "inteloclbicc";
break;
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 33c29586359be..f5c8396f398aa 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -200,7 +200,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
// a host function.
if (HostTarget)
return HostTarget->checkCallingConvention(CC);
- return CCCR_Warning;
+ return CC == CC_DeviceKernel ? CCCR_OK : CCCR_Warning;
}
bool hasBitIntType() const override { return true; }
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e458a34..d54b1dc128254 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -419,9 +419,11 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
- if (FD)
+ if (FD) {
setFunctionDeclAttributes(FD, F, M);
-
+ if (FD->hasAttr<DeviceKernelAttr>() && !M.getLangOpts().OpenCL)
+ F->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
+ }
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
}
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa63143a66cd..42ef1c704831c 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -61,6 +61,8 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
QualType SampledType, CodeGenModule &CGM) const;
void
setOCLKernelStubCallingConvention(const FunctionType *&FT) const override;
+ void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
@@ -240,6 +242,26 @@ void CommonSPIRTargetCodeGenInfo::setOCLKernelStubCallingConvention(
FT, FT->getExtInfo().withCallingConv(CC_SpirFunction));
}
+void CommonSPIRTargetCodeGenInfo::setTargetAttributes(
+ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+ if (M.getLangOpts().OpenCL)
+ return;
+
+ if (GV->isDeclaration())
+ return;
+
+ llvm::Function *F = dyn_cast<llvm::Function>(GV);
+ if (!F)
+ return;
+
+ const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+ if (!FD)
+ return;
+
+ if (FD->hasAttr<DeviceKernelAttr>())
+ F->setCallingConv(getDeviceKernelCallingConv());
+}
+
LangAS
SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
@@ -264,9 +286,6 @@ SPIRVTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
void SPIRVTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
- if (!M.getLangOpts().HIP ||
- M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
- return;
if (GV->isDeclaration())
return;
@@ -277,6 +296,14 @@ void SPIRVTargetCodeGenInfo::setTargetAttributes(
auto FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
+
+ if (FD->hasAttr<DeviceKernelAttr>())
+ F->setCallingConv(llvm::CallingConv::SPIR_KERNEL);
+
+ if (!M.getLangOpts().HIP ||
+ M.getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+ return;
+
if (!FD->hasAttr<CUDAGlobalAttr>())
return;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 328ccf6694073..551d00b3c7476 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5204,25 +5204,55 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
static void handleDeviceKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
bool IsFunctionTemplate = FD && FD->getDescribedFunctionTemplate();
- if (S.getLangOpts().SYCLIsDevice) {
+ llvm::Triple Triple = S.getASTContext().getTargetInfo().getTriple();
+ const LangOptions &LangOpts = S.getLangOpts();
+
+ if (LangOpts.SYCLIsDevice) {
if (!IsFunctionTemplate) {
S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str)
<< AL << AL.isRegularKeywordAttribute() << "function templates";
+ AL.setInvalid();
+ return;
} else {
S.SYCL().handleKernelAttr(D, AL);
}
} else if (DeviceKernelAttr::isSYCLSpelling(AL)) {
S.Diag(AL.getLoc(), diag::warn_attribute_ignored) << AL;
- } else if (S.getASTContext().getTargetInfo().getTriple().isNVPTX()) {
+ AL.setInvalid();
+
+ return;
+ } else if (Triple.isNVPTX()) {
handleGlobalAttr(S, D, AL);
} else {
// OpenCL C++ will throw a more specific error.
- if (!S.getLangOpts().OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
+ if (!LangOpts.OpenCLCPlusPlus && (!FD || IsFunctionTemplate)) {
S.Diag(AL.getLoc(), diag::err_attribute_wrong_decl_type_str)
<< AL << AL.isRegularKeywordAttribute() << "functions";
+ AL.setInvalid();
+ return;
}
handleSimpleAttribute<DeviceKernelAttr>(S, D, AL);
}
+ // TODO: isGPU() should probably return true for SPIR.
+ bool TargetDeviceEnvironment = Triple.isGPU() || Triple.isSPIR() ||
+ LangOpts.isTargetDevice() || LangOpts.OpenCL;
+ bool IsAMDGPUMismatch =
+ DeviceKernelAttr::isAMDGPUSpelling(AL) && !Triple.isAMDGPU();
+ bool IsNVPTXMismatch =
+ DeviceKernelAttr::isNVPTXSpelling(AL) && !Triple.isNVPTX();
+ if (IsAMDGPUMismatch || IsNVPTXMismatch || !TargetDeviceEnvironment) {
+ // While both are just different spellings of the same underlying
+ // attribute, it makes more sense to the user if amdgpu_kernel can only
+ // be used on AMDGPU and the equivalent for NVPTX, so warn and ignore
+ // the attribute if there's a mismatch.
+ // Also warn if this is not an environment where a device kernel makes
+ // sense.
+ S.Diag(AL.getLoc(), diag::warn_cconv_unsupported)
+ << AL << (int)Sema::CallingConventionIgnoredReason::ForThisTarget;
+ AL.setInvalid();
+ return;
+ }
+
// Make sure we validate the CC with the target
// and warn/error if necessary.
handleCallConvAttr(S, D, AL);
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index bee613aa5f1c5..0d5b0e7e842b3 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -134,7 +134,6 @@ static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
case ParsedAttr::AT_VectorCall: \
case ParsedAttr::AT_AArch64VectorPcs: \
case ParsedAttr::AT_AArch64SVEPcs: \
- case ParsedAttr::AT_DeviceKernel: \
case ParsedAttr::AT_MSABI: \
case ParsedAttr::AT_SysVABI: \
case ParsedAttr::AT_Pcs: \
@@ -3781,7 +3780,8 @@ static CallingConv getCCForDeclaratorChunk(
}
}
if (!S.getLangOpts().isSYCL()) {
- for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
+ for (const ParsedAttr &AL : llvm::concat<ParsedAttr>(
+ D.getDeclSpec().getAttributes(), D.getAttributes())) {
if (AL.getKind() == ParsedAttr::AT_DeviceKernel) {
CC = CC_DeviceKernel;
break;
@@ -7565,8 +7565,6 @@ static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_ArmStreaming:
return createSimpleAttr<ArmStreamingAttr>(Ctx, Attr);
- case ParsedAttr::AT_DeviceKernel:
- return createSimpleAttr<DeviceKernelAttr>(Ctx, Attr);
case ParsedAttr::AT_Pcs: {
// The attribute may have had a fixit applied where we treated an
// identifier as a string literal. The contents of the string are valid,
@@ -8805,16 +8803,6 @@ static void HandleHLSLParamModifierAttr(TypeProcessingState &State,
}
}
-static bool isMultiSubjectAttrAllowedOnType(const ParsedAttr &Attr) {
- // The DeviceKernel attribute is shared for many targets, and
- // it is only allowed to be a type attribute with the AMDGPU
- // spelling, so skip processing the attr as a type attr
- // unless it has that spelling.
- if (Attr.getKind() != ParsedAttr::AT_DeviceKernel)
- return true;
- return DeviceKernelAttr::isAMDGPUSpelling(Attr);
-}
-
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs,
@@ -9068,8 +9056,6 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
break;
[[fallthrough]];
FUNCTION_TYPE_ATTRS_CASELIST:
- if (!isMultiSubjectAttrAllowedOnType(attr))
- break;
attr.setUsedAsTypeAttr();
diff --git a/clang/test/Sema/callingconv-devicekernel.c b/clang/test/Sema/callingconv-devicekernel.c
new file mode 100644
index 0000000000000..869687f8ca65d
--- /dev/null
+++ b/clang/test/Sema/callingconv-devicekernel.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-AMDGPU %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda- -emit-llvm %s 2>&1 -o -| FileCheck -check-prefix=CHECK-NVPTX %s
+// RUN: %clang_cc1 -triple spir64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s
+// RUN: %clang_cc1 -triple spirv64 -emit-llvm %s 2>&1 -o - | FileCheck -check-prefix=CHECK-SPIR %s
+
+// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel1()
+// CHECK-NVPTX-DAG: ptx_kernel void @kernel1()
+// CHECK-SPIR-DAG: spir_kernel void @kernel1()
+[[clang::device_kernel]] void kernel1() {}
+
+// CHECK-AMDGPU-DAG: amdgpu_kernel void @kernel2()
+// CHECK-NVPTX-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target
+// CHECK-SPIR-DAG: 14:3: warning: 'clang::amdgpu_kernel' calling convention is not supported for this target
+[[clang::amdgpu_kernel]] void kernel2() {}
+
+// CHECK-AMDGPU-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target
+// CHECK-NVPTX-DAG: ptx_kernel void @kernel3()
+// CHECK-SPIR-DAG: 19:3: warning: 'clang::nvptx_kernel' calling convention is not supported for this target
+[[clang::nvptx_kernel]] void kernel3() {}
+
+// CHECK-AMDGPU-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+// CHECK-NVPTX-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+// CHECK-SPIR-DAG: 24:3: warning: 'clang::sycl_kernel' attribute ignored
+[[clang::sycl_kernel]] void kernel4() {}
diff --git a/clang/test/Sema/callingconv.c b/clang/test/Sema/callingconv.c
index f0b8b80a32974..28342b56af39a 100644
--- a/clang/test/Sema/callingconv.c
+++ b/clang/test/Sema/callingconv.c
@@ -55,6 +55,10 @@ int __attribute__((aarch64_vector_pcs)) aavpcs(void); // expected-warning {{'aar
int __attribute__((aarch64_sve_pcs)) aasvepcs(void); // expected-warning {{'aarch64_sve_pcs' calling convention is not supported for this target}}
int __attribute__((amdgpu_kernel)) amdgpu_kernel(void); // expected-warning {{'amdgpu_kernel' calling convention is not supported for this target}}
+int __attribute__((device_kernel)) device_kernel(void) { // expected-warning {{'device_kernel' calling convention is not supported for this target}}
+}
+int __attribute__((sycl_kernel)) sycl_kernel(void) { // expected-warning {{'sycl_kernel' attribute ignored}}
+}
// PR6361
void ctest3();
More information about the cfe-commits
mailing list