[clang] e169cc1 - [Clang] Fix sema checks thinking kernels aren't kernels (#104460)

Fri Aug 16 16:36:32 PDT 2024

Author: Joseph Huber
Date: 2024-08-16T18:36:27-05:00
New Revision: e169cc162adbe89d498e774bccf4e228af989849

URL: https://github.com/llvm/llvm-project/commit/e169cc162adbe89d498e774bccf4e228af989849
DIFF: https://github.com/llvm/llvm-project/commit/e169cc162adbe89d498e774bccf4e228af989849.diff

LOG: [Clang] Fix sema checks thinking kernels aren't kernels (#104460)

Summary:
Currently we have some sema checks to make sure users don't apply
kernel-only attributes to non-kernel functions. However, this currently
did not correctly check for bare NVPTX / AMDGPU kernel attributes,
making it impossible to use them at all w/o CUDA enabled. This patch
fixes that by checking for the calling convention / attributes directly.

Added: 
    

Modified: 
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 3b5e984f4ee773..73d11ac972b020 100644

--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -7123,6 +7123,13 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   }
 }
 
+static bool isKernelDecl(Decl *D) {
+  const FunctionType *FnTy = D->getFunctionType();
+  return D->hasAttr<OpenCLKernelAttr>() ||
+         (FnTy && FnTy->getCallConv() == CallingConv::CC_AMDGPUKernelCall) ||
+         D->hasAttr<CUDAGlobalAttr>() || D->getAttr<NVPTXKernelAttr>();
+}
+
 void Sema::ProcessDeclAttributeList(
     Scope *S, Decl *D, const ParsedAttributesView &AttrList,
     const ProcessDeclAttributeOptions &Options) {
@@ -7163,24 +7170,25 @@ void Sema::ProcessDeclAttributeList(
     } else if (const auto *A = D->getAttr<OpenCLIntelReqdSubGroupSizeAttr>()) {
       Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
       D->setInvalidDecl();
-    } else if (!D->hasAttr<CUDAGlobalAttr>()) {
-      if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
-        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-            << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
-        D->setInvalidDecl();
-      } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
-        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-            << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
-        D->setInvalidDecl();
-      } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
-        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-            << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
-        D->setInvalidDecl();
-      } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
-        Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
-            << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
-        D->setInvalidDecl();
-      }
+    }
+  }
+  if (!isKernelDecl(D)) {
+    if (const auto *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+          << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
+      D->setInvalidDecl();
+    } else if (const auto *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
+      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+          << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
+      D->setInvalidDecl();
+    } else if (const auto *A = D->getAttr<AMDGPUNumSGPRAttr>()) {
+      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+          << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
+      D->setInvalidDecl();
+    } else if (const auto *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+      Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+          << A << A->isRegularKeywordAttribute() << ExpectedKernelFunction;
+      D->setInvalidDecl();
     }
   }
 

diff  --git a/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp
index c8fb12315d0eec..8df1d75e9ec8fa 100644
--- a/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp
+++ b/clang/test/CodeGenCXX/amdgpu-kernel-arg-pointer-type.cpp
@@ -6,6 +6,10 @@
 // The original test passes the result through opt O2, but that seems to introduce invalid
 // addrspace casts which are not being fixed as part of the present change.
 
+// COMMON: define{{.*}} amdgpu_kernel void @_Z6kernelv() #[[ATTR:[0-9]+]]
+__attribute__((amdgpu_kernel, amdgpu_flat_work_group_size(1, 256))) void
+    kernel() {}
+
 // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(ptr {{.*}} %x)
 // CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to ptr
 __attribute__((amdgpu_kernel)) void kernel1(int *x) {
@@ -81,3 +85,4 @@ __attribute__((amdgpu_kernel)) void kernel8(struct SS a) {
   *a.x += 3.f;
 }
 
+// COMMON: attributes #[[ATTR]] = { {{.*}}"amdgpu-flat-work-group-size"="1,256"{{.*}} }