[clang] 29a5c3c - [NVPTX] Introduce attribute to mark kernels without a language mode

Fri Mar 24 12:42:34 PDT 2023

Author: Joseph Huber
Date: 2023-03-24T14:42:25-05:00
New Revision: 29a5c3c8fe30dcfb19dd86dd5f3f201435311997

URL: https://github.com/llvm/llvm-project/commit/29a5c3c8fe30dcfb19dd86dd5f3f201435311997
DIFF: https://github.com/llvm/llvm-project/commit/29a5c3c8fe30dcfb19dd86dd5f3f201435311997.diff

LOG: [NVPTX] Introduce attribute to mark kernels without a language mode

We may want to be able to mark certain regions as kernels even without
being in an accepted CUDA or OpenCL language mode. This patch introduces
a new attribute limited to `nvptx` targets called `nvptx_kernel` which
will perform the same metadata action as the existing CUDA ones. This
closely mimics the behaviour of the `amdgpu_kernel` attribute. This
allows for making executable NVPTX device images without using an
existing offloading language model.

I was unsure how to do this; I could potentially re-use all the CUDA
attributes and just replace the `CUDA` language requirement with an
`NVPTX` architecture requirement. Also, I don't know if I should add more
than just this attribute.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D140226

Added: 
    clang/test/CodeGen/nvptx_attributes.c

Modified: 
    clang/include/clang/Basic/Attr.td
    clang/lib/CodeGen/TargetInfo.cpp
    clang/lib/Sema/SemaDeclAttr.cpp
    clang/test/Misc/pragma-attribute-supported-attributes-list.test

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 3e086ebee6a46..7b883566f3ae1 100644

--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -414,6 +414,7 @@ def TargetRISCV : TargetArch<["riscv32", "riscv64"]>;
 def TargetX86 : TargetArch<["x86"]>;
 def TargetAnyX86 : TargetArch<["x86", "x86_64"]>;
 def TargetWebAssembly : TargetArch<["wasm32", "wasm64"]>;
+def TargetNVPTX : TargetArch<["nvptx", "nvptx64"]>;
 def TargetWindows : TargetSpec {
   let OSes = ["Win32"];
 }
@@ -1221,6 +1222,12 @@ def CUDAHost : InheritableAttr {
 }
 def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
 
+def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
+  let Spellings = [Clang<"nvptx_kernel">];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [Undocumented];
+}
+
 def HIPManaged : InheritableAttr {
   let Spellings = [GNU<"managed">, Declspec<"__managed__">];
   let Subjects = SubjectList<[Var]>;

diff  --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d3329ae023394..a95401bf6bf80 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -7373,6 +7373,11 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
       }
     }
   }
+
+  // Attach kernel metadata directly if compiling for NVPTX.
+  if (FD->hasAttr<NVPTXKernelAttr>()) {
+    addNVVMMetadata(F, "kernel", 1);
+  }
 }
 
 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,

diff  --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index efa275c0aa12b..19bc03d30a21e 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4970,7 +4970,10 @@ static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
   if (FD->isInlineSpecified() && !S.getLangOpts().CUDAIsDevice)
     S.Diag(FD->getBeginLoc(), diag::warn_kern_is_inline) << FD;
 
-  D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
+  if (AL.getKind() == ParsedAttr::AT_NVPTXKernel)
+    D->addAttr(::new (S.Context) NVPTXKernelAttr(S.Context, AL));
+  else
+    D->addAttr(::new (S.Context) CUDAGlobalAttr(S.Context, AL));
   // In host compilation the kernel is emitted as a stub function, which is
   // a helper function for launching the kernel. The instructions in the helper
   // function has nothing to do with the source code of the kernel. Do not emit
@@ -8851,6 +8854,7 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_CalledOnce:
     handleCalledOnceAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_NVPTXKernel:
   case ParsedAttr::AT_CUDAGlobal:
     handleGlobalAttr(S, D, AL);
     break;

diff  --git a/clang/test/CodeGen/nvptx_attributes.c b/clang/test/CodeGen/nvptx_attributes.c
new file mode 100644
index 0000000000000..dcf79ad4e0e6a
--- /dev/null
+++ b/clang/test/CodeGen/nvptx_attributes.c
@@ -0,0 +1,17 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -o - | FileCheck %s
+
+// CHECK: Function Attrs: noinline nounwind optnone
+// CHECK-LABEL: define {{[^@]+}}@foo
+// CHECK-SAME: (ptr noundef [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[RET]], ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8
+// CHECK-NEXT:    store i32 1, ptr [[TMP0]], align 4
+// CHECK-NEXT:    ret void
+__attribute__((nvptx_kernel)) void foo(int *ret) {
+  *ret = 1;
+}
+
+// CHECK: !0 = !{ptr @foo, !"kernel", i32 1}

diff  --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 98cdf82ba0c11..8ec0550af6490 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -94,6 +94,7 @@
 // CHECK-NEXT: NSConsumed (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: NSConsumesSelf (SubjectMatchRule_objc_method)
 // CHECK-NEXT: NSErrorDomain (SubjectMatchRule_enum)
+// CHECK-NEXT: NVPTXKernel (SubjectMatchRule_function)
 // CHECK-NEXT: Naked (SubjectMatchRule_function)
 // CHECK-NEXT: NoBuiltin (SubjectMatchRule_function)
 // CHECK-NEXT: NoCommon (SubjectMatchRule_variable)