[clang] [CUDA] Add support for __grid_constant__ attribute (PR #114589)
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 4 12:36:27 PST 2024
https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/114589
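For context, a minimal usage sketch (not part of the patch; the struct, kernel, and parameter names are illustrative), mirroring what the new SemaCUDA and CodeGenCUDA tests exercise:

    // CUDA sketch: __grid_constant__ is accepted only on const-qualified
    // parameters of __global__ kernels; it lets the compiler take the
    // parameter's address without making a per-thread local copy.
    struct Params { int n; float scale; };

    __global__ void scale_kernel(__grid_constant__ const Params p, float *out) {
      // Taking the address of a __grid_constant__ parameter is the point:
      // '&p' is valid without first copying 'p' into local storage.
      const Params *pp = &p;
      out[threadIdx.x] = pp->scale * pp->n;
    }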
From ac0790a431d94f78ee73e96fd97f9263192c3153 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Tue, 27 Aug 2024 16:16:14 -0700
Subject: [PATCH 1/2] [CUDA] Add support for __grid_constant__ attribute
---
clang/docs/ReleaseNotes.rst | 1 +
clang/include/clang/Basic/Attr.td | 7 ++++
.../clang/Basic/DiagnosticSemaKinds.td | 2 ++
clang/lib/CodeGen/Targets/NVPTX.cpp | 36 +++++++++++++++----
clang/lib/Sema/SemaDecl.cpp | 11 +++++-
clang/lib/Sema/SemaDeclAttr.cpp | 12 +++++++
.../lib/Sema/SemaTemplateInstantiateDecl.cpp | 6 ++++
clang/test/CodeGenCUDA/Inputs/cuda.h | 2 ++
clang/test/CodeGenCUDA/grid-constant.cu | 31 ++++++++++++++++
...a-attribute-supported-attributes-list.test | 1 +
clang/test/SemaCUDA/Inputs/cuda.h | 1 +
clang/test/SemaCUDA/grid-constant.cu | 33 +++++++++++++++++
12 files changed, 135 insertions(+), 8 deletions(-)
create mode 100644 clang/test/CodeGenCUDA/grid-constant.cu
create mode 100644 clang/test/SemaCUDA/grid-constant.cu
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 402203f89e23a0..9466df98747e27 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -699,6 +699,7 @@ CUDA Support
^^^^^^^^^^^^
- Clang now supports CUDA SDK up to 12.6
- Added support for sm_100
+- Added support for the ``__grid_constant__`` attribute.
AIX Support
^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 47c93b48175fc8..9925b46ab2505e 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1450,6 +1450,13 @@ def CUDAHost : InheritableAttr {
}
def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
+def CUDAGridConstant : InheritableAttr {
+ let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
+ let Subjects = SubjectList<[ParmVar]>;
+ let LangOpts = [CUDA];
+ let Documentation = [Undocumented];
+}
+
def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
let Spellings = [Clang<"nvptx_kernel">];
let Subjects = SubjectList<[Function]>;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 34ff49d7238a7f..61ff4c4fb5d646 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
"__managed__ are not allowed on non-static local variables">;
+def err_cuda_grid_constant_not_allowed : Error<
+ "__grid_constant__ is only allowed on const-qualified kernel parameters">;
def err_cuda_ovl_target : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index ec7f1c439b1881..0431d2cc4ddc39 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -8,6 +8,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
using namespace clang;
@@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand);
+ int Operand,
+ const SmallVectorImpl<int> &GridConstantArgs);
+
+ static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
+ int Operand) {
+ addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
+ }
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
- if (!FD) return;
+ if (!FD)
+ return;
llvm::Function *F = cast<llvm::Function>(GV);
@@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
+ SmallVector<int, 10> GCI;
+ for (auto IV : llvm::enumerate(FD->parameters()))
+ if (IV.value()->hasAttr<CUDAGridConstantAttr>())
+ // For some reason arg indices are 1-based in NVVM
+ GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1);
+ addNVVMMetadata(F, "kernel", 1, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
@@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
- StringRef Name, int Operand) {
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(
+ llvm::GlobalValue *GV, StringRef Name, int Operand,
+ const SmallVectorImpl<int> &GridConstantArgs) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
- llvm::Metadata *MDVals[] = {
+ SmallVector<llvm::Metadata *, 5> MDVals = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+ if (!GridConstantArgs.empty()) {
+ SmallVector<llvm::Metadata *, 10> GCM;
+ for (int I : GridConstantArgs)
+ GCM.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
+ MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
+ llvm::MDNode::get(Ctx, GCM)});
+ }
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
@@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
return llvm::ConstantExpr::getAddrSpaceCast(
llvm::ConstantPointerNull::get(NPT), PT);
}
-}
+} // namespace
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
const CUDALaunchBoundsAttr *Attr,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f8e5f3c6d309d6..9de8cbc303016c 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12222,8 +12222,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
<< NewFD;
}
- if (!Redeclaration && LangOpts.CUDA)
+ if (!Redeclaration && LangOpts.CUDA) {
+ bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>();
+ for (auto *Parm : NewFD->parameters()) {
+ if (!Parm->getType()->isDependentType() &&
+ Parm->hasAttr<CUDAGridConstantAttr>() &&
+ !(IsKernel && Parm->getType().isConstQualified()))
+ Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(),
+ diag::err_cuda_grid_constant_not_allowed);
+ }
CUDA().checkTargetOverload(NewFD, Previous);
+ }
}
// Check if the function definition uses any AArch64 SME features without
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 601c6f2eef1d9c..d8550bab3eddd2 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
}
+static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ if (D->isInvalidDecl())
+ return;
+  // Whether __grid_constant__ is allowed to be used will be checked in
+  // Sema::CheckFunctionDeclaration, as we need the complete function
+  // declaration to make that determination.
+ D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL));
+}
+
static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *Fn = cast<FunctionDecl>(D);
if (!Fn->isInlineSpecified()) {
@@ -6642,6 +6651,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_CUDADevice:
handleDeviceAttr(S, D, AL);
break;
+ case ParsedAttr::AT_CUDAGridConstant:
+ handleGridConstantAttr(S, D, AL);
+ break;
case ParsedAttr::AT_HIPManaged:
handleManagedAttr(S, D, AL);
break;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 3e948232057afe..ec3c3ce6057264 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
continue;
}
+ if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) {
+ if (!New->hasAttr<CUDAGridConstantAttr>())
+ New->addAttr(A->clone(Context));
+ continue;
+ }
+
assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index dc760500e65d41..a8d85afb7cd21c 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -12,6 +12,7 @@
#define __managed__ __attribute__((managed))
#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __grid_constant__ __attribute__((grid_constant))
#else
#define __constant__
#define __device__
@@ -20,6 +21,7 @@
#define __shared__
#define __managed__
#define __launch_bounds__(...)
+#define __grid_constant__
#endif
struct dim3 {
diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu
new file mode 100644
index 00000000000000..8d4be9c9dc7e1e
--- /dev/null
+++ b/clang/test/CodeGenCUDA/grid-constant.cu
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
+// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+struct S {};
+
+__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}
+
+// dependent arguments get diagnosed after instantiation.
+template <typename T>
+__global__ void tkernel_const(__grid_constant__ const T arg) {}
+
+template <typename T>
+__global__ void tkernel(int dummy, __grid_constant__ T arg) {}
+
+void foo() {
+ tkernel_const<const S><<<1,1>>>({});
+ tkernel_const<S><<<1,1>>>({});
+ tkernel<const S><<<1,1>>>(1, {});
+}
+//.
+//.
+// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
+// CHECK: [[META1]] = !{i32 1, i32 3}
+// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
+// CHECK: [[META3]] = !{i32 1}
+// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
+// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
+// CHECK: [[META6]] = !{i32 2}
+//.
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index e28b0775410c0a..b159a45c25a7f4 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -39,6 +39,7 @@
// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
+// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter)
// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)
diff --git a/clang/test/SemaCUDA/Inputs/cuda.h b/clang/test/SemaCUDA/Inputs/cuda.h
index 405ef8bb807d90..10db947d8246ca 100644
--- a/clang/test/SemaCUDA/Inputs/cuda.h
+++ b/clang/test/SemaCUDA/Inputs/cuda.h
@@ -11,6 +11,7 @@
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))
#define __managed__ __attribute__((managed))
+#define __grid_constant__ __attribute__((grid_constant))
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
struct dim3 {
diff --git a/clang/test/SemaCUDA/grid-constant.cu b/clang/test/SemaCUDA/grid-constant.cu
new file mode 100644
index 00000000000000..876e389355fd4b
--- /dev/null
+++ b/clang/test/SemaCUDA/grid-constant.cu
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+#include "Inputs/cuda.h"
+
+struct S {};
+
+__global__ void kernel_struct(__grid_constant__ const S arg) {}
+__global__ void kernel_scalar(__grid_constant__ const int arg) {}
+
+__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+// templates w/ non-dependent argument types get diagnosed right
+// away, without instantiation.
+template <typename T>
+__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {}
+template <typename T>
+__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+// dependent arguments get diagnosed after instantiation.
+template <typename T>
+__global__ void tkernel_const(__grid_constant__ const T arg) {}
+
+template <typename T>
+__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+void foo() {
+ tkernel_const<const S><<<1,1>>>({});
+ tkernel_const<S><<<1,1>>>({});
+ tkernel<const S><<<1,1>>>({});
+ tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}}
+}
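The net effect of the CodeGen change, summarized from the CHECK lines in the new CodeGenCUDA test above: the kernel's nvvm.annotations entry gains a "grid_constant" operand listing the 1-based indices of the annotated parameters. A sketch of the source-to-metadata mapping (metadata reproduced from the test, not new behavior):

    // From clang/test/CodeGenCUDA/grid-constant.cu: the kernel below gets an
    // nvvm.annotations entry with a "grid_constant" operand holding the
    // 1-based indices of the __grid_constant__ parameters:
    //   !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", !1}
    //   !1 = !{i32 1, i32 3}
    struct S {};
    __global__ void kernel(__grid_constant__ const S gc_arg1, int arg2,
                           __grid_constant__ const int gc_arg3) {}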
From 25d7b383db16d619570c39510b25908b157b30f5 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra at google.com>
Date: Mon, 4 Nov 2024 12:36:09 -0800
Subject: [PATCH 2/2] Added attribute docs
---
clang/include/clang/Basic/Attr.td | 2 +-
clang/include/clang/Basic/AttrDocs.td | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 9925b46ab2505e..72534c3d8da458 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1454,7 +1454,7 @@ def CUDAGridConstant : InheritableAttr {
let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
let Subjects = SubjectList<[ParmVar]>;
let LangOpts = [CUDA];
- let Documentation = [Undocumented];
+ let Documentation = [CUDAGridConstantAttrDocs];
}
def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index fbbfc4acdf391e..9a6f8c49bd4f1a 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -6620,6 +6620,16 @@ unbind runtime APIs.
}];
}
+def CUDAGridConstantAttrDocs : Documentation {
+ let Category = DocCatDecl;
+ let Content = [{
+The ``__grid_constant__`` attribute can be applied to a ``const``-qualified kernel
+function argument and allows the compiler to take the address of that argument without
+making a copy. The attribute takes effect on sm_70 or newer GPUs, when compiling
+with CUDA 11.7 (PTX 7.7) or newer, and is ignored otherwise.
+ }];
+}
+
def HIPManagedAttrDocs : Documentation {
let Category = DocCatDecl;
let Content = [{