[clang] 7c3fdcc - [CUDA] Add support for __grid_constant__ attribute (#114589)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 5 10:48:58 PST 2024
Author: Artem Belevich
Date: 2024-11-05T10:48:54-08:00
New Revision: 7c3fdcc27603cd2d6b01fa7b057b3099da75bc8d
URL: https://github.com/llvm/llvm-project/commit/7c3fdcc27603cd2d6b01fa7b057b3099da75bc8d
DIFF: https://github.com/llvm/llvm-project/commit/7c3fdcc27603cd2d6b01fa7b057b3099da75bc8d.diff
LOG: [CUDA] Add support for __grid_constant__ attribute (#114589)
LLVM support for the attribute has been implemented already, so this patch just
plumbs it through to the CUDA front-end.
One notable difference from NVCC is that the attribute can be used
regardless of the targeted GPU. On older GPUs it will just be
ignored. The attribute is a performance hint, and does not warrant a
hard error if the compiler can't benefit from it on a particular GPU
variant.
Added:
clang/test/CodeGenCUDA/grid-constant.cu
clang/test/SemaCUDA/grid-constant.cu
Modified:
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/lib/CodeGen/Targets/NVPTX.cpp
clang/lib/Sema/SemaDecl.cpp
clang/lib/Sema/SemaDeclAttr.cpp
clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
clang/test/CodeGenCUDA/Inputs/cuda.h
clang/test/Misc/pragma-attribute-supported-attributes-list.test
clang/test/SemaCUDA/Inputs/cuda.h
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4e555914caee8a..b2231bb4584aae 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -716,6 +716,7 @@ CUDA Support
^^^^^^^^^^^^
- Clang now supports CUDA SDK up to 12.6
- Added support for sm_100
+- Added support for `__grid_constant__` attribute.
AIX Support
^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index fbad11b376e7e9..24cfb5ddb6d4ca 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1451,6 +1451,13 @@ def CUDAHost : InheritableAttr {
}
def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
+def CUDAGridConstant : InheritableAttr {
+ let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
+ let Subjects = SubjectList<[ParmVar]>;
+ let LangOpts = [CUDA];
+ let Documentation = [CUDAGridConstantAttrDocs];
+}
+
def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
let Spellings = [Clang<"nvptx_kernel">];
let Subjects = SubjectList<[Function]>;
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index ed251b0a74c392..23c8eb2d163c86 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -6794,6 +6794,16 @@ unbind runtime APIs.
}];
}
+def CUDAGridConstantAttrDocs : Documentation {
+ let Category = DocCatDecl;
+ let Content = [{
+The ``__grid_constant__`` attribute can be applied to a ``const``-qualified kernel
+function argument and allows compiler to take the address of that argument without
+making a copy. The argument applies to sm_70 or newer GPUs, during compilation
+with CUDA-11.7(PTX 7.7) or newer, and is ignored otherwise.
+ }];
+}
+
def HIPManagedAttrDocs : Documentation {
let Category = DocCatDecl;
let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d697e6d61afa9a..ae3e243bdc58bd 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
"__managed__ are not allowed on non-static local variables">;
+def err_cuda_grid_constant_not_allowed : Error<
+ "__grid_constant__ is only allowed on const-qualified kernel parameters">;
def err_cuda_ovl_target : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index ec7f1c439b1881..0431d2cc4ddc39 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -8,6 +8,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
using namespace clang;
@@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand);
+ int Operand,
+ const SmallVectorImpl<int> &GridConstantArgs);
+
+ static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
+ int Operand) {
+ addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
+ }
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
- if (!FD) return;
+ if (!FD)
+ return;
llvm::Function *F = cast<llvm::Function>(GV);
@@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
+ SmallVector<int, 10> GCI;
+ for (auto IV : llvm::enumerate(FD->parameters()))
+ if (IV.value()->hasAttr<CUDAGridConstantAttr>())
+ // For some reason arg indices are 1-based in NVVM
+ GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1);
+ addNVVMMetadata(F, "kernel", 1, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
@@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
- StringRef Name, int Operand) {
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(
+ llvm::GlobalValue *GV, StringRef Name, int Operand,
+ const SmallVectorImpl<int> &GridConstantArgs) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
- llvm::Metadata *MDVals[] = {
+ SmallVector<llvm::Metadata *, 5> MDVals = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+ if (!GridConstantArgs.empty()) {
+ SmallVector<llvm::Metadata *, 10> GCM;
+ for (int I : GridConstantArgs)
+ GCM.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
+ MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
+ llvm::MDNode::get(Ctx, GCM)});
+ }
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
@@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
return llvm::ConstantExpr::getAddrSpaceCast(
llvm::ConstantPointerNull::get(NPT), PT);
}
-}
+} // namespace
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
const CUDALaunchBoundsAttr *Attr,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 00c8f871bbb38c..c125a7b6abc82d 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12225,8 +12225,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
<< NewFD;
}
- if (!Redeclaration && LangOpts.CUDA)
+ if (!Redeclaration && LangOpts.CUDA) {
+ bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>();
+ for (auto *Parm : NewFD->parameters()) {
+ if (!Parm->getType()->isDependentType() &&
+ Parm->hasAttr<CUDAGridConstantAttr>() &&
+ !(IsKernel && Parm->getType().isConstQualified()))
+ Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(),
+ diag::err_cuda_grid_constant_not_allowed);
+ }
CUDA().checkTargetOverload(NewFD, Previous);
+ }
}
// Check if the function definition uses any AArch64 SME features without
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index a90f8706fc0080..d05d326178e1b8 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
}
+static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ if (D->isInvalidDecl())
+ return;
+ // Whether __grid_constant__ is allowed to be used will be checked in
+ // Sema::CheckFunctionDeclaration as we need complete function decl to make
+ // the call.
+ D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL));
+}
+
static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *Fn = cast<FunctionDecl>(D);
if (!Fn->isInlineSpecified()) {
@@ -6645,6 +6654,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_CUDADevice:
handleDeviceAttr(S, D, AL);
break;
+ case ParsedAttr::AT_CUDAGridConstant:
+ handleGridConstantAttr(S, D, AL);
+ break;
case ParsedAttr::AT_HIPManaged:
handleManagedAttr(S, D, AL);
break;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 3e948232057afe..ec3c3ce6057264 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
continue;
}
+ if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) {
+ if (!New->hasAttr<CUDAGridConstantAttr>())
+ New->addAttr(A->clone(Context));
+ continue;
+ }
+
assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index dc760500e65d41..a8d85afb7cd21c 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -12,6 +12,7 @@
#define __managed__ __attribute__((managed))
#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __grid_constant__ __attribute__((grid_constant))
#else
#define __constant__
#define __device__
@@ -20,6 +21,7 @@
#define __shared__
#define __managed__
#define __launch_bounds__(...)
+#define __grid_constant__
#endif
struct dim3 {
diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu
new file mode 100644
index 00000000000000..8d4be9c9dc7e1e
--- /dev/null
+++ b/clang/test/CodeGenCUDA/grid-constant.cu
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
+// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+struct S {};
+
+__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}
+
+// dependent arguments get diagnosed after instantiation.
+template <typename T>
+__global__ void tkernel_const(__grid_constant__ const T arg) {}
+
+template <typename T>
+__global__ void tkernel(int dummy, __grid_constant__ T arg) {}
+
+void foo() {
+ tkernel_const<const S><<<1,1>>>({});
+ tkernel_const<S><<<1,1>>>({});
+ tkernel<const S><<<1,1>>>(1, {});
+}
+//.
+//.
+// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
+// CHECK: [[META1]] = !{i32 1, i32 3}
+// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
+// CHECK: [[META3]] = !{i32 1}
+// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
+// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
+// CHECK: [[META6]] = !{i32 2}
+//.
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 3f1977d4d408b9..4a6ac39da18ad2 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -39,6 +39,7 @@
// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
+// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter)
// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)
diff --git a/clang/test/SemaCUDA/Inputs/cuda.h b/clang/test/SemaCUDA/Inputs/cuda.h
index 405ef8bb807d90..10db947d8246ca 100644
--- a/clang/test/SemaCUDA/Inputs/cuda.h
+++ b/clang/test/SemaCUDA/Inputs/cuda.h
@@ -11,6 +11,7 @@
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))
#define __managed__ __attribute__((managed))
+#define __grid_constant__ __attribute__((grid_constant))
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
struct dim3 {
diff --git a/clang/test/SemaCUDA/grid-constant.cu b/clang/test/SemaCUDA/grid-constant.cu
new file mode 100644
index 00000000000000..876e389355fd4b
--- /dev/null
+++ b/clang/test/SemaCUDA/grid-constant.cu
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+#include "Inputs/cuda.h"
+
+struct S {};
+
+__global__ void kernel_struct(__grid_constant__ const S arg) {}
+__global__ void kernel_scalar(__grid_constant__ const int arg) {}
+
+__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+// templates w/ non-dependent argument types get diagnosed right
+// away, without instantiation.
+template <typename T>
+__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {}
+template <typename T>
+__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+// dependent arguments get diagnosed after instantiation.
+template <typename T>
+__global__ void tkernel_const(__grid_constant__ const T arg) {}
+
+template <typename T>
+__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+void foo() {
+ tkernel_const<const S><<<1,1>>>({});
+ tkernel_const<S><<<1,1>>>({});
+ tkernel<const S><<<1,1>>>({});
+ tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}}
+}
More information about the cfe-commits
mailing list