[clang] [CUDA] Add support for __grid_constant__ attribute (PR #114589)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 1 11:45:14 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-clang
Author: Artem Belevich (Artem-B)
<details>
<summary>Changes</summary>
LLVM support for the attribute has been implemented already, so it just plumbs it through to the CUDA front-end.
One notable difference from NVCC is that the attribute can be used regardless of the targeted GPU. On the older GPUs it will just be ignored. The attribute is a performance hint, and does not warrant a hard error if compiler can't benefit from it on a particular GPU variant.
---
Full diff: https://github.com/llvm/llvm-project/pull/114589.diff
12 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+1)
- (modified) clang/include/clang/Basic/Attr.td (+7)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2)
- (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+29-7)
- (modified) clang/lib/Sema/SemaDecl.cpp (+10-1)
- (modified) clang/lib/Sema/SemaDeclAttr.cpp (+12)
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+6)
- (modified) clang/test/CodeGenCUDA/Inputs/cuda.h (+2)
- (added) clang/test/CodeGenCUDA/grid-constant.cu (+31)
- (modified) clang/test/Misc/pragma-attribute-supported-attributes-list.test (+1)
- (modified) clang/test/SemaCUDA/Inputs/cuda.h (+1)
- (added) clang/test/SemaCUDA/grid-constant.cu (+33)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 402203f89e23a0..9466df98747e27 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -699,6 +699,7 @@ CUDA Support
^^^^^^^^^^^^
- Clang now supports CUDA SDK up to 12.6
- Added support for sm_100
+- Added support for `__grid_constant__` attribute.
AIX Support
^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 47c93b48175fc8..9925b46ab2505e 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1450,6 +1450,13 @@ def CUDAHost : InheritableAttr {
}
def : MutualExclusions<[CUDAGlobal, CUDAHost]>;
+def CUDAGridConstant : InheritableAttr {
+ let Spellings = [GNU<"grid_constant">, Declspec<"__grid_constant__">];
+ let Subjects = SubjectList<[ParmVar]>;
+ let LangOpts = [CUDA];
+ let Documentation = [Undocumented];
+}
+
def NVPTXKernel : InheritableAttr, TargetSpecificAttr<TargetNVPTX> {
let Spellings = [Clang<"nvptx_kernel">];
let Subjects = SubjectList<[Function]>;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 34ff49d7238a7f..61ff4c4fb5d646 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9100,6 +9100,8 @@ def err_cuda_host_shared : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 functions">;
def err_cuda_nonstatic_constdev: Error<"__constant__, __device__, and "
"__managed__ are not allowed on non-static local variables">;
+def err_cuda_grid_constant_not_allowed : Error<
+ "__grid_constant__ is only allowed on const-qualified kernel parameters">;
def err_cuda_ovl_target : Error<
"%select{__device__|__global__|__host__|__host__ __device__}0 function %1 "
"cannot overload %select{__device__|__global__|__host__|__host__ __device__}2 function %3">;
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index ec7f1c439b1881..0431d2cc4ddc39 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -8,6 +8,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
using namespace clang;
@@ -78,7 +79,13 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
- int Operand);
+ int Operand,
+ const SmallVectorImpl<int> &GridConstantArgs);
+
+ static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
+ int Operand) {
+ addNVVMMetadata(GV, Name, Operand, SmallVector<int, 1>(0));
+ }
private:
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
@@ -240,7 +247,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
- if (!FD) return;
+ if (!FD)
+ return;
llvm::Function *F = cast<llvm::Function>(GV);
@@ -263,8 +271,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
+ SmallVector<int, 10> GCI;
+ for (auto IV : llvm::enumerate(FD->parameters()))
+ if (IV.value()->hasAttr<CUDAGridConstantAttr>())
+ // For some reason arg indices are 1-based in NVVM
+ GCI.push_back(IV.index() + 1);
// Create !{<func-ref>, metadata !"kernel", i32 1} node
- addNVVMMetadata(F, "kernel", 1);
+ addNVVMMetadata(F, "kernel", 1, GCI);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
M.handleCUDALaunchBoundsAttr(F, Attr);
@@ -276,18 +289,27 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes(
}
}
-void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
- StringRef Name, int Operand) {
+void NVPTXTargetCodeGenInfo::addNVVMMetadata(
+ llvm::GlobalValue *GV, StringRef Name, int Operand,
+ const SmallVectorImpl<int> &GridConstantArgs) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
- llvm::Metadata *MDVals[] = {
+ SmallVector<llvm::Metadata *, 5> MDVals = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
+ if (!GridConstantArgs.empty()) {
+ SmallVector<llvm::Metadata *, 10> GCM;
+ for (int I : GridConstantArgs)
+ GCM.push_back(llvm::ConstantAsMetadata::get(
+ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), I)));
+ MDVals.append({llvm::MDString::get(Ctx, "grid_constant"),
+ llvm::MDNode::get(Ctx, GCM)});
+ }
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
@@ -309,7 +331,7 @@ NVPTXTargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
return llvm::ConstantExpr::getAddrSpaceCast(
llvm::ConstantPointerNull::get(NPT), PT);
}
-}
+} // namespace
void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
const CUDALaunchBoundsAttr *Attr,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f8e5f3c6d309d6..9de8cbc303016c 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -12222,8 +12222,17 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
<< NewFD;
}
- if (!Redeclaration && LangOpts.CUDA)
+ if (!Redeclaration && LangOpts.CUDA) {
+ bool IsKernel = NewFD->hasAttr<CUDAGlobalAttr>();
+ for (auto *Parm : NewFD->parameters()) {
+ if (!Parm->getType()->isDependentType() &&
+ Parm->hasAttr<CUDAGridConstantAttr>() &&
+ !(IsKernel && Parm->getType().isConstQualified()))
+ Diag(Parm->getAttr<CUDAGridConstantAttr>()->getLocation(),
+ diag::err_cuda_grid_constant_not_allowed);
+ }
CUDA().checkTargetOverload(NewFD, Previous);
+ }
}
// Check if the function definition uses any AArch64 SME features without
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 601c6f2eef1d9c..d8550bab3eddd2 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -4748,6 +4748,15 @@ static void handleManagedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(CUDADeviceAttr::CreateImplicit(S.Context));
}
+static void handleGridConstantAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ if (D->isInvalidDecl())
+ return;
+ // Whether __grid_constant__ is allowed to be used will be checked in
+ // Sema::CheckFunctionDeclaration as we need complete function decl to make
+ // the call.
+ D->addAttr(::new (S.Context) CUDAGridConstantAttr(S.Context, AL));
+}
+
static void handleGNUInlineAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
const auto *Fn = cast<FunctionDecl>(D);
if (!Fn->isInlineSpecified()) {
@@ -6642,6 +6651,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
case ParsedAttr::AT_CUDADevice:
handleDeviceAttr(S, D, AL);
break;
+ case ParsedAttr::AT_CUDAGridConstant:
+ handleGridConstantAttr(S, D, AL);
+ break;
case ParsedAttr::AT_HIPManaged:
handleManagedAttr(S, D, AL);
break;
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 3e948232057afe..ec3c3ce6057264 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -876,6 +876,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
continue;
}
+ if (auto *A = dyn_cast<CUDAGridConstantAttr>(TmplAttr)) {
+ if (!New->hasAttr<CUDAGridConstantAttr>())
+ New->addAttr(A->clone(Context));
+ continue;
+ }
+
assert(!TmplAttr->isPackExpansion());
if (TmplAttr->isLateParsed() && LateAttrs) {
// Late parsed attributes must be instantiated and attached after the
diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h
index dc760500e65d41..a8d85afb7cd21c 100644
--- a/clang/test/CodeGenCUDA/Inputs/cuda.h
+++ b/clang/test/CodeGenCUDA/Inputs/cuda.h
@@ -12,6 +12,7 @@
#define __managed__ __attribute__((managed))
#endif
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __grid_constant__ __attribute__((grid_constant))
#else
#define __constant__
#define __device__
@@ -20,6 +21,7 @@
#define __shared__
#define __managed__
#define __launch_bounds__(...)
+#define __grid_constant__
#endif
struct dim3 {
diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu
new file mode 100644
index 00000000000000..8d4be9c9dc7e1e
--- /dev/null
+++ b/clang/test/CodeGenCUDA/grid-constant.cu
@@ -0,0 +1,31 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
+// RUN: %clang_cc1 "-triple" "nvptx64-nvidia-cuda" -emit-llvm -fcuda-is-device -o - %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+struct S {};
+
+__global__ void kernel(__grid_constant__ const S gc_arg1, int arg2, __grid_constant__ const int gc_arg3) {}
+
+// dependent arguments get diagnosed after instantiation.
+template <typename T>
+__global__ void tkernel_const(__grid_constant__ const T arg) {}
+
+template <typename T>
+__global__ void tkernel(int dummy, __grid_constant__ T arg) {}
+
+void foo() {
+ tkernel_const<const S><<<1,1>>>({});
+ tkernel_const<S><<<1,1>>>({});
+ tkernel<const S><<<1,1>>>(1, {});
+}
+//.
+//.
+// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]}
+// CHECK: [[META1]] = !{i32 1, i32 3}
+// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]}
+// CHECK: [[META3]] = !{i32 1}
+// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]}
+// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]}
+// CHECK: [[META6]] = !{i32 2}
+//.
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index e28b0775410c0a..b159a45c25a7f4 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -39,6 +39,7 @@
// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
+// CHECK-NEXT: CUDAGridConstant (SubjectMatchRule_variable_is_parameter)
// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)
diff --git a/clang/test/SemaCUDA/Inputs/cuda.h b/clang/test/SemaCUDA/Inputs/cuda.h
index 405ef8bb807d90..10db947d8246ca 100644
--- a/clang/test/SemaCUDA/Inputs/cuda.h
+++ b/clang/test/SemaCUDA/Inputs/cuda.h
@@ -11,6 +11,7 @@
#define __host__ __attribute__((host))
#define __shared__ __attribute__((shared))
#define __managed__ __attribute__((managed))
+#define __grid_constant__ __attribute__((grid_constant))
#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
struct dim3 {
diff --git a/clang/test/SemaCUDA/grid-constant.cu b/clang/test/SemaCUDA/grid-constant.cu
new file mode 100644
index 00000000000000..876e389355fd4b
--- /dev/null
+++ b/clang/test/SemaCUDA/grid-constant.cu
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+#include "Inputs/cuda.h"
+
+struct S {};
+
+__global__ void kernel_struct(__grid_constant__ const S arg) {}
+__global__ void kernel_scalar(__grid_constant__ const int arg) {}
+
+__global__ void gc_kernel_non_const(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+void non_kernel(__grid_constant__ S arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+// templates w/ non-dependent argument types get diagnosed right
+// away, without instantiation.
+template <typename T>
+__global__ void tkernel_nd_const(__grid_constant__ const S arg, T dummy) {}
+template <typename T>
+__global__ void tkernel_nd_non_const(__grid_constant__ S arg, T dummy) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+// dependent arguments get diagnosed after instantiation.
+template <typename T>
+__global__ void tkernel_const(__grid_constant__ const T arg) {}
+
+template <typename T>
+__global__ void tkernel(__grid_constant__ T arg) {} // expected-error {{__grid_constant__ is only allowed on const-qualified kernel parameters}}
+
+void foo() {
+ tkernel_const<const S><<<1,1>>>({});
+ tkernel_const<S><<<1,1>>>({});
+ tkernel<const S><<<1,1>>>({});
+ tkernel<S><<<1,1>>>({}); // expected-note {{in instantiation of function template specialization 'tkernel<S>' requested here}}
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/114589
More information about the cfe-commits
mailing list