[clang] 90abdf8 - [CUDA][HIP][NFC] add CodeGenModule::shouldEmitCUDAGlobalVar (#98543)
Author: Yaxun (Sam) Liu
Date: 2024-07-11T21:52:04-04:00
New Revision: 90abdf83e273586a43e1270e5f0a11de5cc35383
URL: https://github.com/llvm/llvm-project/commit/90abdf83e273586a43e1270e5f0a11de5cc35383
DIFF: https://github.com/llvm/llvm-project/commit/90abdf83e273586a43e1270e5f0a11de5cc35383.diff
LOG: [CUDA][HIP][NFC] add CodeGenModule::shouldEmitCUDAGlobalVar (#98543)
Extract the logic that decides whether to emit a global variable, based on its
CUDA/HIP host/device-related attributes, into
CodeGenModule::shouldEmitCUDAGlobalVar so it can be reused elsewhere.
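For context, a hedged sketch of how the extracted predicate classifies
namespace-scope variables (the variable names are illustrative, not from the
patch). On the host side it returns true for every variable, since shadows of
device-side variables must be emitted there; on the device side only variables
with device-related attributes (or builtin surface/texture types) are kept:

    __device__   int DevVar;   // device compile: emitted; host compile: shadow emitted
    __constant__ int ConstVar; // likewise emitted on both sides
    int HostVar;               // device compile: skipped; host compile: emitted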
Added:
Modified:
clang/lib/CodeGen/CodeGenModule.cpp
clang/lib/CodeGen/CodeGenModule.h
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 5c810cd332185..6c10b4a2edef8 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -3702,6 +3702,19 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
return D->isImplicit();
}
+bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const {
+ assert(LangOpts.CUDA && "Should not be called by non-CUDA languages");
+ // We need to emit host-side 'shadows' for all global
+ // device-side variables because the CUDA runtime needs their
+ // size and host-side address in order to provide access to
+ // their device-side incarnations.
+ return !LangOpts.CUDAIsDevice || Global->hasAttr<CUDADeviceAttr>() ||
+ Global->hasAttr<CUDAConstantAttr>() ||
+ Global->hasAttr<CUDASharedAttr>() ||
+ Global->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ Global->getType()->isCUDADeviceBuiltinTextureType();
+}
+
void CodeGenModule::EmitGlobal(GlobalDecl GD) {
const auto *Global = cast<ValueDecl>(GD.getDecl());
@@ -3726,36 +3739,27 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// Non-constexpr non-lambda implicit host device functions are not emitted
// unless they are used on device side.
if (LangOpts.CUDA) {
- if (LangOpts.CUDAIsDevice) {
+ assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
+ "Expected Variable or Function");
+ if (const auto *VD = dyn_cast<VarDecl>(Global)) {
+ if (!shouldEmitCUDAGlobalVar(VD))
+ return;
+ } else if (LangOpts.CUDAIsDevice) {
const auto *FD = dyn_cast<FunctionDecl>(Global);
if ((!Global->hasAttr<CUDADeviceAttr>() ||
- (LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
+ (LangOpts.OffloadImplicitHostDeviceTemplates &&
hasImplicitAttr<CUDAHostAttr>(FD) &&
hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr() &&
!isLambdaCallOperator(FD) &&
!getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) &&
!Global->hasAttr<CUDAGlobalAttr>() &&
- !Global->hasAttr<CUDAConstantAttr>() &&
- !Global->hasAttr<CUDASharedAttr>() &&
- !Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
- !Global->getType()->isCUDADeviceBuiltinTextureType() &&
!(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
!Global->hasAttr<CUDAHostAttr>()))
return;
- } else {
- // We need to emit host-side 'shadows' for all global
- // device-side variables because the CUDA runtime needs their
- // size and host-side address in order to provide access to
- // their device-side incarnations.
-
- // So device-only functions are the only things we skip.
- if (isa<FunctionDecl>(Global) && !Global->hasAttr<CUDAHostAttr>() &&
- Global->hasAttr<CUDADeviceAttr>())
- return;
-
- assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
- "Expected Variable or Function");
- }
+ // Device-only functions are the only things we skip.
+ } else if (!Global->hasAttr<CUDAHostAttr>() &&
+ Global->hasAttr<CUDADeviceAttr>())
+ return;
}
if (LangOpts.OpenMP) {
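To summarize the restructured control flow (a paraphrase of the hunk above,
not additional patch content), the CUDA block in EmitGlobal now reads roughly:

    if (LangOpts.CUDA) {
      assert((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
             "Expected Variable or Function");
      if (const auto *VD = dyn_cast<VarDecl>(Global)) {
        if (!shouldEmitCUDAGlobalVar(VD)) // one predicate for both sides
          return;
      } else if (LangOpts.CUDAIsDevice) {
        // ... function-specific device-side checks (see hunk above) ...
      } else if (!Global->hasAttr<CUDAHostAttr>() &&
                 Global->hasAttr<CUDADeviceAttr>()) {
        return; // host side skips only device-only functions
      }
    }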
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 585c4ea697fea..caa3786c033b5 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -563,6 +563,9 @@ class CodeGenModule : public CodeGenTypeCache {
bool isTriviallyRecursive(const FunctionDecl *F);
bool shouldEmitFunction(GlobalDecl GD);
+ // Whether a global variable should be emitted by CUDA/HIP host/device
+ // related attributes.
+ bool shouldEmitCUDAGlobalVar(const VarDecl *VD) const;
bool shouldOpportunisticallyEmitVTables();
/// Map used to be sure we don't emit the same CompoundLiteral twice.
llvm::DenseMap<const CompoundLiteralExpr *, llvm::GlobalVariable *>
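A hedged sketch of how another CodeGenModule member could reuse the new
helper; the surrounding function someEmitter and its parameter D are
hypothetical, not part of the patch:

    void CodeGenModule::someEmitter(const VarDecl *D) { // hypothetical caller
      if (LangOpts.CUDA && !shouldEmitCUDAGlobalVar(D))
        return; // variable belongs to the other side of the CUDA/HIP compilation
      // ... proceed with emission ...
    }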