[clang] 0b2af1a - [NFC][CUDA] Refactor registering device variable
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 3 11:30:38 PST 2021
Author: Yaxun (Sam) Liu
Date: 2021-02-03T14:29:51-05:00
New Revision: 0b2af1a2889423bb797856841ac81cf10d01c696
URL: https://github.com/llvm/llvm-project/commit/0b2af1a2889423bb797856841ac81cf10d01c696
DIFF: https://github.com/llvm/llvm-project/commit/0b2af1a2889423bb797856841ac81cf10d01c696.diff
LOG: [NFC][CUDA] Refactor registering device variable
Extract registering device variable to CUDA runtime codegen function since it
will be called in multiple places.
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D95558
Added:
Modified:
clang/lib/CodeGen/CGCUDANV.cpp
clang/lib/CodeGen/CGCUDARuntime.h
clang/lib/CodeGen/CodeGenModule.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 33a2d6f4483e..42105480eb7c 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -120,12 +120,8 @@ class CGNVCUDARuntime : public CGCUDARuntime {
void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
std::string getDeviceSideName(const NamedDecl *ND) override;
-public:
- CGNVCUDARuntime(CodeGenModule &CGM);
-
- void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
- bool Extern, bool Constant) override {
+ bool Extern, bool Constant) {
DeviceVars.push_back({&Var,
VD,
{DeviceVarFlags::Variable, Extern, Constant,
@@ -133,7 +129,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
/*Normalized*/ false, 0}});
}
void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
- bool Extern, int Type) override {
+ bool Extern, int Type) {
DeviceVars.push_back({&Var,
VD,
{DeviceVarFlags::Surface, Extern, /*Constant*/ false,
@@ -141,17 +137,27 @@ class CGNVCUDARuntime : public CGCUDARuntime {
/*Normalized*/ false, Type}});
}
void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
- bool Extern, int Type, bool Normalized) override {
+ bool Extern, int Type, bool Normalized) {
DeviceVars.push_back({&Var,
VD,
{DeviceVarFlags::Texture, Extern, /*Constant*/ false,
/*Managed*/ false, Normalized, Type}});
}
+public:
+ CGNVCUDARuntime(CodeGenModule &CGM);
+
+ void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
+ void handleVarRegistration(const VarDecl *VD,
+ llvm::GlobalVariable &Var) override;
+
/// Creates module constructor function
llvm::Function *makeModuleCtorFunction() override;
/// Creates module destructor function
llvm::Function *makeModuleDtorFunction() override;
+ void
+ internalizeDeviceSideVar(const VarDecl *D,
+ llvm::GlobalValue::LinkageTypes &Linkage) override;
};
}
@@ -915,3 +921,65 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
return new CGNVCUDARuntime(CGM);
}
+
+void CGNVCUDARuntime::internalizeDeviceSideVar(
+ const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
+ // Host-side shadows of external declarations of device-side
+ // global variables become internal definitions. These have to
+ // be internal in order to prevent name conflicts with global
+ // host variables with the same name in a
diff erent TUs.
+ //
+ // __shared__ variables are odd. Shadows do get created, but
+ // they are not registered with the CUDA runtime, so they
+ // can't really be used to access their device-side
+ // counterparts. It's not clear yet whether it's nvcc's bug or
+ // a feature, but we've got to do the same for compatibility.
+ if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+ D->hasAttr<CUDASharedAttr>() ||
+ D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ D->getType()->isCUDADeviceBuiltinTextureType()) {
+ Linkage = llvm::GlobalValue::InternalLinkage;
+ }
+}
+
+void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
+ llvm::GlobalVariable &GV) {
+ if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
+ // Shadow variables and their properties must be registered with CUDA
+ // runtime. Skip Extern global variables, which will be registered in
+ // the TU where they are defined.
+ //
+ // Don't register a C++17 inline variable. The local symbol can be
+ // discarded and referencing a discarded local symbol from outside the
+ // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+ // TODO: Reject __device__ constexpr and __device__ inline in Sema.
+ if (!D->hasExternalStorage() && !D->isInline())
+ registerDeviceVar(D, GV, !D->hasDefinition(),
+ D->hasAttr<CUDAConstantAttr>());
+ } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+ D->getType()->isCUDADeviceBuiltinTextureType()) {
+ // Builtin surfaces and textures and their template arguments are
+ // also registered with CUDA runtime.
+ const ClassTemplateSpecializationDecl *TD =
+ cast<ClassTemplateSpecializationDecl>(
+ D->getType()->getAs<RecordType>()->getDecl());
+ const TemplateArgumentList &Args = TD->getTemplateArgs();
+ if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
+ assert(Args.size() == 2 &&
+ "Unexpected number of template arguments of CUDA device "
+ "builtin surface type.");
+ auto SurfType = Args[1].getAsIntegral();
+ if (!D->hasExternalStorage())
+ registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
+ } else {
+ assert(Args.size() == 3 &&
+ "Unexpected number of template arguments of CUDA device "
+ "builtin texture type.");
+ auto TexType = Args[1].getAsIntegral();
+ auto Normalized = Args[2].getAsIntegral();
+ if (!D->hasExternalStorage())
+ registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
+ Normalized.getZExtValue());
+ }
+ }
+}
diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index ba3404ead368..59d550102407 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -16,6 +16,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/GlobalValue.h"
namespace llvm {
class Function;
@@ -80,12 +81,10 @@ class CGCUDARuntime {
/// Emits a kernel launch stub.
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
- virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
- bool Extern, bool Constant) = 0;
- virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
- bool Extern, int Type) = 0;
- virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
- bool Extern, int Type, bool Normalized) = 0;
+
+ /// Check whether a variable is a device variable and register it if true.
+ virtual void handleVarRegistration(const VarDecl *VD,
+ llvm::GlobalVariable &Var) = 0;
/// Constructs and returns a module initialization function or nullptr if it's
/// not needed. Must be called after all kernels have been emitted.
@@ -98,6 +97,11 @@ class CGCUDARuntime {
/// Returns function or variable name on device side even if the current
/// compilation is for host.
virtual std::string getDeviceSideName(const NamedDecl *ND) = 0;
+
+ /// Adjust linkage of shadow variables in host compilation.
+ virtual void
+ internalizeDeviceSideVar(const VarDecl *D,
+ llvm::GlobalValue::LinkageTypes &Linkage) = 0;
};
/// Creates an instance of a CUDA runtime class.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c9cf4076579b..b133d2a84a59 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4297,59 +4297,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
GV->setExternallyInitialized(true);
} else {
- // Host-side shadows of external declarations of device-side
- // global variables become internal definitions. These have to
- // be internal in order to prevent name conflicts with global
- // host variables with the same name in a
diff erent TUs.
- if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
- Linkage = llvm::GlobalValue::InternalLinkage;
- // Shadow variables and their properties must be registered with CUDA
- // runtime. Skip Extern global variables, which will be registered in
- // the TU where they are defined.
- //
- // Don't register a C++17 inline variable. The local symbol can be
- // discarded and referencing a discarded local symbol from outside the
- // comdat (__cuda_register_globals) is disallowed by the ELF spec.
- // TODO: Reject __device__ constexpr and __device__ inline in Sema.
- if (!D->hasExternalStorage() && !D->isInline())
- getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
- D->hasAttr<CUDAConstantAttr>());
- } else if (D->hasAttr<CUDASharedAttr>()) {
- // __shared__ variables are odd. Shadows do get created, but
- // they are not registered with the CUDA runtime, so they
- // can't really be used to access their device-side
- // counterparts. It's not clear yet whether it's nvcc's bug or
- // a feature, but we've got to do the same for compatibility.
- Linkage = llvm::GlobalValue::InternalLinkage;
- } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
- D->getType()->isCUDADeviceBuiltinTextureType()) {
- // Builtin surfaces and textures and their template arguments are
- // also registered with CUDA runtime.
- Linkage = llvm::GlobalValue::InternalLinkage;
- const ClassTemplateSpecializationDecl *TD =
- cast<ClassTemplateSpecializationDecl>(
- D->getType()->getAs<RecordType>()->getDecl());
- const TemplateArgumentList &Args = TD->getTemplateArgs();
- if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
- assert(Args.size() == 2 &&
- "Unexpected number of template arguments of CUDA device "
- "builtin surface type.");
- auto SurfType = Args[1].getAsIntegral();
- if (!D->hasExternalStorage())
- getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
- SurfType.getSExtValue());
- } else {
- assert(Args.size() == 3 &&
- "Unexpected number of template arguments of CUDA device "
- "builtin texture type.");
- auto TexType = Args[1].getAsIntegral();
- auto Normalized = Args[2].getAsIntegral();
- if (!D->hasExternalStorage())
- getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
- TexType.getSExtValue(),
- Normalized.getZExtValue());
- }
- }
+ getCUDARuntime().internalizeDeviceSideVar(D, Linkage);
+ getCUDARuntime().handleVarRegistration(D, *GV);
}
}
More information about the cfe-commits
mailing list