[clang] 0b2af1a - [NFC][CUDA] Refactor registering device variable

Wed Feb 3 11:30:38 PST 2021

Author: Yaxun (Sam) Liu
Date: 2021-02-03T14:29:51-05:00
New Revision: 0b2af1a2889423bb797856841ac81cf10d01c696

URL: https://github.com/llvm/llvm-project/commit/0b2af1a2889423bb797856841ac81cf10d01c696
DIFF: https://github.com/llvm/llvm-project/commit/0b2af1a2889423bb797856841ac81cf10d01c696.diff

LOG: [NFC][CUDA] Refactor registering device variable

Extract registering device variable to CUDA runtime codegen function since it
will be called in multiple places.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D95558

Added: 
    

Modified: 
    clang/lib/CodeGen/CGCUDANV.cpp
    clang/lib/CodeGen/CGCUDARuntime.h
    clang/lib/CodeGen/CodeGenModule.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 33a2d6f4483e..42105480eb7c 100644

--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -120,12 +120,8 @@ class CGNVCUDARuntime : public CGCUDARuntime {
   void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
   std::string getDeviceSideName(const NamedDecl *ND) override;
 
-public:
-  CGNVCUDARuntime(CodeGenModule &CGM);
-
-  void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
   void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
-                         bool Extern, bool Constant) override {
+                         bool Extern, bool Constant) {
     DeviceVars.push_back({&Var,
                           VD,
                           {DeviceVarFlags::Variable, Extern, Constant,
@@ -133,7 +129,7 @@ class CGNVCUDARuntime : public CGCUDARuntime {
                            /*Normalized*/ false, 0}});
   }
   void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
-                          bool Extern, int Type) override {
+                          bool Extern, int Type) {
     DeviceVars.push_back({&Var,
                           VD,
                           {DeviceVarFlags::Surface, Extern, /*Constant*/ false,
@@ -141,17 +137,27 @@ class CGNVCUDARuntime : public CGCUDARuntime {
                            /*Normalized*/ false, Type}});
   }
   void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
-                         bool Extern, int Type, bool Normalized) override {
+                         bool Extern, int Type, bool Normalized) {
     DeviceVars.push_back({&Var,
                           VD,
                           {DeviceVarFlags::Texture, Extern, /*Constant*/ false,
                            /*Managed*/ false, Normalized, Type}});
   }
 
+public:
+  CGNVCUDARuntime(CodeGenModule &CGM);
+
+  void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
+  void handleVarRegistration(const VarDecl *VD,
+                             llvm::GlobalVariable &Var) override;
+
   /// Creates module constructor function
   llvm::Function *makeModuleCtorFunction() override;
   /// Creates module destructor function
   llvm::Function *makeModuleDtorFunction() override;
+  void
+  internalizeDeviceSideVar(const VarDecl *D,
+                           llvm::GlobalValue::LinkageTypes &Linkage) override;
 };
 
 }
@@ -915,3 +921,65 @@ llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
   return new CGNVCUDARuntime(CGM);
 }
+
+void CGNVCUDARuntime::internalizeDeviceSideVar(
+    const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
+  // Host-side shadows of external declarations of device-side
+  // global variables become internal definitions. These have to
+  // be internal in order to prevent name conflicts with global
+  // host variables with the same name in a 
diff erent TUs.
+  //
+  // __shared__ variables are odd. Shadows do get created, but
+  // they are not registered with the CUDA runtime, so they
+  // can't really be used to access their device-side
+  // counterparts. It's not clear yet whether it's nvcc's bug or
+  // a feature, but we've got to do the same for compatibility.
+  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
+      D->hasAttr<CUDASharedAttr>() ||
+      D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+      D->getType()->isCUDADeviceBuiltinTextureType()) {
+    Linkage = llvm::GlobalValue::InternalLinkage;
+  }
+}
+
+void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
+                                            llvm::GlobalVariable &GV) {
+  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
+    // Shadow variables and their properties must be registered with CUDA
+    // runtime. Skip Extern global variables, which will be registered in
+    // the TU where they are defined.
+    //
+    // Don't register a C++17 inline variable. The local symbol can be
+    // discarded and referencing a discarded local symbol from outside the
+    // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+    // TODO: Reject __device__ constexpr and __device__ inline in Sema.
+    if (!D->hasExternalStorage() && !D->isInline())
+      registerDeviceVar(D, GV, !D->hasDefinition(),
+                        D->hasAttr<CUDAConstantAttr>());
+  } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
+             D->getType()->isCUDADeviceBuiltinTextureType()) {
+    // Builtin surfaces and textures and their template arguments are
+    // also registered with CUDA runtime.
+    const ClassTemplateSpecializationDecl *TD =
+        cast<ClassTemplateSpecializationDecl>(
+            D->getType()->getAs<RecordType>()->getDecl());
+    const TemplateArgumentList &Args = TD->getTemplateArgs();
+    if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
+      assert(Args.size() == 2 &&
+             "Unexpected number of template arguments of CUDA device "
+             "builtin surface type.");
+      auto SurfType = Args[1].getAsIntegral();
+      if (!D->hasExternalStorage())
+        registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
+    } else {
+      assert(Args.size() == 3 &&
+             "Unexpected number of template arguments of CUDA device "
+             "builtin texture type.");
+      auto TexType = Args[1].getAsIntegral();
+      auto Normalized = Args[2].getAsIntegral();
+      if (!D->hasExternalStorage())
+        registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
+                          Normalized.getZExtValue());
+    }
+  }
+}

diff  --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index ba3404ead368..59d550102407 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -16,6 +16,7 @@
 #define LLVM_CLANG_LIB_CODEGEN_CGCUDARUNTIME_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/IR/GlobalValue.h"
 
 namespace llvm {
 class Function;
@@ -80,12 +81,10 @@ class CGCUDARuntime {
 
   /// Emits a kernel launch stub.
   virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
-  virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
-                                 bool Extern, bool Constant) = 0;
-  virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
-                                  bool Extern, int Type) = 0;
-  virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
-                                 bool Extern, int Type, bool Normalized) = 0;
+
+  /// Check whether a variable is a device variable and register it if true.
+  virtual void handleVarRegistration(const VarDecl *VD,
+                                     llvm::GlobalVariable &Var) = 0;
 
   /// Constructs and returns a module initialization function or nullptr if it's
   /// not needed. Must be called after all kernels have been emitted.
@@ -98,6 +97,11 @@ class CGCUDARuntime {
   /// Returns function or variable name on device side even if the current
   /// compilation is for host.
   virtual std::string getDeviceSideName(const NamedDecl *ND) = 0;
+
+  /// Adjust linkage of shadow variables in host compilation.
+  virtual void
+  internalizeDeviceSideVar(const VarDecl *D,
+                           llvm::GlobalValue::LinkageTypes &Linkage) = 0;
 };
 
 /// Creates an instance of a CUDA runtime class.

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c9cf4076579b..b133d2a84a59 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4297,59 +4297,8 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
           (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()))
         GV->setExternallyInitialized(true);
     } else {
-      // Host-side shadows of external declarations of device-side
-      // global variables become internal definitions. These have to
-      // be internal in order to prevent name conflicts with global
-      // host variables with the same name in a 
diff erent TUs.
-      if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
-        Linkage = llvm::GlobalValue::InternalLinkage;
-        // Shadow variables and their properties must be registered with CUDA
-        // runtime. Skip Extern global variables, which will be registered in
-        // the TU where they are defined.
-        //
-        // Don't register a C++17 inline variable. The local symbol can be
-        // discarded and referencing a discarded local symbol from outside the
-        // comdat (__cuda_register_globals) is disallowed by the ELF spec.
-        // TODO: Reject __device__ constexpr and __device__ inline in Sema.
-        if (!D->hasExternalStorage() && !D->isInline())
-          getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
-                                             D->hasAttr<CUDAConstantAttr>());
-      } else if (D->hasAttr<CUDASharedAttr>()) {
-        // __shared__ variables are odd. Shadows do get created, but
-        // they are not registered with the CUDA runtime, so they
-        // can't really be used to access their device-side
-        // counterparts. It's not clear yet whether it's nvcc's bug or
-        // a feature, but we've got to do the same for compatibility.
-        Linkage = llvm::GlobalValue::InternalLinkage;
-      } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
-                 D->getType()->isCUDADeviceBuiltinTextureType()) {
-        // Builtin surfaces and textures and their template arguments are
-        // also registered with CUDA runtime.
-        Linkage = llvm::GlobalValue::InternalLinkage;
-        const ClassTemplateSpecializationDecl *TD =
-            cast<ClassTemplateSpecializationDecl>(
-                D->getType()->getAs<RecordType>()->getDecl());
-        const TemplateArgumentList &Args = TD->getTemplateArgs();
-        if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
-          assert(Args.size() == 2 &&
-                 "Unexpected number of template arguments of CUDA device "
-                 "builtin surface type.");
-          auto SurfType = Args[1].getAsIntegral();
-          if (!D->hasExternalStorage())
-            getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
-                                                SurfType.getSExtValue());
-        } else {
-          assert(Args.size() == 3 &&
-                 "Unexpected number of template arguments of CUDA device "
-                 "builtin texture type.");
-          auto TexType = Args[1].getAsIntegral();
-          auto Normalized = Args[2].getAsIntegral();
-          if (!D->hasExternalStorage())
-            getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
-                                               TexType.getSExtValue(),
-                                               Normalized.getZExtValue());
-        }
-      }
+      getCUDARuntime().internalizeDeviceSideVar(D, Linkage);
+      getCUDARuntime().handleVarRegistration(D, *GV);
     }
   }