[PATCH] D88786: [CUDA] Suppress comdat on host-side shadow variables registered by __cuda_register_globals

Fangrui Song via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Sat Oct 3 11:52:00 PDT 2020


MaskRay created this revision.
MaskRay added reviewers: jlebar, tra.
Herald added subscribers: cfe-commits, yaxunl.
Herald added a project: clang.
MaskRay requested review of this revision.

Host-side shadow variables of external declarations of device-side global
variables have internal linkage and are referenced by `__cuda_register_globals`.

If such a variable has a comdat group (e.g. a C++17 inline variable), we need to
suppress the comdat group when the linkage is changed to internal (which
represents a local symbol), because:

- the copy in this translation unit can be discarded (a copy from another translation unit is picked)
- accessing a discarded local symbol from outside the section group is disallowed by the ELF specification:

> A symbol table entry with STB_LOCAL binding that is defined relative to one of a group's sections, and that is contained in a symbol table section that is not part of the group, must be discarded if the group members are discarded. References to this symbol table entry from outside the group are not allowed.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88786

Files:
  clang/lib/CodeGen/CodeGenModule.cpp
  clang/test/CodeGenCUDA/device-stub.cu


Index: clang/test/CodeGenCUDA/device-stub.cu
===================================================================
--- clang/test/CodeGenCUDA/device-stub.cu
+++ clang/test/CodeGenCUDA/device-stub.cu
@@ -29,6 +29,10 @@
 // RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
 // RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
 // RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \
+// RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
+// RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
+// RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW,LNX_17
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=9.2 -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
@@ -91,9 +95,17 @@
 // WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef
 __constant__ int ext_constant_var_def = 2;
 
+#if __cplusplus > 201402L
+/// The local symbol _ZN1C10inline_varE cannot have a comdat, because it is
+/// referenced from a section (__cuda_module_ctor's section) outside the section group.
+// LNX_17: @_ZN1C10inline_varE = internal constant i32 undef, align 4{{$}}
+struct C {
+  __device__ static constexpr int inline_var = 17;
+};
+#endif
 
 void use_pointers() {
-  int *p;
+  const int *p;
   p = &device_var;
   p = &constant_var;
   p = &shared_var;
@@ -101,6 +113,9 @@
   p = &ext_device_var;
   p = &ext_constant_var;
   p = &ext_host_var;
+#if __cplusplus > 201402L
+  p = &C::inline_var;
+#endif
 }
 
 // Make sure that all parts of GPU code init/cleanup are there:
@@ -185,6 +200,7 @@
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
+// LNX_17:  call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}@_ZN1C10inline_varE{{[^,]*}}, {{[^@]*}}@5, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0
 // ALL: ret void
 
 // Test that we've built a constructor.
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -4114,6 +4114,7 @@
   // Is accessible from all the threads within the grid and from the host
   // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
   // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+  bool CudaModuleCtorReferenced = false;
   if (GV && LangOpts.CUDA) {
     if (LangOpts.CUDAIsDevice) {
       if (Linkage != llvm::GlobalValue::InternalLinkage &&
@@ -4128,10 +4129,16 @@
         Linkage = llvm::GlobalValue::InternalLinkage;
         // Shadow variables and their properties must be registered with CUDA
         // runtime. Skip Extern global variables, which will be registered in
-        // the TU where they are defined.
-        if (!D->hasExternalStorage())
+        // the TU where they are defined. The variable cannot be placed in a
+        // comdat, because the copy in this translation unit can be discarded
+        // and referencing a discarded local symbol from outside the comdat
+        // (__cuda_module_ctor is in a different section) is disallowed by the
+        // ELF spec.
+        if (!D->hasExternalStorage()) {
           getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
                                              D->hasAttr<CUDAConstantAttr>());
+          CudaModuleCtorReferenced = true;
+        }
       } else if (D->hasAttr<CUDASharedAttr>()) {
         // __shared__ variables are odd. Shadows do get created, but
         // they are not registered with the CUDA runtime, so they
@@ -4235,7 +4242,8 @@
     setTLSMode(GV, *D);
   }
 
-  maybeSetTrivialComdat(*D, *GV);
+  if (!CudaModuleCtorReferenced)
+    maybeSetTrivialComdat(*D, *GV);
 
   // Emit the initializer function if necessary.
   if (NeedsGlobalCtor || NeedsGlobalDtor)


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88786.295997.patch
Type: text/x-patch
Size: 4319 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20201003/caf5a51e/attachment-0001.bin>


More information about the cfe-commits mailing list