[clang] a2cc883 - [CUDA] Don't call __cudaRegisterVariable on C++17 inline variables

Fangrui Song via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 5 12:55:02 PDT 2020


Author: Fangrui Song
Date: 2020-10-05T12:53:59-07:00
New Revision: a2cc8833683dd124cf2ee96f6d17f7f835da1fc8

URL: https://github.com/llvm/llvm-project/commit/a2cc8833683dd124cf2ee96f6d17f7f835da1fc8
DIFF: https://github.com/llvm/llvm-project/commit/a2cc8833683dd124cf2ee96f6d17f7f835da1fc8.diff

LOG: [CUDA] Don't call __cudaRegisterVariable on C++17 inline variables

D17779: host-side shadow variables of external declarations of device-side
global variables have internal linkage and are referenced by
`__cuda_register_globals`.

nvcc from CUDA 11 does not allow `__device__ inline` or `__device__ constexpr`
(C++17 inline variables) but clang has incorrectly supported them for a while:

```
error: A __device__ variable cannot be marked constexpr
error: An inline __device__/__constant__/__managed__ variable must have internal linkage when the program is compiled in whole program mode (-rdc=false)
```

If such a variable (which has a comdat group) is discarded (a copy from another
translation unit is prevailing and selected), accessing the variable from
outside the section group (`__cuda_register_globals`) is a violation of the ELF
specification and will be rejected by linkers:

> A symbol table entry with STB_LOCAL binding that is defined relative to one of a group's sections, and that is contained in a symbol table section that is not part of the group, must be discarded if the group members are discarded. References to this symbol table entry from outside the group are not allowed.

As a workaround, don't register such inline variables for now.
(If we register the variables in all TUs, we will keep multiple instances of the shadow and break the C++ semantics for inline variables).
We should reject such variables in Sema but our internal users need some time to migrate.

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D88786

Added: 
    

Modified: 
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/test/CodeGenCUDA/device-stub.cu

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c3457865c0b0..93b49ec981e8 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4129,7 +4129,12 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
         // Shadow variables and their properties must be registered with CUDA
         // runtime. Skip Extern global variables, which will be registered in
         // the TU where they are defined.
-        if (!D->hasExternalStorage())
+        //
+        // Don't register a C++17 inline variable. The local symbol can be
+        // discarded and referencing a discarded local symbol from outside the
+        // comdat (__cuda_register_globals) is disallowed by the ELF spec.
+        // TODO: Reject __device__ constexpr and __device__ inline in Sema.
+        if (!D->hasExternalStorage() && !D->isInline())
           getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
                                              D->hasAttr<CUDAConstantAttr>());
       } else if (D->hasAttr<CUDASharedAttr>()) {

diff  --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu
index ca21116fc989..16bbef6cfad5 100644
--- a/clang/test/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CodeGenCUDA/device-stub.cu
@@ -29,6 +29,10 @@
 // RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
 // RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
 // RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -std=c++17 \
+// RUN:     -target-sdk-version=9.2 -fgpu-rdc -fcuda-include-gpubinary %t -o - \
+// RUN:   | FileCheck %s -allow-deprecated-dag-overlap \
+// RUN:       --check-prefixes=ALL,LNX,RDC,CUDA,CUDARDC,CUDA_NEW,LNX_17
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
 // RUN:     -target-sdk-version=9.2 -o - \
 // RUN:   | FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN
@@ -91,9 +95,18 @@ __device__ int ext_device_var_def = 1;
 // WIN-DAG: @"?ext_constant_var_def@@3HA" = internal global i32 undef
 __constant__ int ext_constant_var_def = 2;
 
+#if __cplusplus > 201402L
+/// FIXME: Reject __device__ constexpr and inline variables in Sema.
+// LNX_17: @inline_var = internal global i32 undef, comdat, align 4{{$}}
+// LNX_17: @_ZN1C17member_inline_varE = internal constant i32 undef, comdat, align 4{{$}}
+__device__ inline int inline_var = 3;
+struct C {
+  __device__ static constexpr int member_inline_var = 4;
+};
+#endif
 
 void use_pointers() {
-  int *p;
+  const int *p;
   p = &device_var;
   p = &constant_var;
   p = &shared_var;
@@ -101,6 +114,10 @@ void use_pointers() {
   p = &ext_device_var;
   p = &ext_constant_var;
   p = &ext_host_var;
+#if __cplusplus > 201402L
+  p = &inline_var;
+  p = &C::member_inline_var;
+#endif
 }
 
 // Make sure that all parts of GPU code init/cleanup are there:
@@ -185,6 +202,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}constant_var{{[^,]*}}, {{[^@]*}}@2, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_device_var_def{{[^,]*}}, {{[^@]*}}@3, {{.*}}i32 0, {{i32|i64}} 4, i32 0, i32 0
 // ALL-DAG: call void {{.*}}[[PREFIX]]RegisterVar(i8** %0, {{.*}}ext_constant_var_def{{[^,]*}}, {{[^@]*}}@4, {{.*}}i32 0, {{i32|i64}} 4, i32 1, i32 0
+// LNX_17-NOT: [[PREFIX]]RegisterVar(i8** %0, {{.*}}inline_var
 // ALL: ret void
 
 // Test that we've built a constructor.


        


More information about the cfe-commits mailing list