[clang] f2677af - [CUDA][HIP] Externalize device var in anonymous namespace
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 6 09:04:31 PDT 2023
Author: Yaxun (Sam) Liu
Date: 2023-06-06T12:03:48-04:00
New Revision: f2677afe91592673663d24a63706c3218c477c1c
URL: https://github.com/llvm/llvm-project/commit/f2677afe91592673663d24a63706c3218c477c1c
DIFF: https://github.com/llvm/llvm-project/commit/f2677afe91592673663d24a63706c3218c477c1c.diff
LOG: [CUDA][HIP] Externalize device var in anonymous namespace
Device variables in an anonymous namespace may be
referenced by host code, therefore they need to
be externalized in a similar way as a static device
variables or kernels in an anonymous namespace.
Fixes: https://github.com/ROCm-Developer-Tools/HIP/issues/3246
Reviewed by: Artem Belevich
Differential Revision: https://reviews.llvm.org/D152164
Added:
clang/test/CodeGenCUDA/anon-ns.cu
Modified:
clang/lib/AST/ASTContext.cpp
clang/test/CodeGenCUDA/host-used-device-var.cu
Removed:
clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
################################################################################
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 9af6fa67db1ef..b7d9c3cc46e53 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -13581,16 +13581,17 @@ operator<<(const StreamingDiagnostic &DB,
}
bool ASTContext::mayExternalize(const Decl *D) const {
- bool IsStaticVar =
- isa<VarDecl>(D) && cast<VarDecl>(D)->getStorageClass() == SC_Static;
+ bool IsInternalVar =
+ isa<VarDecl>(D) &&
+ basicGVALinkageForVariable(*this, cast<VarDecl>(D)) == GVA_Internal;
bool IsExplicitDeviceVar = (D->hasAttr<CUDADeviceAttr>() &&
!D->getAttr<CUDADeviceAttr>()->isImplicit()) ||
(D->hasAttr<CUDAConstantAttr>() &&
!D->getAttr<CUDAConstantAttr>()->isImplicit());
- // CUDA/HIP: static managed variables need to be externalized since it is
+ // CUDA/HIP: managed variables need to be externalized since it is
// a declaration in IR, therefore cannot have internal linkage. Kernels in
// anonymous name space needs to be externalized to avoid duplicate symbols.
- return (IsStaticVar &&
+ return (IsInternalVar &&
(D->hasAttr<HIPManagedAttr>() || IsExplicitDeviceVar)) ||
(D->hasAttr<CUDAGlobalAttr>() &&
basicGVALinkageForFunction(*this, cast<FunctionDecl>(D)) ==
diff --git a/clang/test/CodeGenCUDA/anon-ns.cu b/clang/test/CodeGenCUDA/anon-ns.cu
new file mode 100644
index 0000000000000..3c55e9907dd6c
--- /dev/null
+++ b/clang/test/CodeGenCUDA/anon-ns.cu
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
+// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \
+// RUN: -emit-llvm -o - -x hip %s > %t.dev
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++17 -fgpu-rdc \
+// RUN: -emit-llvm -o - -x hip %s > %t.host
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s
+
+// RUN: echo "GPU binary" > %t.fatbin
+
+// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
+// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++17 -fgpu-rdc \
+// RUN: -emit-llvm -o - %s > %t.dev
+
+// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
+// RUN: -aux-triple nvptx -std=c++17 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
+// RUN: -emit-llvm -o - %s > %t.host
+
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s
+// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=COMNEG %s
+
+#include "Inputs/cuda.h"
+
+// HIP-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
+// HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
+// HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
+// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global
+// HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
+
+// CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
+// CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
+// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global
+// CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
+
+// COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global
+// COMNEG-NOT: @{{.*}} = {{.*}} c"_ZN12_GLOBAL__N_12vdE{{.*}}\00"
+
+// HIP-DAG: @llvm.compiler.used = {{.*}}@[[VM]]{{.*}}@[[VT]]{{.*}}@[[VC]]
+// CUDA-DAG: @llvm.compiler.used = {{.*}}@[[VT]]{{.*}}@[[VC]]
+
+// COMMON-DAG: @[[KERNSTR:.*]] = {{.*}} c"[[KERN]]\00"
+// COMMON-DAG: @[[KTXSTR:.*]] = {{.*}} c"[[KTX]]\00"
+// COMMON-DAG: @[[KTLSTR:.*]] = {{.*}} c"[[KTL]]\00"
+// HIP-DAG: @[[VMSTR:.*]] = {{.*}} c"[[VM]]\00"
+// COMMON-DAG: @[[VCSTR:.*]] = {{.*}} c"[[VC]]\00"
+// COMMON-DAG: @[[VTSTR:.*]] = {{.*}} c"[[VT]]\00"
+
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KERNSTR]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTXSTR]]
+// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[KTLSTR]]
+// HIP-DAG: call void @__{{.*}}RegisterManagedVar({{.*}}@[[VMSTR]]
+// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VCSTR]]
+// COMMON-DAG: call void @__{{.*}}RegisterVar({{.*}}@[[VTSTR]]
+
+template <typename T>
+__global__ void kt(T x) {}
+
+template <typename T>
+__device__ T vt;
+
+namespace {
+ struct X {};
+ X x;
+ auto lambda = [](){};
+#if __HIP__
+ __managed__ int vm = 1;
+#endif
+ __constant__ int vc = 2;
+
+ // C should not be externalized since it is used by device code only.
+ __device__ int vd = 3;
+ __global__ void kernel() { vd = 4; }
+}
+
+template<typename T>
+void getSymbol(T *x) {}
+
+void test() {
+ kernel<<<1, 1>>>();
+
+ kt<<<1, 1>>>(x);
+
+ kt<<<1, 1>>>(lambda);
+
+ // A, B, and tempVar<X> should be externalized since they are
+ // used by host code.
+#if __HIP__
+ getSymbol(&vm);
+#endif
+ getSymbol(&vc);
+ getSymbol(&vt<X>);
+}
diff --git a/clang/test/CodeGenCUDA/host-used-device-var.cu b/clang/test/CodeGenCUDA/host-used-device-var.cu
index 2c0d06d07c6f2..7cb31aff84264 100644
--- a/clang/test/CodeGenCUDA/host-used-device-var.cu
+++ b/clang/test/CodeGenCUDA/host-used-device-var.cu
@@ -73,9 +73,8 @@ constexpr int constexpr_var1a = 1;
inline constexpr int constexpr_var1b = 1;
// Check constant constexpr variables ODR-used by host code only.
-// Non-inline constexpr variable has internal linkage, therefore it is not accessible by host and not kept.
-// Inline constexpr variable has linkonce_ord linkage, therefore it can be accessed by host and kept.
-// DEV-NEG-NOT: constexpr_var2a
+// Device-side constexpr variables accessed by host code should be externalized and kept.
+// DEV-DAG: @_ZL15constexpr_var2a = addrspace(4) externally_initialized constant i32 2
// DEV-DAG: @constexpr_var2b = linkonce_odr addrspace(4) externally_initialized constant i32 2
__constant__ constexpr int constexpr_var2a = 2;
inline __constant__ constexpr int constexpr_var2b = 2;
@@ -184,6 +183,7 @@ public:
// Check the exact list of variables to ensure @_ZL2u4 is not among them.
// DEV: @llvm.compiler.used = {{[^@]*}} @_Z10p_add_funcIiE
+// DEV-SAME: {{^[^@]*}} @_ZL15constexpr_var2a
// DEV-SAME: {{^[^@]*}} @_ZL2u3
// DEV-SAME: {{^[^@]*}} @_ZZ4fun1vE11static_var1
// DEV-SAME: {{^[^@]*}} @_ZZZN21TestStaticVarInLambda3funEvENKUlPcE_clES0_E4var2
diff --git a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu b/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
deleted file mode 100644
index bc753d76e5c11..0000000000000
--- a/clang/test/CodeGenCUDA/kernel-in-anon-ns.cu
+++ /dev/null
@@ -1,58 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -cuid=abc \
-// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
-// RUN: -emit-llvm -o - -x hip %s > %t.dev
-
-// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
-// RUN: -aux-triple amdgcn-amd-amdhsa -std=c++11 -fgpu-rdc \
-// RUN: -emit-llvm -o - -x hip %s > %t.host
-
-// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=HIP,COMMON %s
-
-// RUN: echo "GPU binary" > %t.fatbin
-
-// RUN: %clang_cc1 -triple nvptx -fcuda-is-device -cuid=abc \
-// RUN: -aux-triple x86_64-unknown-linux-gnu -std=c++11 -fgpu-rdc \
-// RUN: -emit-llvm -o - %s > %t.dev
-
-// RUN: %clang_cc1 -triple x86_64-gnu-linux -cuid=abc \
-// RUN: -aux-triple nvptx -std=c++11 -fgpu-rdc -fcuda-include-gpubinary %t.fatbin \
-// RUN: -emit-llvm -o - %s > %t.host
-
-// RUN: cat %t.dev %t.host | FileCheck -check-prefixes=CUDA,COMMON %s
-
-#include "Inputs/cuda.h"
-
-// HIP-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv\.intern\.b04fd23c98500190]](
-// HIP-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
-// HIP-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
-
-// CUDA-DAG: define weak_odr {{.*}}void @[[KERN1:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
-// CUDA-DAG: define weak_odr {{.*}}void @[[KERN2:_Z8tempKernIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
-// CUDA-DAG: define weak_odr {{.*}}void @[[KERN3:_Z8tempKernIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
-
-// COMMON-DAG: @[[STR1:.*]] = {{.*}} c"[[KERN1]]\00"
-// COMMON-DAG: @[[STR2:.*]] = {{.*}} c"[[KERN2]]\00"
-// COMMON-DAG: @[[STR3:.*]] = {{.*}} c"[[KERN3]]\00"
-
-// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR1]]
-// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR2]]
-// COMMON-DAG: call i32 @__{{.*}}RegisterFunction({{.*}}@[[STR3]]
-
-
-template <typename T>
-__global__ void tempKern(T x) {}
-
-namespace {
- __global__ void kernel() {}
- struct X {};
- X x;
- auto lambda = [](){};
-}
-
-void test() {
- kernel<<<1, 1>>>();
-
- tempKern<<<1, 1>>>(x);
-
- tempKern<<<1, 1>>>(lambda);
-}
More information about the cfe-commits
mailing list