[clang] [cuda][[HIP] `__constant__` should imply constant (PR #110182)

via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 26 15:30:39 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: Alex Voicu (AlexVlx)

<details>
<summary>Changes</summary>

Currently, `__constant__` variables do not get unconditionally marked as `constant` in IR, which seems a bit odd given their definition. This is generally inconsequential for NVPTX/AMDGPU, since said variables get emitted in the constant address space for those BEs. However, it is potentially significant for e.g. HIP-on-SPIR-V cases, as SPIR-V does not allow casts to/from the constant AS (`UniformConstant`), which forces `__constant__` variables to be emitted in the global AS, thus making IR constness meaningful.

---
Full diff: https://github.com/llvm/llvm-project/pull/110182.diff


11 Files Affected:

- (modified) clang/lib/CodeGen/CodeGenModule.cpp (+3-2) 
- (modified) clang/test/CodeGenCUDA/address-spaces.cu (+1-1) 
- (modified) clang/test/CodeGenCUDA/amdgpu-visibility.cu (+3-3) 
- (modified) clang/test/CodeGenCUDA/anon-ns.cu (+2-2) 
- (modified) clang/test/CodeGenCUDA/device-var-init.cu (+12-12) 
- (modified) clang/test/CodeGenCUDA/device-var-linkage.cu (+4-4) 
- (modified) clang/test/CodeGenCUDA/filter-decl.cu (+1-1) 
- (modified) clang/test/CodeGenCUDA/static-device-var-no-rdc.cu (+1-1) 
- (modified) clang/test/CodeGenCUDA/static-device-var-rdc.cu (+2-2) 
- (modified) clang/test/CodeGenCUDA/template-class-static-member.cu (+1-1) 
- (modified) clang/test/CodeGenHIP/hipspv-addr-spaces.cpp (+1-1) 


``````````diff
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 2381fa93e23fea..25c1c496a4f27f 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -5622,8 +5622,9 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
     emitter->finalize(GV);
 
   // If it is safe to mark the global 'constant', do so now.
-  GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor &&
-                  D->getType().isConstantStorage(getContext(), true, true));
+  GV->setConstant((D->hasAttr<CUDAConstantAttr>() && LangOpts.CUDAIsDevice) ||
+                  (!NeedsGlobalCtor && !NeedsGlobalDtor &&
+                   D->getType().isConstantStorage(getContext(), true, true)));
 
   // If it is in a read-only section, mark it 'constant'.
   if (const SectionAttr *SA = D->getAttr<SectionAttr>()) {
diff --git a/clang/test/CodeGenCUDA/address-spaces.cu b/clang/test/CodeGenCUDA/address-spaces.cu
index 0608c9cabd0489..66903c81b93339 100644
--- a/clang/test/CodeGenCUDA/address-spaces.cu
+++ b/clang/test/CodeGenCUDA/address-spaces.cu
@@ -9,7 +9,7 @@
 // CHECK: @i ={{.*}} addrspace(1) externally_initialized global
 __device__ int i;
 
-// CHECK: @j ={{.*}} addrspace(4) externally_initialized global
+// CHECK: @j ={{.*}} addrspace(4) externally_initialized constant
 __constant__ int j;
 
 // CHECK: @k ={{.*}} addrspace(3) global
diff --git a/clang/test/CodeGenCUDA/amdgpu-visibility.cu b/clang/test/CodeGenCUDA/amdgpu-visibility.cu
index d7dbab112a68c6..ef74d932ee8c8f 100644
--- a/clang/test/CodeGenCUDA/amdgpu-visibility.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-visibility.cu
@@ -4,11 +4,11 @@
 
 #include "Inputs/cuda.h"
 
-// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized global
+// CHECK-DEFAULT: @c ={{.*}} addrspace(4) externally_initialized constant
 // CHECK-DEFAULT: @g ={{.*}} addrspace(1) externally_initialized global
-// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized global
+// CHECK-PROTECTED: @c = protected addrspace(4) externally_initialized constant
 // CHECK-PROTECTED: @g = protected addrspace(1) externally_initialized global
-// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized global
+// CHECK-HIDDEN: @c = protected addrspace(4) externally_initialized constant
 // CHECK-HIDDEN: @g = protected addrspace(1) externally_initialized global
 __constant__ int c;
 __device__ int g;
diff --git a/clang/test/CodeGenCUDA/anon-ns.cu b/clang/test/CodeGenCUDA/anon-ns.cu
index 3c55e9907dd6c1..d931f31d0207c5 100644
--- a/clang/test/CodeGenCUDA/anon-ns.cu
+++ b/clang/test/CodeGenCUDA/anon-ns.cu
@@ -28,13 +28,13 @@
 // HIP-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT_\.intern\.b04fd23c98500190]](
 // HIP-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT_\.intern\.b04fd23c98500190]](
 // HIP-DAG: @[[VM:_ZN12_GLOBAL__N_12vmE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
-// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized global
+// HIP-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE\.static\.b04fd23c98500190]] = addrspace(4) externally_initialized constant
 // HIP-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE\.static\.b04fd23c98500190]] = addrspace(1) externally_initialized global
 
 // CUDA-DAG: define weak_odr {{.*}}void @[[KERN:_ZN12_GLOBAL__N_16kernelEv__intern__b04fd23c98500190]](
 // CUDA-DAG: define weak_odr {{.*}}void @[[KTX:_Z2ktIN12_GLOBAL__N_11XEEvT___intern__b04fd23c98500190]](
 // CUDA-DAG: define weak_odr {{.*}}void @[[KTL:_Z2ktIN12_GLOBAL__N_1UlvE_EEvT___intern__b04fd23c98500190]](
-// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized global
+// CUDA-DAG: @[[VC:_ZN12_GLOBAL__N_12vcE__static__b04fd23c98500190]] = addrspace(4) externally_initialized constant
 // CUDA-DAG: @[[VT:_Z2vtIN12_GLOBAL__N_11XEE__static__b04fd23c98500190]] = addrspace(1) externally_initialized global
 
 // COMMON-DAG: @_ZN12_GLOBAL__N_12vdE = internal addrspace(1) global
diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu
index 226b7e295f4b45..9d62e4126b430d 100644
--- a/clang/test/CodeGenCUDA/device-var-init.cu
+++ b/clang/test/CodeGenCUDA/device-var-init.cu
@@ -26,7 +26,7 @@ __shared__ int s_v;
 // DEVICE: @s_v ={{.*}} addrspace(3) global i32 undef,
 // HOST:   @s_v = internal global i32 undef,
 __constant__ int c_v;
-// DEVICE: addrspace(4) externally_initialized global i32 0,
+// DEVICE: addrspace(4) externally_initialized constant i32 0,
 // HOST:   @c_v = internal global i32 undef,
 
 __device__ int d_v_i = 1;
@@ -51,14 +51,14 @@ __shared__ T s_t;
 // DEVICE: @s_t ={{.*}} addrspace(3) global %struct.T undef,
 // HOST:   @s_t = internal global %struct.T undef,
 __constant__ T c_t;
-// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized global %struct.T zeroinitializer,
+// DEVICE: @c_t ={{.*}} addrspace(4) externally_initialized constant %struct.T zeroinitializer,
 // HOST:   @c_t = internal global %struct.T undef,
 
 __device__ T d_t_i = {2};
 // DEVICE: @d_t_i ={{.*}} addrspace(1) externally_initialized global %struct.T { i32 2 },
 // HOST:   @d_t_i = internal global %struct.T undef,
 __constant__ T c_t_i = {2};
-// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized global %struct.T { i32 2 },
+// DEVICE: @c_t_i ={{.*}} addrspace(4) externally_initialized constant %struct.T { i32 2 },
 // HOST:   @c_t_i = internal global %struct.T undef,
 
 // empty constructor
@@ -69,7 +69,7 @@ __shared__ EC s_ec;
 // DEVICE: @s_ec ={{.*}} addrspace(3) global %struct.EC undef,
 // HOST:   @s_ec = internal global %struct.EC undef,
 __constant__ EC c_ec;
-// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC zeroinitializer,
+// DEVICE: @c_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC zeroinitializer,
 // HOST:   @c_ec = internal global %struct.EC undef
 
 // empty destructor
@@ -80,7 +80,7 @@ __shared__ ED s_ed;
 // DEVICE: @s_ed ={{.*}} addrspace(3) global %struct.ED undef,
 // HOST:   @s_ed = internal global %struct.ED undef,
 __constant__ ED c_ed;
-// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized global %struct.ED zeroinitializer,
+// DEVICE: @c_ed ={{.*}} addrspace(4) externally_initialized constant %struct.ED zeroinitializer,
 // HOST:   @c_ed = internal global %struct.ED undef,
 
 __device__ ECD d_ecd;
@@ -90,7 +90,7 @@ __shared__ ECD s_ecd;
 // DEVICE: @s_ecd ={{.*}} addrspace(3) global %struct.ECD undef,
 // HOST:   @s_ecd = internal global %struct.ECD undef,
 __constant__ ECD c_ecd;
-// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized global %struct.ECD zeroinitializer,
+// DEVICE: @c_ecd ={{.*}} addrspace(4) externally_initialized constant %struct.ECD zeroinitializer,
 // HOST:   @c_ecd = internal global %struct.ECD undef,
 
 // empty templated constructor -- allowed with no arguments
@@ -101,14 +101,14 @@ __shared__ ETC s_etc;
 // DEVICE: @s_etc ={{.*}} addrspace(3) global %struct.ETC undef,
 // HOST:   @s_etc = internal global %struct.ETC undef,
 __constant__ ETC c_etc;
-// DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized global %struct.ETC zeroinitializer,
+// DEVICE: @c_etc ={{.*}} addrspace(4) externally_initialized constant %struct.ETC zeroinitializer,
 // HOST:   @c_etc = internal global %struct.ETC undef,
 
 __device__ NCFS d_ncfs;
 // DEVICE: @d_ncfs ={{.*}} addrspace(1) externally_initialized global %struct.NCFS { i32 3 }
 // HOST:   @d_ncfs = internal global %struct.NCFS undef,
 __constant__ NCFS c_ncfs;
-// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized global %struct.NCFS { i32 3 }
+// DEVICE: @c_ncfs ={{.*}} addrspace(4) externally_initialized constant %struct.NCFS { i32 3 }
 // HOST:   @c_ncfs = internal global %struct.NCFS undef,
 
 // Regular base class -- allowed
@@ -119,7 +119,7 @@ __shared__ T_B_T s_t_b_t;
 // DEVICE: @s_t_b_t ={{.*}} addrspace(3) global %struct.T_B_T undef,
 // HOST:   @s_t_b_t = internal global %struct.T_B_T undef,
 __constant__ T_B_T c_t_b_t;
-// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized global %struct.T_B_T zeroinitializer,
+// DEVICE: @c_t_b_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_B_T zeroinitializer,
 // HOST:   @c_t_b_t = internal global %struct.T_B_T undef,
 
 // Incapsulated object of allowed class -- allowed
@@ -130,7 +130,7 @@ __shared__ T_F_T s_t_f_t;
 // DEVICE: @s_t_f_t ={{.*}} addrspace(3) global %struct.T_F_T undef,
 // HOST:   @s_t_f_t = internal global %struct.T_F_T undef,
 __constant__ T_F_T c_t_f_t;
-// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized global %struct.T_F_T zeroinitializer,
+// DEVICE: @c_t_f_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_F_T zeroinitializer,
 // HOST:   @c_t_f_t = internal global %struct.T_F_T undef,
 
 // array of allowed objects -- allowed
@@ -141,7 +141,7 @@ __shared__ T_FA_T s_t_fa_t;
 // DEVICE: @s_t_fa_t ={{.*}} addrspace(3) global %struct.T_FA_T undef,
 // HOST:   @s_t_fa_t = internal global %struct.T_FA_T undef,
 __constant__ T_FA_T c_t_fa_t;
-// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized global %struct.T_FA_T zeroinitializer,
+// DEVICE: @c_t_fa_t ={{.*}} addrspace(4) externally_initialized constant %struct.T_FA_T zeroinitializer,
 // HOST:   @c_t_fa_t = internal global %struct.T_FA_T undef,
 
 
@@ -153,7 +153,7 @@ __shared__ EC_I_EC s_ec_i_ec;
 // DEVICE: @s_ec_i_ec ={{.*}} addrspace(3) global %struct.EC_I_EC undef,
 // HOST:   @s_ec_i_ec = internal global %struct.EC_I_EC undef,
 __constant__ EC_I_EC c_ec_i_ec;
-// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
+// DEVICE: @c_ec_i_ec ={{.*}} addrspace(4) externally_initialized constant %struct.EC_I_EC zeroinitializer,
 // HOST:   @c_ec_i_ec = internal global %struct.EC_I_EC undef,
 
 // DEVICE: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
diff --git a/clang/test/CodeGenCUDA/device-var-linkage.cu b/clang/test/CodeGenCUDA/device-var-linkage.cu
index 3c2efb57525c9c..4c57323d85f9dd 100644
--- a/clang/test/CodeGenCUDA/device-var-linkage.cu
+++ b/clang/test/CodeGenCUDA/device-var-linkage.cu
@@ -20,7 +20,7 @@
 // NORDC-H-DAG: @v1 = internal global i32 undef
 // RDC-H-DAG: @v1 = global i32 undef
 __device__ int v1;
-// DEV-DAG: @v2 = addrspace(4) externally_initialized global i32 0
+// DEV-DAG: @v2 = addrspace(4) externally_initialized constant i32 0
 // NORDC-H-DAG: @v2 = internal global i32 undef
 // RDC-H-DAG: @v2 = global i32 undef
 __constant__ int v2;
@@ -48,10 +48,10 @@ extern __managed__ int ev3;
 // HOST-DAG: @_ZL3sv1 = internal global i32 undef
 // CUDA-DAG: @_ZL3sv1__static__[[HASH:.*]] = addrspace(1) externally_initialized global i32 0
 static __device__ int sv1;
-// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized global i32 0
-// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
+// NORDC-DAG: @_ZL3sv2 = addrspace(4) externally_initialized constant i32 0
+// RDC-DAG: @_ZL3sv2.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
 // HOST-DAG: @_ZL3sv2 = internal global i32 undef
-// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized global i32 0
+// CUDA-DAG: @_ZL3sv2__static__[[HASH]] = addrspace(4) externally_initialized constant i32 0
 static __constant__ int sv2;
 // NORDC-DAG: @_ZL3sv3 = addrspace(1) externally_initialized global ptr addrspace(1) null
 // RDC-DAG: @_ZL3sv3.static.[[HASH]] = addrspace(1) externally_initialized global ptr addrspace(1) null
diff --git a/clang/test/CodeGenCUDA/filter-decl.cu b/clang/test/CodeGenCUDA/filter-decl.cu
index 0f4691f7c8aa7c..02dacd0ad8ef41 100644
--- a/clang/test/CodeGenCUDA/filter-decl.cu
+++ b/clang/test/CodeGenCUDA/filter-decl.cu
@@ -10,7 +10,7 @@
 __asm__("file scope asm is host only");
 
 // CHECK-HOST: constantdata = internal global
-// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized global
+// CHECK-DEVICE: constantdata = {{(dso_local )?}}externally_initialized constant
 __constant__ char constantdata[256];
 
 // CHECK-HOST: devicedata = internal global
diff --git a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
index 80655c2d296047..e92b00345e00c2 100644
--- a/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
@@ -50,7 +50,7 @@ static __device__ int x5;
 }
 
 // Check a static constant variable referenced by host is externalized.
-// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized global i32 0
+// DEV-DAG: @_ZL1y ={{.*}} addrspace(4) externally_initialized constant i32 0
 // HOST-DAG: @_ZL1y = internal global i32 undef
 // HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y\00"
 
diff --git a/clang/test/CodeGenCUDA/static-device-var-rdc.cu b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
index 16ec413397235a..9d2811f9385e1e 100644
--- a/clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ b/clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -81,11 +81,11 @@ static __device__ int x;
 static __device__ int x2;
 
 // Test normal static device variables
-// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized global i32 0
+// INT-DEV-DAG: @_ZL1y[[FILEID:.*]] = addrspace(4) externally_initialized constant i32 0
 // INT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y[[FILEID:.*]]\00"
 
 // Test externalized static device variables
-// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized global i32 0
+// EXT-DEV-DAG: @_ZL1y.static.[[HASH]] = addrspace(4) externally_initialized constant i32 0
 // EXT-HOST-DAG: @[[DEVNAMEY:[0-9]+]] = {{.*}}c"_ZL1y.static.[[HASH]]\00"
 
 static __constant__ int y;
diff --git a/clang/test/CodeGenCUDA/template-class-static-member.cu b/clang/test/CodeGenCUDA/template-class-static-member.cu
index d790d2dea66bab..b614cd9dcbb14d 100644
--- a/clang/test/CodeGenCUDA/template-class-static-member.cu
+++ b/clang/test/CodeGenCUDA/template-class-static-member.cu
@@ -38,7 +38,7 @@ const int A<T>::const_member;
 template class A<int>;
 
 //DEV-DAG: @_ZN1AIiE8d_memberE = internal addrspace(1) global i32 0, comdat, align 4
-//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) global i32 0, comdat, align 4
+//DEV-DAG: @_ZN1AIiE8c_memberE = internal addrspace(4) constant i32 0, comdat, align 4
 //DEV-DAG: @_ZN1AIiE8m_memberE = internal addrspace(1) externally_initialized global ptr addrspace(1) null
 //DEV-DAG: @_ZN1AIiE12const_memberE = internal addrspace(4) constant i32 0, comdat, align 4
 //DEV-NEG-NOT: @_ZN1AIiE8h_memberE
diff --git a/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp b/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp
index c575f49ff69716..05811bb7e1285d 100644
--- a/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp
+++ b/clang/test/CodeGenHIP/hipspv-addr-spaces.cpp
@@ -12,7 +12,7 @@
 // CHECK: @d ={{.*}} addrspace(1) externally_initialized global
 __device__ int d;
 
-// CHECK: @c ={{.*}} addrspace(1) externally_initialized global
+// CHECK: @c ={{.*}} addrspace(1) externally_initialized constant
 __constant__ int c;
 
 // CHECK: @s ={{.*}} addrspace(3) global

``````````

</details>


https://github.com/llvm/llvm-project/pull/110182


More information about the cfe-commits mailing list