[clang] [llvm] [AMDGPU] Use COV6 by default (PR #118515)

via cfe-commits cfe-commits at lists.llvm.org
Tue Dec 3 09:10:44 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-clang-driver

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/118515.diff


13 Files Affected:

- (modified) clang/include/clang/Driver/Options.td (+5-5) 
- (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1) 
- (modified) clang/test/CodeGen/amdgpu-address-spaces.cpp (+1-1) 
- (modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1) 
- (modified) clang/test/CodeGenCXX/dynamic-cast-address-space.cpp (+3-3) 
- (modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+3-3) 
- (added) clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc () 
- (added) clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc () 
- (added) clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc () 
- (modified) clang/test/Driver/hip-device-libs.hip (+1-1) 
- (modified) clang/test/OpenMP/amdgcn_target_global_constructor.cpp (+2-2) 
- (modified) offload/plugins-nextgen/common/src/Utils/ELF.cpp (+3-2) 


``````````diff
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index cb96b5daed9d3a..29db1aa21ed298 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1786,12 +1786,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Emit extra debug info to make sample profile more accurate">,
   NegFlag<SetFalse>>;
-def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">, 
+def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect coverage info for cold functions into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
-def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">, 
+def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<directory>">,
-    HelpText<"Generate instrumented code to collect coverage info for cold functions into <directory>/default.profraw (overridden by LLVM_PROFILE_FILE env var)">; 
+    HelpText<"Generate instrumented code to collect coverage info for cold functions into <directory>/default.profraw (overridden by LLVM_PROFILE_FILE env var)">;
 def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
@@ -5140,12 +5140,12 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee",
   NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
-  HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+  HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">,
   Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
   Values<"none,4,5,6">,
   NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
   NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
-  MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;
+  MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_6">;
 
 defm cumode : SimpleMFlag<"cumode",
   "Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 8d977149e62485..4e6ace48c3ffb5 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2705,7 +2705,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
 
 unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
                                            const llvm::opt::ArgList &Args) {
-  unsigned CodeObjVer = 5; // default
+  unsigned CodeObjVer = 6; // default
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
     StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
   return CodeObjVer;
diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp
index ae2c61439f4ca5..b121b559f58dc3 100644
--- a/clang/test/CodeGen/amdgpu-address-spaces.cpp
+++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp
@@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
 // CHECK: @u = addrspace(5) global i32 undef, align 4
 // CHECK: @aaa = addrspace(6) global i32 1000, align 4
 // CHECK: @bbb = addrspace(999) global i32 1234, align 4
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index ffe12544917f7f..aa0e3edec3f6a3 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
 // Create module flag for code object version.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -o - %s | FileCheck %s -check-prefix=V5
+// RUN:   -o - %s | FileCheck %s -check-prefix=V6
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 271d9ede79d0c4..7eebdf68115a98 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -13,7 +13,7 @@ B fail;
 // CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
 // CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8
@@ -118,11 +118,11 @@ const B& f(A *a) {
 // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
 // WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
 //.
-// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
 // CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
 //.
-// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
 // WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
 //.
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 1b53ebec9b5821..f4dbad021987f1 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -8,7 +8,7 @@
 //.
 // OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
 // OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
-// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 __device__ void extern_func();
 
@@ -39,7 +39,7 @@ __global__ void kernel() {
 // OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
 // OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
 //.
-// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
 // OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
 // OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 //.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 62b5661da9dbd8..7f2a17b6ef8c55 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -62,7 +62,7 @@ kernel void test_target_features_kernel(global int *i) {
 
 //.
 // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
 // NOCPU-LABEL: define {{[^@]+}}@callee
@@ -759,7 +759,7 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900: attributes #[[ATTR8]] = { nounwind }
 // GFX900: attributes #[[ATTR9]] = { convergent nounwind }
 //.
-// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
 // NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0}
 // NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0}
@@ -777,7 +777,7 @@ kernel void test_target_features_kernel(global int *i) {
 // NOCPU: [[META15]] = !{i32 1}
 // NOCPU: [[META16]] = !{!"int*"}
 //.
-// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
 // GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
 // GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
diff --git a/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 6f1d31508e3302..3ae384cf05d972 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -157,7 +157,7 @@
 // Test default code object version.
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6
 
 // Test default code object version with old device library without abi_version_400.bc
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
index 9f1e68d4ea0fec..d728dc1233e2c7 100644
--- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
+++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
@@ -29,7 +29,7 @@ S A;
 // CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
 // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }]
 // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }]
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
 // CHECK-SAME: () #[[ATTR0:[0-9]+]] {
@@ -104,7 +104,7 @@ S A;
 // CHECK: attributes #[[ATTR4]] = { convergent nounwind }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"A", i32 0, i32 0}
-// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
 // CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 51}
 // CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index 88642fd5b56400..18b5ad3351b12d 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -64,8 +64,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
     if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
       return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
     if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
-        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5)
-      return createError("Invalid AMD ABI version, must be version 4 or 5");
+        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
+        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
+      return createError("Invalid AMD ABI version, must be version above 4");
     if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
         (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
       return createError("Unsupported AMDGPU architecture");

``````````

</details>


https://github.com/llvm/llvm-project/pull/118515


More information about the cfe-commits mailing list