[clang] [libc] [llvm] Reapply "[AMDGPU] Use COV6 by default (#118515)" (PR #130963)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 12 06:41:02 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
This reverts commit 68bcba6d7a1cc18996c0bcb7c62267c62d2040d0.
---
Full diff: https://github.com/llvm/llvm-project/pull/130963.diff
17 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+3-1)
- (modified) clang/include/clang/Driver/Options.td (+2-2)
- (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)
- (modified) clang/test/CodeGen/amdgpu-address-spaces.cpp (+1-1)
- (modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+1-1)
- (modified) clang/test/CodeGenCXX/dynamic-cast-address-space.cpp (+3-3)
- (modified) clang/test/CodeGenHIP/default-attributes.hip (+2-2)
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+3-3)
- (added) clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc ()
- (added) clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc ()
- (added) clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc ()
- (modified) clang/test/Driver/hip-device-libs.hip (+1-1)
- (modified) clang/test/OpenMP/amdgcn_target_global_constructor.cpp (+2-2)
- (modified) libc/cmake/modules/prepare_libc_gpu_build.cmake (+1-1)
- (modified) llvm/docs/ReleaseNotes.md (+2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+1-1)
- (added) llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll (+7)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f6e4d6a34e5dc..2ed05ce28a1bc 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -244,7 +244,7 @@ Improvements to Clang's diagnostics
as function arguments or return value respectively. Note that
:doc:`ThreadSafetyAnalysis` still does not perform alias analysis. The
feature will be default-enabled with ``-Wthread-safety`` in a future release.
-- The ``-Wsign-compare`` warning now treats expressions with bitwise not(~) and minus(-) as signed integers
+- The ``-Wsign-compare`` warning now treats expressions with bitwise not(~) and minus(-) as signed integers
except for the case where the operand is an unsigned integer
and throws warning if they are compared with unsigned integers (##18878).
@@ -332,6 +332,8 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^
+- Bump the default code object version to 6. ROCm 6.3 is required to run any program compiled with COV6.
+
NVPTX Support
^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e69cd6b833c3a..6fd09ec371e58 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5153,12 +5153,12 @@ defm amdgpu_ieee : BoolMOption<"amdgpu-ieee",
NegFlag<SetFalse, [], [ClangOption, CC1Option]>>;
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
- HelpText<"Specify code object ABI version. Defaults to 5. (AMDGPU only)">,
+ HelpText<"Specify code object ABI version. Defaults to 6. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
Values<"none,4,5,6">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
- MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_5">;
+ MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_6">;
defm cumode : SimpleMFlag<"cumode",
"Specify CU wavefront", "Specify WGP wavefront",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index b43472a52038b..0f6fd11e2837b 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2756,7 +2756,7 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
- unsigned CodeObjVer = 5; // default
+ unsigned CodeObjVer = 6; // default
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args))
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
return CodeObjVer;
diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp
index ae2c61439f4ca..b121b559f58dc 100644
--- a/clang/test/CodeGen/amdgpu-address-spaces.cpp
+++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp
@@ -29,7 +29,7 @@ int [[clang::address_space(999)]] bbb = 1234;
// CHECK: @u = addrspace(5) global i32 undef, align 4
// CHECK: @aaa = addrspace(6) global i32 1000, align 4
// CHECK: @bbb = addrspace(999) global i32 1234, align 4
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index ffe12544917f7..aa0e3edec3f6a 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -1,7 +1,7 @@
// Create module flag for code object version.
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN: -o - %s | FileCheck %s -check-prefix=V5
+// RUN: -o - %s | FileCheck %s -check-prefix=V6
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 0460352cf7ffc..5d49cc0544b9c 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -13,7 +13,7 @@ B fail;
// CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
// CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
// CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
// WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8
@@ -118,11 +118,11 @@ const B& f(A *a) {
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
//.
-// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
-// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// WITH-NONZERO-DEFAULT-AS: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// WITH-NONZERO-DEFAULT-AS: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// WITH-NONZERO-DEFAULT-AS: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
//.
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index 1b53ebec9b582..f4dbad021987f 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -8,7 +8,7 @@
//.
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
-// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
__device__ void extern_func();
@@ -39,7 +39,7 @@ __global__ void kernel() {
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
//.
-// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
//.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 367217579e765..7e847367e1a13 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -68,7 +68,7 @@ kernel void test_target_features_kernel(global int *i) {
// CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
// CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
// CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
// NOCPU-LABEL: define {{[^@]+}}@callee
@@ -764,7 +764,7 @@ kernel void test_target_features_kernel(global int *i) {
// GFX900: attributes #[[ATTR7]] = { nounwind }
// GFX900: attributes #[[ATTR8]] = { convergent nounwind }
//.
-// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// NOCPU: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// NOCPU: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// NOCPU: [[META2:![0-9]+]] = !{i32 2, i32 0}
// NOCPU: [[META3]] = !{i32 1, i32 0, i32 1, i32 0}
@@ -787,7 +787,7 @@ kernel void test_target_features_kernel(global int *i) {
// NOCPU: [[META20]] = !{!"int*"}
// NOCPU: [[META21]] = !{ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle}
//.
-// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// GFX900: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// GFX900: [[META2:![0-9]+]] = !{i32 2, i32 0}
// GFX900: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
diff --git a/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm-spack/llvm-amdgpu-4.0.0-ieagcs7inf7runpyfvepqkurasoglq4z/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm_resource_dir/lib64/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 317fd79242697..c7cafd0027bc5 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -157,7 +157,7 @@
// Test default code object version.
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6
// Test default code object version with old device library without abi_version_400.bc
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
index 9f1e68d4ea0fe..d728dc1233e2c 100644
--- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
+++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
@@ -29,7 +29,7 @@ S A;
// CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
// CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }]
// CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }]
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
+// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
//.
// CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
@@ -104,7 +104,7 @@ S A;
// CHECK: attributes #[[ATTR4]] = { convergent nounwind }
//.
// CHECK: [[META0:![0-9]+]] = !{i32 1, !"A", i32 0, i32 0}
-// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
+// CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
// CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
// CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 51}
// CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake b/libc/cmake/modules/prepare_libc_gpu_build.cmake
index 937bd22451c5f..f8f5a954e5e91 100644
--- a/libc/cmake/modules/prepare_libc_gpu_build.cmake
+++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake
@@ -104,7 +104,7 @@ if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
# The AMDGPU environment uses different code objects to encode the ABI for
# kernel calls and intrinsic functions. We want to specify this manually to
# conform to whatever the test suite was built to handle.
- set(LIBC_GPU_CODE_OBJECT_VERSION 5)
+ set(LIBC_GPU_CODE_OBJECT_VERSION 6)
endif()
if(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 3d608c636a8f6..5de7a3eab9914 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -83,6 +83,8 @@ Changes to the AArch64 Backend
Changes to the AMDGPU Backend
-----------------------------
+* Bump the default `.amdhsa_code_object_version` to 6. ROCm 6.3 is required to run any program compiled with COV6.
+
Changes to the ARM Backend
--------------------------
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index ac6b07bad3e35..e39959eae72dc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -34,7 +34,7 @@
static llvm::cl::opt<unsigned> DefaultAMDHSACodeObjectVersion(
"amdhsa-code-object-version", llvm::cl::Hidden,
- llvm::cl::init(llvm::AMDGPU::AMDHSA_COV5),
+ llvm::cl::init(llvm::AMDGPU::AMDHSA_COV6),
llvm::cl::desc("Set default AMDHSA Code Object Version (module flag "
"or asm directive still take priority if present)"));
diff --git a/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll b/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll
new file mode 100644
index 0000000000000..6f79cf23bfbf7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/default_amdhsa_code_object_version.ll
@@ -0,0 +1,7 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s
+
+; CHECK: .amdhsa_code_object_version 6
+
+define amdgpu_kernel void @kernel() {
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/130963
More information about the cfe-commits
mailing list