[libcxx-commits] [libcxx] 2f41fa3 - [AMDGPU] Fix code object version not being set to 'none' (#135036)

Thu Apr 10 09:31:25 PDT 2025

Author: Joseph Huber
Date: 2025-04-10T11:31:21-05:00
New Revision: 2f41fa387d6734c637d02cbcf985c7b312b1e23b

URL: https://github.com/llvm/llvm-project/commit/2f41fa387d6734c637d02cbcf985c7b312b1e23b
DIFF: https://github.com/llvm/llvm-project/commit/2f41fa387d6734c637d02cbcf985c7b312b1e23b.diff

LOG: [AMDGPU] Fix code object version not being set to 'none' (#135036)

Summary:
Previously, we removed the special handling for the code object version
global. I erroneously thought that this meant we cold get rid of this
weird `-Xclang` option. However, this also emits an LLVM IR module flag,
which will then cause linking issues.

Added: 
    offload/test/api/amdgpu_code_object.c

Modified: 
    compiler-rt/cmake/builtin-config-ix.cmake
    compiler-rt/lib/builtins/CMakeLists.txt
    libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
    libcxx/cmake/caches/AMDGPU.cmake
    offload/DeviceRTL/CMakeLists.txt
    offload/DeviceRTL/src/Mapping.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index e1945ba2b2230..7bd3269bd999d 100644

--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -22,6 +22,7 @@ builtin_check_c_compiler_flag(-Wno-pedantic         COMPILER_RT_HAS_WNO_PEDANTIC
 builtin_check_c_compiler_flag(-nogpulib             COMPILER_RT_HAS_NOGPULIB_FLAG)
 builtin_check_c_compiler_flag(-flto                 COMPILER_RT_HAS_FLTO_FLAG)
 builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
+builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
 builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
 builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
 builtin_check_c_compiler_flag(-fcf-protection=full COMPILER_RT_HAS_FCF_PROTECTION_FLAG)

diff  --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 5d78b5a780428..3cdbf21ed403d 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -833,6 +833,12 @@ else ()
     append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
     append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
                    -fconvergent-functions BUILTIN_CFLAGS)
+
+    # AMDGPU targets want to use a generic ABI.
+    if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+      append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
+                     "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
+    endif()
   endif()
 
   set(BUILTIN_DEFS "")

diff  --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index ddd18ef293c8d..0facb0b9be0c1 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -215,6 +215,8 @@ function(_get_common_compile_options output_var flags)
       if(LIBC_CUDA_ROOT)
         list(APPEND compile_options "--cuda-path=${LIBC_CUDA_ROOT}")
       endif()
+    elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
+      list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
     endif()
   endif()
   set(${output_var} ${compile_options} PARENT_SCOPE)

diff  --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake
index d4aa28b4134ea..e7bf3f53891f0 100644
--- a/libcxx/cmake/caches/AMDGPU.cmake
+++ b/libcxx/cmake/caches/AMDGPU.cmake
@@ -32,6 +32,8 @@ set(LIBCXX_TEST_CONFIG "amdgpu-libc++-shared.cfg.in" CACHE STRING "")
 set(LIBCXX_TEST_PARAMS "optimization=none;long_tests=False;executor=amdhsa-loader" CACHE STRING "")
 
 # Necessary compile flags for AMDGPU.
-set(LIBCXX_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "")
-set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "")
+set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
+    "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "")
+set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
+    "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "")
 set(CMAKE_REQUIRED_FLAGS "-nogpulib" CACHE STRING "")

diff  --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 07888217b6c68..8f2a1fd01fabc 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -255,7 +255,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
 endfunction()
 
 add_custom_target(omptarget.devicertl.amdgpu)
-compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa)
+compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none)
 
 add_custom_target(omptarget.devicertl.nvptx)
 compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)

diff  --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index a9e027727b04b..e951556c2ad4e 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -20,6 +20,14 @@
 
 using namespace ompx;
 
+// FIXME: This resolves the handling for the AMDGPU workgroup size when the ABI
+// is set to 'none'. We only support COV5+ but this can be removed when COV4 is
+// fully deprecated.
+#ifdef __AMDGPU__
+extern const inline uint32_t __oclc_ABI_version = 500;
+[[gnu::alias("__oclc_ABI_version")]] const uint32_t __oclc_ABI_version__;
+#endif
+
 static bool isInLastWarp() {
   uint32_t MainTId = (mapping::getNumberOfThreadsInBlock() - 1) &
                      ~(mapping::getWarpSize() - 1);

diff  --git a/offload/test/api/amdgpu_code_object.c b/offload/test/api/amdgpu_code_object.c
new file mode 100644
index 0000000000000..95d14f6772e77
--- /dev/null
+++ b/offload/test/api/amdgpu_code_object.c
@@ -0,0 +1,16 @@
+// RUN: %libomptarget-compile-amdgcn-amd-amdhsa -Xclang \
+// RUN:   -mcode-object-version=5
+// RUN:   %libomptarget-run-amdgcn-amd-amdhsa | %fcheck-amdgcn-amd-amdhsa
+
+// REQUIRES: amdgcn-amd-amdhsa
+
+#include <stdio.h>
+
+// Test to make sure we can build and run with the previous COV.
+int main() {
+#pragma omp target
+  ;
+
+  // CHECK: PASS
+  printf("PASS\n");
+}