[Openmp-commits] [clang] [llvm] [openmp] [OpenMP] Use generic IR for the OpenMP DeviceRTL (PR #119091)
    Joseph Huber via Openmp-commits 
    openmp-commits at lists.llvm.org
       
    Sun Dec  8 05:36:04 PST 2024
    
    
  
================
@@ -288,18 +258,11 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
   endif()
 endfunction()
 
-# Generate a Bitcode library for all the gpu architectures the user requested.
-add_custom_target(omptarget.devicertl.nvptx)
 add_custom_target(omptarget.devicertl.amdgpu)
-foreach(gpu_arch ${LIBOMPTARGET_DEVICE_ARCHITECTURES})
-  if("${gpu_arch}" IN_LIST all_amdgpu_architectures)
-    compileDeviceRTLLibrary(${gpu_arch} amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none)
-  elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
-    compileDeviceRTLLibrary(${gpu_arch} nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
-  else()
-    message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")
-  endif()
-endforeach()
+compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none)
+
+add_custom_target(omptarget.devicertl.nvptx)
+compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
----------------
jhuber6 wrote:
This is really over-complicated, but right now we only use the `.bc` file for non-LTO NVPTX compilations which get put through each TU in a broken way via `-mlink-builtin-bitcode`. This is because it would get really slow if we didn't optimize out calls to the runtime.
https://github.com/llvm/llvm-project/pull/119091
    
    
More information about the Openmp-commits
mailing list