[libc-commits] [libc] e2356fb - [libc] Add special handling for CUDA PTX features

Mon Apr 17 09:51:53 PDT 2023

Author: Joseph Huber
Date: 2023-04-17T11:51:34-05:00
New Revision: e2356fb07e57538cbe5f731f23fc574848ea9bb1

URL: https://github.com/llvm/llvm-project/commit/e2356fb07e57538cbe5f731f23fc574848ea9bb1
DIFF: https://github.com/llvm/llvm-project/commit/e2356fb07e57538cbe5f731f23fc574848ea9bb1.diff

LOG: [libc] Add special handling for CUDA PTX features

The NVIDIA compilation path requires some special options. This is
mostly because compilation is dependent on having a valid CUDA
toolchain. We don't actually need the CUDA toolchain to create the
exported `libcgpu.a` library because it's pure LLVM-IR. However, for
some language features we need the PTX version to be set. This is
normally set by checking the CUDA version, but without one installed it
will fail to build. We instead choose a minimum set of features on the
desired target, inferred from
https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes
and the PTX reference for functions like `nanosleep`.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D148532

Added: 
    

Modified: 
    libc/cmake/modules/LLVMLibCObjectRules.cmake
    libc/cmake/modules/LLVMLibCTestRules.cmake
    libc/startup/gpu/nvptx/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index c3729699e4ef8..5a9fdc643f44a 100644

--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -64,6 +64,50 @@ function(_get_common_compile_options output_var flags)
   set(${output_var} ${compile_options} PARENT_SCOPE)
 endfunction()
 
+# Obtains NVPTX specific arguments for compiling for `gpu_arch` (e.g. sm_70)
+# and returns them as a list in `output_var`. The PTX feature is normally
+# derived from the CUDA toolchain version. We want to be able to target NVPTX
+# without a CUDA installation, so we set it manually to the minimum PTX version
+# required for the features we wish to use on that target; unknown
+# architectures are a fatal error. Adjust as needed for desired PTX features.
+function(get_nvptx_compile_options output_var gpu_arch)
+  set(nvptx_options "-march=${gpu_arch}") # Reset: caller's value is inherited.
+  if("${gpu_arch}" STREQUAL "sm_35")
+    list(APPEND nvptx_options "--cuda-feature=+ptx42")
+  elseif("${gpu_arch}" STREQUAL "sm_37")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_50")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_52")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_53")
+    list(APPEND nvptx_options "--cuda-feature=+ptx43")
+  elseif("${gpu_arch}" STREQUAL "sm_60")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_61")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_62")
+    list(APPEND nvptx_options "--cuda-feature=+ptx50")
+  elseif("${gpu_arch}" STREQUAL "sm_70")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_72")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_75")
+    list(APPEND nvptx_options "--cuda-feature=+ptx63")
+  elseif("${gpu_arch}" STREQUAL "sm_80")
+    list(APPEND nvptx_options "--cuda-feature=+ptx72")
+  elseif("${gpu_arch}" STREQUAL "sm_86")
+    list(APPEND nvptx_options "--cuda-feature=+ptx72")
+  else()
+    message(FATAL_ERROR "Unknown Nvidia GPU architecture '${gpu_arch}'")
+  endif()
+
+  if(LIBC_CUDA_ROOT)
+    list(APPEND nvptx_options "--cuda-path=${LIBC_CUDA_ROOT}")
+  endif()
+  set(${output_var} ${nvptx_options} PARENT_SCOPE)
+endfunction()
+
 # Builds the object target for the GPU.
 # This compiles the target for all supported architectures and embeds it into
 # host binary for installing. The internal target contains the GPU code directly
@@ -103,7 +147,8 @@ function(_build_gpu_objects fq_target_name internal_target_name)
         list(APPEND compile_options "-mcpu=${gpu_arch}")
       elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
         set(gpu_target_triple "nvptx64-nvidia-cuda")
-        list(APPEND compile_options "-march=${gpu_arch}")
+        get_nvptx_compile_options(nvptx_options ${gpu_arch})
+        list(APPEND compile_options "${nvptx_options}")
       else()
         message(FATAL_ERROR "Unknown GPU architecture '${gpu_arch}'")
       endif()
@@ -200,9 +245,8 @@ function(_build_gpu_objects fq_target_name internal_target_name)
     if(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU)
       target_compile_options(${internal_target_name} PRIVATE -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto)
     elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
-      target_compile_options(${internal_target_name} PRIVATE
-                             -march=${LIBC_GPU_TARGET_ARCHITECTURE}
-                             --cuda-path=${LIBC_CUDA_ROOT})
+      get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
+      target_compile_options(${internal_target_name} PRIVATE ${nvptx_options})
     endif()
     target_include_directories(${internal_target_name} PRIVATE ${include_dirs})
     if(full_deps_list)

diff  --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index 592b632bbc116..75bb004a76c93 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -506,9 +506,9 @@ function(add_integration_test test_name)
                            -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
                            --target=${LIBC_GPU_TARGET_TRIPLE})
   elseif(LIBC_GPU_TARGET_ARCHITECTURE_IS_NVPTX)
+    get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
     target_compile_options(${fq_build_target_name} PRIVATE
-                           --cuda-path=${LIBC_CUDA_ROOT}
-                           -march=${LIBC_GPU_TARGET_ARCHITECTURE}
+                           ${nvptx_options}
                            --target=${LIBC_GPU_TARGET_TRIPLE})
   endif()
 

diff  --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt
index f8839e9674159..49661691ecb57 100644
--- a/libc/startup/gpu/nvptx/CMakeLists.txt
+++ b/libc/startup/gpu/nvptx/CMakeLists.txt
@@ -1,3 +1,4 @@
+get_nvptx_compile_options(nvptx_options ${LIBC_GPU_TARGET_ARCHITECTURE})
 add_startup_object(
   crt1
   SRC
@@ -8,9 +9,8 @@ add_startup_object(
     -ffreestanding # To avoid compiler warnings about calling the main function.
     -fno-builtin
     -nogpulib # Do not include any GPU vendor libraries.
-    -march=${LIBC_GPU_TARGET_ARCHITECTURE}
     --target=${LIBC_GPU_TARGET_TRIPLE}
-    --cuda-path=${LIBC_CUDA_ROOT}
+    ${nvptx_options}
   NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
 )
 get_fq_target_name(crt1 fq_name)