[flang] [llvm] [flang-rt] Add experimental support for GPU build (PR #131826)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 24 05:30:04 PDT 2025


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/131826

>From 66ea29443d806ac90aca4354a3ce2d4db7cbf5d3 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 17 Mar 2025 16:37:44 -0500
Subject: [PATCH] [flang-rt] Add experimental support for GPU build

Summary:
This patch adds initial support for compiling `flang-rt` directly for
the GPU. The method used here matches what's already done for `libc` and
`libc++` for the GPU and builds off of those projects.

Mainly this requires setting up some flags and setting the sources that
currently work. This will deposit the resulting library in the
appropriate directory. These files are then intended to be linked via
`-Xoffload-linker` support in the offloading driver.
```
lib/clang/21/lib/nvptx64-nvidia-cuda/libflang_rt.runtime.a
lib/clang/21/lib/amdgcn-amd-amdhsa/libflang_rt.runtime.a
```

This is obviously missing a lot of functions, mainly the `io` support.
Most of what we cannot support is due to using POSIX things that just
don't make sense on the GPU, such as `pthreads` or `sema`.

Getting unit tests to run on this will also be a challenge. We could run
tests the same way we do with `libc`, but the problem there is that the
`libc` test suite is freestanding while `gtest` currently doesn't
compile on the GPU because it uses a lot of weird stuff. If the unit
tests were simply `int main` then it would work.

I don't understand the actual runtime code very well, so I'd appreciate
some guidance on how to actually support Fortran IO from this interface.
As I understand it, Fortran IO requires a stack-like operation, which
conflicts with the SIMT model GPUs use. Worst case scenario we could
burn some LDS to keep a stack, or serialize it somehow since we can
always just iterate over all the active lanes.

Building this right now looks like this, which depends on the arguments
added in https://github.com/llvm/llvm-project/pull/131695.
```
    -DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=compiler-rt;libc;libcxx;libcxxabi;flang-rt \
    -DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=compiler-rt;libc;libcxx;libcxxabi;flang-rt \
    -DRUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER=llvm \
    -DRUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBCXX_PROVIDER=llvm \
    -DRUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBC_PROVIDER=llvm \
    -DRUNTIMES_amdgcn-amd-amdhsa_FLANG_RT_LIBCXX_PROVIDER=llvm
```
---
 flang-rt/CMakeLists.txt                 |  9 ++++
 flang-rt/cmake/modules/AddFlangRT.cmake | 25 +++++++----
 flang-rt/cmake/modules/HandleLibs.cmake |  4 +-
 flang-rt/lib/runtime/CMakeLists.txt     | 55 ++++++++++++++++++++++++-
 flang/cmake/modules/FlangCommon.cmake   | 44 +++++++++++---------
 5 files changed, 108 insertions(+), 29 deletions(-)

diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt
index a01b826686b3a..38e2613f1fee2 100644
--- a/flang-rt/CMakeLists.txt
+++ b/flang-rt/CMakeLists.txt
@@ -221,6 +221,15 @@ endif()
 # System Introspection #
 ########################
 
+# The GPU targets require a few mandatory arguments to make the standard CMake
+# check flags happy.
+if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn")
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nogpulib")
+elseif ("${LLVM_RUNTIMES_TARGET}" MATCHES "^nvptx")
+  set(CMAKE_REQUIRED_FLAGS
+      "${CMAKE_REQUIRED_FLAGS} -flto -c -Wno-unused-command-line-argument")
+endif()
+
 include(CheckCXXSymbolExists)
 include(CheckCXXSourceCompiles)
 check_cxx_symbol_exists(strerror_r string.h HAVE_STRERROR_R)
diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake
index 39d0475b15326..3bd4873d9edee 100644
--- a/flang-rt/cmake/modules/AddFlangRT.cmake
+++ b/flang-rt/cmake/modules/AddFlangRT.cmake
@@ -211,6 +211,13 @@ function (add_flangrt_library name)
     # Minimum required C++ version for Flang-RT, even if CMAKE_CXX_STANDARD is defined to something else.
     target_compile_features(${tgtname} PRIVATE cxx_std_17)
 
+    # When building the flang runtime if LTO is enabled the archive file
+    # contains LLVM IR rather than object code. Currently flang is not
+    # LTO aware so cannot link this file to compiled Fortran code.
+    if (FLANG_RT_HAS_FNO_LTO_FLAG)
+      target_compile_options(${tgtname} PRIVATE -fno-lto)
+    endif ()
+
     # Use compiler-specific options to disable exceptions and RTTI.
     if (LLVM_COMPILER_IS_GCC_COMPATIBLE)
       target_compile_options(${tgtname} PRIVATE
@@ -226,6 +233,17 @@ function (add_flangrt_library name)
         )
     endif ()
 
+    # Add target specific options if necessary.
+    if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn")
+      target_compile_options(${tgtname} PRIVATE
+          $<$<COMPILE_LANGUAGE:CXX>:-nogpulib -flto -fvisibility=hidden>
+        )
+    elseif ("${LLVM_RUNTIMES_TARGET}" MATCHES "^nvptx")
+      target_compile_options(${tgtname} PRIVATE
+          $<$<COMPILE_LANGUAGE:CXX>:-nogpulib -flto -fvisibility=hidden -Wno-unknown-cuda-version --cuda-feature=+ptx63>
+        )
+    endif ()
+
     # Also for CUDA source when compiling with FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT=CUDA
     if (CMAKE_CUDA_COMPILER_ID MATCHES "NVIDIA")
       # Assuming gcc as host compiler.
@@ -256,13 +274,6 @@ function (add_flangrt_library name)
       target_compile_options(${tgtname} PUBLIC -U_LIBCPP_ENABLE_ASSERTIONS)
     endif ()
 
-    # When building the flang runtime if LTO is enabled the archive file
-    # contains LLVM IR rather than object code. Currently flang is not
-    # LTO aware so cannot link this file to compiled Fortran code.
-    if (FLANG_RT_HAS_FNO_LTO_FLAG)
-      target_compile_options(${tgtname} PRIVATE -fno-lto)
-    endif ()
-
     # Flang/Clang (including clang-cl) -compiled programs targeting the MSVC ABI
     # should only depend on msvcrt/ucrt. LLVM still emits libgcc/compiler-rt
     # functions in some cases like 128-bit integer math (__udivti3, __modti3,
diff --git a/flang-rt/cmake/modules/HandleLibs.cmake b/flang-rt/cmake/modules/HandleLibs.cmake
index 9987d6f668978..a193045fc0bfa 100644
--- a/flang-rt/cmake/modules/HandleLibs.cmake
+++ b/flang-rt/cmake/modules/HandleLibs.cmake
@@ -45,6 +45,8 @@ elseif (FLANG_RT_LIBCXX_PROVIDER STREQUAL "llvm")
   endif ()
 
   if (FLANG_RT_HAS_STDLIB_FLAG)
-    target_compile_options(flang-rt-libc-headers INTERFACE $<$<COMPILE_LANGUAGE:CXX,C>:-stdlib=libc++>)
+    target_compile_options(flang-rt-libc-headers INTERFACE
+      $<$<COMPILE_LANGUAGE:CXX,C>:$<COMPILE_ONLY:-stdlib=libc++>>
+    )
   endif ()
 endif ()
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index c07348ea78c9f..572b4d54552c1 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -12,7 +12,6 @@ find_package(Backtrace)
 set(HAVE_BACKTRACE ${Backtrace_FOUND})
 set(BACKTRACE_HEADER ${Backtrace_HEADER})
 
-
 # List of files that are buildable for all devices.
 set(supported_sources
   ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp
@@ -88,6 +87,54 @@ set(host_sources
   unit-map.cpp
 )
 
+# Sources that can be compiled directly for the GPU.
+set(gpu_sources
+  ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp
+  ${FLANG_SOURCE_DIR}/lib/Decimal/decimal-to-binary.cpp
+  ISO_Fortran_binding.cpp
+  allocator-registry.cpp
+  allocatable.cpp
+  array-constructor.cpp
+  assign.cpp
+  buffer.cpp
+  character.cpp
+  connection.cpp
+  copy.cpp
+  derived-api.cpp
+  derived.cpp
+  dot-product.cpp
+  edit-output.cpp
+  extrema.cpp
+  findloc.cpp
+  format.cpp
+  inquiry.cpp
+  internal-unit.cpp
+  io-error.cpp
+  iostat.cpp
+  matmul-transpose.cpp
+  matmul.cpp
+  memory.cpp
+  misc-intrinsic.cpp
+  non-tbp-dio.cpp
+  numeric.cpp
+  pointer.cpp
+  product.cpp
+  ragged.cpp
+  stat.cpp
+  sum.cpp
+  support.cpp
+  terminator.cpp
+  tools.cpp
+  transformational.cpp
+  type-code.cpp
+  type-info.cpp
+  utf.cpp
+  complex-powi.cpp
+  reduce.cpp
+  reduction.cpp
+  temporary-stack.cpp
+)
+
 file(GLOB_RECURSE public_headers
   "${FLANG_RT_SOURCE_DIR}/include/flang_rt/*.h"
   "${FLANG_SOURCE_DIR}/include/flang/Common/*.h"
@@ -124,7 +171,11 @@ else ()
   set(f128_sources "")
 endif ()
 
-set(sources ${supported_sources} ${host_sources} ${f128_sources})
+if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
+  set(sources ${gpu_sources})
+else ()
+  set(sources ${supported_sources} ${host_sources} ${f128_sources})
+endif ()
 
 
 if (NOT WIN32)
diff --git a/flang/cmake/modules/FlangCommon.cmake b/flang/cmake/modules/FlangCommon.cmake
index bb2a76cb19453..4726c640c97b7 100644
--- a/flang/cmake/modules/FlangCommon.cmake
+++ b/flang/cmake/modules/FlangCommon.cmake
@@ -24,24 +24,30 @@ if (FLANG_RUNTIME_F128_MATH_LIB)
   add_compile_definitions(FLANG_RUNTIME_F128_MATH_LIB="${FLANG_RUNTIME_F128_MATH_LIB}")
 endif()
 
-# Check if 128-bit float computations can be done via long double
-# Note that '-nostdinc++' might be implied when this code kicks in
-# (see 'runtimes/CMakeLists.txt'), so we cannot use 'cfloat' C++ header
-# file in the test below.
-# Compile it as C.
-check_c_source_compiles(
-  "#include <float.h>
-   #if LDBL_MANT_DIG != 113
-   #error LDBL_MANT_DIG != 113
-   #endif
-   int main() { return 0; }
-  "
-  HAVE_LDBL_MANT_DIG_113)
-
-include(TestBigEndian)
-test_big_endian(IS_BIGENDIAN)
-if (IS_BIGENDIAN)
-  add_compile_definitions(FLANG_BIG_ENDIAN=1)
-else ()
+# The NVPTX target can't emit a binary due to the PTXAS dependency, just
+# hard-code this.
+if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^nvptx")
   add_compile_definitions(FLANG_LITTLE_ENDIAN=1)
+else ()
+  # Check if 128-bit float computations can be done via long double
+  # Note that '-nostdinc++' might be implied when this code kicks in
+  # (see 'runtimes/CMakeLists.txt'), so we cannot use 'cfloat' C++ header
+  # file in the test below.
+  # Compile it as C.
+  check_c_source_compiles(
+    "#include <float.h>
+     #if LDBL_MANT_DIG != 113
+     #error LDBL_MANT_DIG != 113
+     #endif
+     int main() { return 0; }
+    "
+    HAVE_LDBL_MANT_DIG_113)
+
+  include(TestBigEndian)
+  test_big_endian(IS_BIGENDIAN)
+  if (IS_BIGENDIAN)
+    add_compile_definitions(FLANG_BIG_ENDIAN=1)
+  else ()
+    add_compile_definitions(FLANG_LITTLE_ENDIAN=1)
+  endif ()
 endif ()



More information about the llvm-commits mailing list