[flang-commits] [flang] c49460b - [flang-rt] Enable more runtime functions for the GPU target (#183649)

via flang-commits flang-commits at lists.llvm.org
Fri Feb 27 10:27:43 PST 2026


Author: Joseph Huber
Date: 2026-02-27T12:27:39-06:00
New Revision: c49460bae76c873725ae77d5f9b4d6239b7c40a7

URL: https://github.com/llvm/llvm-project/commit/c49460bae76c873725ae77d5f9b4d6239b7c40a7
DIFF: https://github.com/llvm/llvm-project/commit/c49460bae76c873725ae77d5f9b4d6239b7c40a7.diff

LOG: [flang-rt] Enable more runtime functions for the GPU target (#183649)

Summary:
This primarily enables `stop.cpp` and `descriptor.cpp`. It requires a
little bit of wrangling to get it to compile. Unlike the CUDA build,
this build uses an in-tree libc++ configured for the GPU. This is
configured without thread support, environment, or filesystem, and it is
not POSIX at all. So, no mutexes, pthreads, or get/setenv.

I tested stop, but I don't know if it's actually legal to exit from
OpenMP offloading.

Added: 
    offload/test/offloading/fortran/target-descriptor-ops.f90

Modified: 
    flang-rt/include/flang-rt/runtime/lock.h
    flang-rt/include/flang-rt/runtime/tools.h
    flang-rt/lib/runtime/CMakeLists.txt
    flang-rt/lib/runtime/descriptor.cpp
    flang-rt/lib/runtime/environment.cpp
    flang-rt/lib/runtime/stop.cpp
    flang-rt/lib/runtime/terminator.cpp
    flang/include/flang/Common/api-attrs.h

Removed: 
    


################################################################################
diff  --git a/flang-rt/include/flang-rt/runtime/lock.h b/flang-rt/include/flang-rt/runtime/lock.h
index 7c88534245733..7147c3a6b940b 100644
--- a/flang-rt/include/flang-rt/runtime/lock.h
+++ b/flang-rt/include/flang-rt/runtime/lock.h
@@ -16,7 +16,7 @@
 
 // Avoid <mutex> if possible to avoid introduction of C++ runtime
 // library dependence.
-#ifndef _WIN32
+#if !defined(_WIN32) && !RT_GPU_TARGET
 #define USE_PTHREADS 1
 #else
 #undef USE_PTHREADS

diff  --git a/flang-rt/include/flang-rt/runtime/tools.h b/flang-rt/include/flang-rt/runtime/tools.h
index 1939c4d907be4..a45c2ac98f2fa 100644
--- a/flang-rt/include/flang-rt/runtime/tools.h
+++ b/flang-rt/include/flang-rt/runtime/tools.h
@@ -35,7 +35,7 @@
 #define RT_PRETTY_FUNCTION __func__
 #endif
 
-#if defined(RT_DEVICE_COMPILATION)
+#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
 // Use the pseudo lock and pseudo file unit implementations
 // for the device.
 #define RT_USE_PSEUDO_LOCK 1

diff  --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index 9fa8376e9b99c..d18ce6caccaa3 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -109,9 +109,11 @@ set(gpu_sources
   copy.cpp
   derived-api.cpp
   derived.cpp
+  descriptor.cpp
   dot-product.cpp
   edit-output.cpp
   extrema.cpp
+  environment.cpp
   findloc.cpp
   format.cpp
   inquiry.cpp
@@ -127,6 +129,7 @@ set(gpu_sources
   product.cpp
   ragged.cpp
   stat.cpp
+  stop.cpp
   sum.cpp
   support.cpp
   terminator.cpp

diff  --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 04bbb3877a0d0..6c9e76afb117e 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -8,10 +8,10 @@
 
 #include "flang-rt/runtime/descriptor.h"
 #include "ISO_Fortran_util.h"
-#include "memory.h"
 #include "flang-rt/runtime/allocator-registry.h"
 #include "flang-rt/runtime/derived.h"
 #include "flang-rt/runtime/environment.h"
+#include "flang-rt/runtime/memory.h"
 #include "flang-rt/runtime/stat.h"
 #include "flang-rt/runtime/terminator.h"
 #include "flang-rt/runtime/type-info.h"

diff  --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp
index ae4d6d305f409..53e13cd929bf8 100644
--- a/flang-rt/lib/runtime/environment.cpp
+++ b/flang-rt/lib/runtime/environment.cpp
@@ -8,7 +8,7 @@
 
 #include "flang-rt/runtime/environment.h"
 #include "environment-default-list.h"
-#include "memory.h"
+#include "flang-rt/runtime/memory.h"
 #include "flang-rt/runtime/tools.h"
 #include <cstdio>
 #include <cstdlib>
@@ -19,10 +19,11 @@
 #ifdef _MSC_VER
 extern char **_environ;
 #endif
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || RT_GPU_TARGET
 // FreeBSD has environ in crt rather than libc. Using "extern char** environ"
 // in the code of a shared library makes it fail to link with -Wl,--no-undefined
 // See https://reviews.freebsd.org/D30842#840642
+// GPU targets do not provide environ.
 #else
 extern char **environ;
 #endif
@@ -51,6 +52,8 @@ static void (*PostConfigEnvCallback[ExecutionEnvironment::nConfigEnvCallback])(
     int, const char *[], const char *[], const EnvironmentDefaultList *){
     nullptr};
 
+// No environment support on the GPU.
+#if !RT_GPU_TARGET
 static void SetEnvironmentDefaults(const EnvironmentDefaultList *envDefaults) {
   if (!envDefaults) {
     return;
@@ -314,6 +317,7 @@ std::int32_t ExecutionEnvironment::UnsetEnv(
 
   return status;
 }
+#endif
 
 extern "C" {
 

diff  --git a/flang-rt/lib/runtime/stop.cpp b/flang-rt/lib/runtime/stop.cpp
index 75fa64c4c0039..5abb80af7e66d 100644
--- a/flang-rt/lib/runtime/stop.cpp
+++ b/flang-rt/lib/runtime/stop.cpp
@@ -24,10 +24,14 @@
 extern "C" {
 
 [[maybe_unused]] static void DescribeIEEESignaledExceptions() {
+#if defined(RT_DEVICE_COMPILATION) || RT_GPU_TARGET
+  unsigned excepts{}; // No fenv support on the device.
+#else
 #ifdef fetestexcept // a macro in some environments; omit std::
   auto excepts{fetestexcept(FE_ALL_EXCEPT)};
 #else
   auto excepts{std::fetestexcept(FE_ALL_EXCEPT)};
+#endif
 #endif
   if (excepts) {
     std::fputs("IEEE arithmetic exceptions signaled:", stderr);
@@ -61,8 +65,10 @@ extern "C" {
 }
 
 static void CloseAllExternalUnits(const char *why) {
+#if !RT_GPU_TARGET
   Fortran::runtime::io::IoErrorHandler handler{why};
   Fortran::runtime::io::ExternalFileUnit::CloseAll(handler);
+#endif
 }
 
 [[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)(
@@ -134,6 +140,7 @@ static void CloseAllExternalUnits(const char *why) {
 #endif
 }
 
+#if !RT_GPU_TARGET
 static bool StartPause() {
   if (Fortran::runtime::io::IsATerminal(0)) {
     Fortran::runtime::io::IoErrorHandler handler{"PAUSE statement"};
@@ -173,6 +180,7 @@ void RTNAME(PauseStatementText)(const char *code, std::size_t length) {
     EndPause();
   }
 }
+#endif
 
 [[noreturn]] void RTNAME(FailImageStatement)() {
   CloseAllExternalUnits("FAIL IMAGE statement");

diff  --git a/flang-rt/lib/runtime/terminator.cpp b/flang-rt/lib/runtime/terminator.cpp
index e8d64223919e4..2c06c8de74d0f 100644
--- a/flang-rt/lib/runtime/terminator.cpp
+++ b/flang-rt/lib/runtime/terminator.cpp
@@ -70,8 +70,11 @@ RT_API_ATTRS void Terminator::CrashHeader() const {
   std::printf("\n");
 #else
   fputc('\n', stderr);
+  // TODO: This should flush the buffers through the RPC interface.
+#if !RT_GPU_TARGET
   // FIXME: re-enable the flush along with the IO enabling.
   io::FlushOutputOnCrash(*this);
+#endif
 #endif
   NotifyOtherImagesOfErrorTermination(EXIT_FAILURE);
 #if defined(RT_DEVICE_COMPILATION)

diff  --git a/flang/include/flang/Common/api-attrs.h b/flang/include/flang/Common/api-attrs.h
index fd524ee34ccff..efb495e7f132e 100644
--- a/flang/include/flang/Common/api-attrs.h
+++ b/flang/include/flang/Common/api-attrs.h
@@ -133,6 +133,18 @@
 #undef RT_DEVICE_COMPILATION
 #endif
 
+/*
+ * RT_GPU_TARGET is defined when compiling natively for a GPU
+ * target (AMDGPU or NVPTX) using a GPU-hosted libc/libc++. This is
+ * distinct from RT_DEVICE_COMPILATION which covers CUDA and OpenMP
+ * offload paths that use separate host/device compilation.
+ */
+#if defined(__AMDGPU__) || defined(__NVPTX__)
+#define RT_GPU_TARGET 1
+#else
+#undef RT_GPU_TARGET
+#endif
+
 /*
  * Recurrence in the call graph prevents computing minimal stack size
  * required for a kernel execution. This macro can be used to disable

diff  --git a/offload/test/offloading/fortran/target-descriptor-ops.f90 b/offload/test/offloading/fortran/target-descriptor-ops.f90
new file mode 100644
index 0000000000000..43dae03b33995
--- /dev/null
+++ b/offload/test/offloading/fortran/target-descriptor-ops.f90
@@ -0,0 +1,50 @@
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+  implicit none
+  integer :: result
+
+  ! CHECK: 100
+  result = 0
+  !$omp target map(from: result)
+  block
+    integer, allocatable :: arr(:)
+    integer :: i
+    allocate(arr(4))
+    do i = 1, 4
+      arr(i) = i * 10
+    end do
+    result = arr(1) + arr(2) + arr(3) + arr(4)
+    deallocate(arr)
+  end block
+  !$omp end target
+  print *, result
+
+  ! CHECK: 21
+  result = 0
+  !$omp target map(from: result)
+  block
+    integer, allocatable :: mat(:,:)
+    allocate(mat(2, 3))
+    mat(1,1) = 1; mat(2,1) = 2
+    mat(1,2) = 3; mat(2,2) = 4
+    mat(1,3) = 5; mat(2,3) = 6
+    result = mat(1,1) + mat(2,1) + mat(1,2) + mat(2,2) + mat(1,3) + mat(2,3)
+    deallocate(mat)
+  end block
+  !$omp end target
+  print *, result
+
+  ! CHECK: 17
+  result = 0
+  !$omp target map(from: result)
+  block
+    integer, allocatable :: arr(:)
+    allocate(arr(8))
+    result = size(arr) + lbound(arr, 1) + ubound(arr, 1)
+    deallocate(arr)
+  end block
+  !$omp end target
+  print *, result
+end program main


        


More information about the flang-commits mailing list