[llvm] [flang][rt] Add noinline attributes for CUDA compile path for successful compilation (PR #161760)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 2 18:36:05 PDT 2025
https://github.com/modiking created https://github.com/llvm/llvm-project/pull/161760
NVCC inlines more aggressively than Clang/GCC, which causes the exported functions in extrema.cpp and findloc.cpp to become extremely large from function specializations and leads to compilation timeouts. Marking the two functions in this change as noinline for NVCC alleviates the problem, as it removes the worst of the cross-matrix argument specializations.
Also remove the workaround from https://github.com/llvm/llvm-project/pull/156542 that excluded findloc.cpp from the CUDA flang-rt build.
Testing:
`ninja flang-rt` builds in ~30 minutes; these 2 files build in ~3 minutes.
>From c95324337b4bb459760f46eaa4f4167b457cde0f Mon Sep 17 00:00:00 2001
From: Modi Mo <mmo at nvidia.com>
Date: Wed, 1 Oct 2025 20:04:16 -0700
Subject: [PATCH 1/2] enable full flang cuda build
---
flang-rt/lib/runtime/CMakeLists.txt | 3 ---
flang-rt/lib/runtime/extrema.cpp | 10 +++++++---
flang-rt/lib/runtime/findloc.cpp | 11 +++++++----
3 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index 6548ec955b2b8..e8f70bd544e0b 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -178,9 +178,6 @@ endif ()
if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx")
set(sources ${gpu_sources})
elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA")
- # findloc.cpp has some issues with higher compute capability. Remove it
- # from CUDA build until we can lower its memory footprint.
- list(REMOVE_ITEM supported_sources findloc.cpp)
set(sources ${supported_sources})
else ()
set(sources ${supported_sources} ${host_sources} ${f128_sources})
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp
index 9846529665e8b..29f0e93e9631f 100644
--- a/flang-rt/lib/runtime/extrema.cpp
+++ b/flang-rt/lib/runtime/extrema.cpp
@@ -397,9 +397,13 @@ template <TypeCategory CAT, bool IS_MAX,
template <typename, bool, bool> class COMPARE>
struct DoPartialMaxOrMinLocHelper {
template <int KIND> struct Functor {
- RT_API_ATTRS void operator()(const char *intrinsic, Descriptor &result,
- const Descriptor &x, int kind, int dim, const Descriptor *mask,
- bool back, Terminator &terminator) const {
+#if defined(__CUDACC__)
+ __attribute__((noinline))
+#endif
+ RT_API_ATTRS void
+ operator()(const char *intrinsic, Descriptor &result, const Descriptor &x,
+ int kind, int dim, const Descriptor *mask, bool back,
+ Terminator &terminator) const {
DoPartialMaxOrMinLoc<CAT, KIND, IS_MAX, COMPARE>(
intrinsic, result, x, kind, dim, mask, back, terminator);
}
diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp
index 5485f4b97bd2f..fe11386988476 100644
--- a/flang-rt/lib/runtime/findloc.cpp
+++ b/flang-rt/lib/runtime/findloc.cpp
@@ -153,10 +153,13 @@ template <TypeCategory CAT,
class HELPER>
struct NumericFindlocHelper {
template <int KIND> struct Functor {
- RT_API_ATTRS void operator()(TypeCategory targetCat, int targetKind,
- Descriptor &result, const Descriptor &x, const Descriptor &target,
- int kind, int dim, const Descriptor *mask, bool back,
- Terminator &terminator) const {
+#if defined(__CUDACC__)
+ __attribute__((noinline))
+#endif
+ RT_API_ATTRS void
+ operator()(TypeCategory targetCat, int targetKind, Descriptor &result,
+ const Descriptor &x, const Descriptor &target, int kind, int dim,
+ const Descriptor *mask, bool back, Terminator &terminator) const {
switch (targetCat) {
case TypeCategory::Integer:
case TypeCategory::Unsigned:
>From 65c8c54335b1f3ef061744e9cca90c96c7358ec0 Mon Sep 17 00:00:00 2001
From: Modi Mo <mmo at nvidia.com>
Date: Thu, 2 Oct 2025 18:28:38 -0700
Subject: [PATCH 2/2] add comments
---
flang-rt/lib/runtime/extrema.cpp | 3 +++
flang-rt/lib/runtime/findloc.cpp | 3 +++
2 files changed, 6 insertions(+)
diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp
index 29f0e93e9631f..3c9af38a64ddc 100644
--- a/flang-rt/lib/runtime/extrema.cpp
+++ b/flang-rt/lib/runtime/extrema.cpp
@@ -397,6 +397,9 @@ template <TypeCategory CAT, bool IS_MAX,
template <typename, bool, bool> class COMPARE>
struct DoPartialMaxOrMinLocHelper {
template <int KIND> struct Functor {
+ // NVCC inlines more aggressively which causes too many specializations of
+ // this function to be inlined causing compiler timeouts. Set as
+ // noinline to allow compilation to complete.
#if defined(__CUDACC__)
__attribute__((noinline))
#endif
diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp
index fe11386988476..e1060bf82c333 100644
--- a/flang-rt/lib/runtime/findloc.cpp
+++ b/flang-rt/lib/runtime/findloc.cpp
@@ -153,6 +153,9 @@ template <TypeCategory CAT,
class HELPER>
struct NumericFindlocHelper {
template <int KIND> struct Functor {
+ // NVCC inlines more aggressively which causes too many specializations of
+ // this function to be inlined causing compiler timeouts. Set as
+ // noinline to allow compilation to complete.
#if defined(__CUDACC__)
__attribute__((noinline))
#endif
More information about the llvm-commits
mailing list