[flang-commits] [flang] [flang][cuda] Allow to set the stack limit size (PR #124859)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Jan 28 16:09:50 PST 2025
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/124859
From 3641398393de637b3d172bdae52ab127368745e2 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 28 Jan 2025 15:52:08 -0800
Subject: [PATCH 1/2] [flang][cuda] Allow to set the stack limit size
---
flang/CMakeLists.txt | 13 +++++-----
.../flang/Optimizer/Builder/Runtime/Main.h | 3 ++-
flang/include/flang/Runtime/CUDA/init.h | 20 +++++++++++++++
flang/lib/Lower/Bridge.cpp | 4 ++-
flang/lib/Optimizer/Builder/Runtime/Main.cpp | 15 ++++++++++-
flang/runtime/CUDA/CMakeLists.txt | 1 +
flang/runtime/CUDA/init.cpp | 25 +++++++++++++++++++
flang/runtime/environment.cpp | 11 ++++++++
flang/runtime/environment.h | 3 +++
9 files changed, 86 insertions(+), 9 deletions(-)
create mode 100644 flang/include/flang/Runtime/CUDA/init.h
create mode 100644 flang/runtime/CUDA/init.cpp
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index b619553ef83021..fb7ab4759ad37e 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS)
add_compile_definitions(FLANG_INCLUDE_TESTS=1)
endif()
+option(FLANG_CUF_RUNTIME
+ "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+ find_package(CUDAToolkit REQUIRED)
+ add_compile_definitions(FLANG_CUDA_SUPPORT=1)
+endif()
+
add_subdirectory(include)
add_subdirectory(lib)
add_subdirectory(cmake/modules)
@@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()
-option(FLANG_CUF_RUNTIME
- "Compile CUDA Fortran runtime sources" OFF)
-if (FLANG_CUF_RUNTIME)
- find_package(CUDAToolkit REQUIRED)
-endif()
-
add_subdirectory(runtime)
if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Main.h b/flang/include/flang/Optimizer/Builder/Runtime/Main.h
index e4c5dc914c700b..a0586deade42aa 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Main.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Main.h
@@ -24,7 +24,8 @@ class GlobalOp;
namespace fir::runtime {
void genMain(fir::FirOpBuilder &builder, mlir::Location loc,
- const std::vector<Fortran::lower::EnvironmentDefault> &defs);
+ const std::vector<Fortran::lower::EnvironmentDefault> &defs,
+ bool initCuda = false);
}
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H
diff --git a/flang/include/flang/Runtime/CUDA/init.h b/flang/include/flang/Runtime/CUDA/init.h
new file mode 100644
index 00000000000000..24bc6838227208
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/init.h
@@ -0,0 +1,20 @@
+//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_
+#define FORTRAN_RUNTIME_CUDA_INIT_H_
+
+#include "common.h"
+#include "flang/Runtime/entry-names.h"
+
+extern "C" {
+
+void RTDECL(CUFInit)();
+}
+
+#endif // FORTRAN_RUNTIME_CUDA_INIT_H_
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index d92dc0cf9abd62..ff80826216e4f5 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (hasMainProgram)
createGlobalOutsideOfFunctionLowering([&]() {
fir::runtime::genMain(*builder, toLocation(),
- bridge.getEnvironmentDefaults());
+ bridge.getEnvironmentDefaults(),
+ getFoldingContext().languageFeatures().IsEnabled(
+ Fortran::common::LanguageFeature::CUDA));
});
finalizeOpenACCLowering();
diff --git a/flang/lib/Optimizer/Builder/Runtime/Main.cpp b/flang/lib/Optimizer/Builder/Runtime/Main.cpp
index ab3c4ca81314ce..5156fd54020777 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Main.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Main.cpp
@@ -16,13 +16,17 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Runtime/main.h"
#include "flang/Runtime/stop.h"
+#ifdef FLANG_CUDA_SUPPORT
+#include "flang/Runtime/CUDA/init.h"
+#endif
using namespace Fortran::runtime;
/// Create a `int main(...)` that calls the Fortran entry point
void fir::runtime::genMain(
fir::FirOpBuilder &builder, mlir::Location loc,
- const std::vector<Fortran::lower::EnvironmentDefault> &defs) {
+ const std::vector<Fortran::lower::EnvironmentDefault> &defs,
+ bool initCuda) {
auto *context = builder.getContext();
auto argcTy = builder.getDefaultIntegerType();
auto ptrTy = mlir::LLVM::LLVMPointerType::get(context);
@@ -61,6 +65,15 @@ void fir::runtime::genMain(
args.push_back(env);
builder.create<fir::CallOp>(loc, startFn, args);
+
+#ifdef FLANG_CUDA_SUPPORT
+ if (initCuda) {
+ auto initFn = builder.createFunction(
+ loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {}));
+ builder.create<fir::CallOp>(loc, initFn);
+ }
+#endif
+
builder.create<fir::CallOp>(loc, qqMainFn);
builder.create<fir::CallOp>(loc, stopFn);
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 23e01da72eded1..bfbae58086c1fd 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME}
allocator.cpp
allocatable.cpp
descriptor.cpp
+ init.cpp
kernel.cpp
memmove-function.cpp
memory.cpp
diff --git a/flang/runtime/CUDA/init.cpp b/flang/runtime/CUDA/init.cpp
new file mode 100644
index 00000000000000..2bffce842b9526
--- /dev/null
+++ b/flang/runtime/CUDA/init.cpp
@@ -0,0 +1,25 @@
+//===-- runtime/CUDA/init.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/init.h"
+#include "../environment.h"
+#include "../terminator.h"
+#include "flang/Runtime/CUDA/common.h"
+
+#include "cuda_runtime.h"
+
+extern "C" {
+
+void RTDEF(CUFInit)() {
+ // Perform context initialization based on the execution environment, if necessary.
+ if (Fortran::runtime::executionEnvironment.cudaStackLimit) {
+ CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize,
+ Fortran::runtime::executionEnvironment.cudaStackLimit));
+ }
+}
+}
diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp
index 52b1d99ba536ed..0f927587fb4f88 100644
--- a/flang/runtime/environment.cpp
+++ b/flang/runtime/environment.cpp
@@ -143,6 +143,17 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
}
}
+ if (auto *x{std::getenv("CUDA_STACKLIMIT")}) {
+ char *end;
+ auto n{std::strtol(x, &end, 10)};
+ if (n >= 0 && n < std::numeric_limits<int>::max() && *end == '\0') {
+ cudaStackLimit = n;
+ } else {
+ std::fprintf(stderr,
+ "Fortran runtime: CUDA_STACKLIMIT=%s is invalid; ignored\n", x);
+ }
+ }
+
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
}
diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h
index b8b9f10e4e57f5..184f0eb8653a65 100644
--- a/flang/runtime/environment.h
+++ b/flang/runtime/environment.h
@@ -56,6 +56,9 @@ struct ExecutionEnvironment {
bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
bool defaultUTF8{false}; // DEFAULT_UTF8
bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION
+
+ // CUDA Fortran related variables
+ std::size_t cudaStackLimit{0}; // CUDA_STACKLIMIT
};
RT_OFFLOAD_VAR_GROUP_BEGIN
From b3bcc4ec56cdee0864a962078f797a8e14583204 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 28 Jan 2025 16:09:19 -0800
Subject: [PATCH 2/2] Update comparison
---
flang/runtime/environment.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp
index 0f927587fb4f88..ee2e1e94fa5f30 100644
--- a/flang/runtime/environment.cpp
+++ b/flang/runtime/environment.cpp
@@ -145,8 +145,8 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
if (auto *x{std::getenv("CUDA_STACKLIMIT")}) {
char *end;
- auto n{std::strtol(x, &end, 10)};
- if (n >= 0 && n < std::numeric_limits<int>::max() && *end == '\0') {
+ auto n{std::strtoul(x, &end, 10)};
+ if (n > 0 && n < std::numeric_limits<std::size_t>::max() && *end == '\0') {
cudaStackLimit = n;
} else {
std::fprintf(stderr,
More information about the flang-commits mailing list