[flang-commits] [flang] 654b763 - [flang][cuda] Allow to set the stack limit size (#124859)
via flang-commits
flang-commits at lists.llvm.org
Tue Jan 28 20:57:37 PST 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-01-28T20:57:33-08:00
New Revision: 654b76321a602db4d68734e9fd11efbb7d8eb617
URL: https://github.com/llvm/llvm-project/commit/654b76321a602db4d68734e9fd11efbb7d8eb617
DIFF: https://github.com/llvm/llvm-project/commit/654b76321a602db4d68734e9fd11efbb7d8eb617.diff
LOG: [flang][cuda] Allow to set the stack limit size (#124859)
This patch adds a call to the CUFInit function just after `ProgramStart`
when CUDA Fortran is enabled to initialize the CUDA context. This allows
us to set up some context information like the stack limit that can be
defined by an environment variable `ACC_OFFLOAD_STACK_SIZE=<value>`.
Added:
flang/include/flang/Runtime/CUDA/init.h
flang/runtime/CUDA/init.cpp
Modified:
flang/CMakeLists.txt
flang/include/flang/Optimizer/Builder/Runtime/Main.h
flang/lib/Lower/Bridge.cpp
flang/lib/Optimizer/Builder/Runtime/Main.cpp
flang/runtime/CUDA/CMakeLists.txt
flang/runtime/environment.cpp
flang/runtime/environment.h
Removed:
################################################################################
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index b619553ef83021..fb7ab4759ad37e 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS)
add_compile_definitions(FLANG_INCLUDE_TESTS=1)
endif()
+option(FLANG_CUF_RUNTIME
+ "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+ find_package(CUDAToolkit REQUIRED)
+ add_compile_definitions(FLANG_CUDA_SUPPORT=1)
+endif()
+
add_subdirectory(include)
add_subdirectory(lib)
add_subdirectory(cmake/modules)
@@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()
-option(FLANG_CUF_RUNTIME
- "Compile CUDA Fortran runtime sources" OFF)
-if (FLANG_CUF_RUNTIME)
- find_package(CUDAToolkit REQUIRED)
-endif()
-
add_subdirectory(runtime)
if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Main.h b/flang/include/flang/Optimizer/Builder/Runtime/Main.h
index e4c5dc914c700b..a0586deade42aa 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Main.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Main.h
@@ -24,7 +24,8 @@ class GlobalOp;
namespace fir::runtime {
void genMain(fir::FirOpBuilder &builder, mlir::Location loc,
- const std::vector<Fortran::lower::EnvironmentDefault> &defs);
+ const std::vector<Fortran::lower::EnvironmentDefault> &defs,
+ bool initCuda = false);
}
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H
diff --git a/flang/include/flang/Runtime/CUDA/init.h b/flang/include/flang/Runtime/CUDA/init.h
new file mode 100644
index 00000000000000..24bc6838227208
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/init.h
@@ -0,0 +1,20 @@
+//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_
+#define FORTRAN_RUNTIME_CUDA_INIT_H_
+
+#include "common.h"
+#include "flang/Runtime/entry-names.h"
+
+extern "C" {
+
+void RTDECL(CUFInit)();
+}
+
+#endif // FORTRAN_RUNTIME_CUDA_INIT_H_
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index d92dc0cf9abd62..ff80826216e4f5 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (hasMainProgram)
createGlobalOutsideOfFunctionLowering([&]() {
fir::runtime::genMain(*builder, toLocation(),
- bridge.getEnvironmentDefaults());
+ bridge.getEnvironmentDefaults(),
+ getFoldingContext().languageFeatures().IsEnabled(
+ Fortran::common::LanguageFeature::CUDA));
});
finalizeOpenACCLowering();
diff --git a/flang/lib/Optimizer/Builder/Runtime/Main.cpp b/flang/lib/Optimizer/Builder/Runtime/Main.cpp
index ab3c4ca81314ce..5156fd54020777 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Main.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Main.cpp
@@ -16,13 +16,17 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Runtime/main.h"
#include "flang/Runtime/stop.h"
+#ifdef FLANG_CUDA_SUPPORT
+#include "flang/Runtime/CUDA/init.h"
+#endif
using namespace Fortran::runtime;
/// Create a `int main(...)` that calls the Fortran entry point
void fir::runtime::genMain(
fir::FirOpBuilder &builder, mlir::Location loc,
- const std::vector<Fortran::lower::EnvironmentDefault> &defs) {
+ const std::vector<Fortran::lower::EnvironmentDefault> &defs,
+ bool initCuda) {
auto *context = builder.getContext();
auto argcTy = builder.getDefaultIntegerType();
auto ptrTy = mlir::LLVM::LLVMPointerType::get(context);
@@ -61,6 +65,15 @@ void fir::runtime::genMain(
args.push_back(env);
builder.create<fir::CallOp>(loc, startFn, args);
+
+#ifdef FLANG_CUDA_SUPPORT
+ if (initCuda) {
+ auto initFn = builder.createFunction(
+ loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {}));
+ builder.create<fir::CallOp>(loc, initFn);
+ }
+#endif
+
builder.create<fir::CallOp>(loc, qqMainFn);
builder.create<fir::CallOp>(loc, stopFn);
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 23e01da72eded1..bfbae58086c1fd 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME}
allocator.cpp
allocatable.cpp
descriptor.cpp
+ init.cpp
kernel.cpp
memmove-function.cpp
memory.cpp
diff --git a/flang/runtime/CUDA/init.cpp b/flang/runtime/CUDA/init.cpp
new file mode 100644
index 00000000000000..2bffce842b9526
--- /dev/null
+++ b/flang/runtime/CUDA/init.cpp
@@ -0,0 +1,25 @@
+//===-- runtime/CUDA/init.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/init.h"
+#include "../environment.h"
+#include "../terminator.h"
+#include "flang/Runtime/CUDA/common.h"
+
+#include "cuda_runtime.h"
+
+extern "C" {
+
+void RTDEF(CUFInit)() {
+ // Perform ctx initialization based on execution environment if necessary.
+ if (Fortran::runtime::executionEnvironment.cudaStackLimit) {
+ CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize,
+ Fortran::runtime::executionEnvironment.cudaStackLimit));
+ }
+}
+}
diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp
index 52b1d99ba536ed..678d8745c9fd7b 100644
--- a/flang/runtime/environment.cpp
+++ b/flang/runtime/environment.cpp
@@ -143,6 +143,18 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
}
}
+ if (auto *x{std::getenv("ACC_OFFLOAD_STACK_SIZE")}) {
+ char *end;
+ auto n{std::strtoul(x, &end, 10)};
+ if (n > 0 && n < std::numeric_limits<std::size_t>::max() && *end == '\0') {
+ cudaStackLimit = n;
+ } else {
+ std::fprintf(stderr,
+ "Fortran runtime: ACC_OFFLOAD_STACK_SIZE=%s is invalid; ignored\n",
+ x);
+ }
+ }
+
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
}
diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h
index b8b9f10e4e57f5..500aa925a625b9 100644
--- a/flang/runtime/environment.h
+++ b/flang/runtime/environment.h
@@ -56,6 +56,9 @@ struct ExecutionEnvironment {
bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
bool defaultUTF8{false}; // DEFAULT_UTF8
bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION
+
+ // CUDA related variables
+ std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
};
RT_OFFLOAD_VAR_GROUP_BEGIN
More information about the flang-commits
mailing list