[llvm] [flang][cuda] Add support for NV_CUDAFOR_DEVICE_IS_MANAGED (PR #133778)
Valentin Clement バレンタイン クレメン via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 31 11:51:47 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/133778
Add support for the environment variable `NV_CUDAFOR_DEVICE_IS_MANAGED` as described in the documentation: https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/index.html#controlling-device-data-is-managed.
This mainly switch device allocation to managed allocation.
>From 768485e9c0ed3a299705de2b32367e81cd975dfa Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 31 Mar 2025 11:49:40 -0700
Subject: [PATCH] [flang][cuda] Add support for NV_CUDAFOR_DEVICE_IS_MANAGED
---
flang-rt/include/flang-rt/runtime/environment.h | 1 +
flang-rt/lib/cuda/allocator.cpp | 8 +++++++-
flang-rt/lib/cuda/memory.cpp | 8 +++++++-
flang-rt/lib/runtime/environment.cpp | 13 +++++++++++++
4 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/flang-rt/include/flang-rt/runtime/environment.h b/flang-rt/include/flang-rt/runtime/environment.h
index 142add432b5f7..ca6c2a7d44484 100644
--- a/flang-rt/include/flang-rt/runtime/environment.h
+++ b/flang-rt/include/flang-rt/runtime/environment.h
@@ -59,6 +59,7 @@ struct ExecutionEnvironment {
// CUDA related variables
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
+ bool cudaDeviceIsManaged{false}; // NV_CUDAFOR_DEVICE_IS_MANAGED
};
RT_OFFLOAD_VAR_GROUP_BEGIN
diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index 4199bf04b33f0..d6529957bc939 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -9,6 +9,7 @@
#include "flang/Runtime/CUDA/allocator.h"
#include "flang-rt/runtime/allocator-registry.h"
#include "flang-rt/runtime/derived.h"
+#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/stat.h"
#include "flang-rt/runtime/terminator.h"
#include "flang-rt/runtime/type-info.h"
@@ -43,7 +44,12 @@ void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
void *CUFAllocDevice(std::size_t sizeInBytes) {
void *p;
- CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
+ if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
+ CUDA_REPORT_IF_ERROR(
+ cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
+ } else {
+ CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
+ }
return p;
}
diff --git a/flang-rt/lib/cuda/memory.cpp b/flang-rt/lib/cuda/memory.cpp
index adc24ff223729..766f6847946cb 100644
--- a/flang-rt/lib/cuda/memory.cpp
+++ b/flang-rt/lib/cuda/memory.cpp
@@ -9,6 +9,7 @@
#include "flang/Runtime/CUDA/memory.h"
#include "flang-rt/runtime/assign-impl.h"
#include "flang-rt/runtime/descriptor.h"
+#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/terminator.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/CUDA/descriptor.h"
@@ -26,7 +27,12 @@ void *RTDEF(CUFMemAlloc)(
void *ptr = nullptr;
if (bytes != 0) {
if (type == kMemTypeDevice) {
- CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
+ if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
+ CUDA_REPORT_IF_ERROR(
+ cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
+ } else {
+ CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
+ }
} else if (type == kMemTypeManaged || type == kMemTypeUnified) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
diff --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp
index 15380ba148df5..cf2c65dd4fac0 100644
--- a/flang-rt/lib/runtime/environment.cpp
+++ b/flang-rt/lib/runtime/environment.cpp
@@ -155,6 +155,19 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
}
}
+ if (auto *x{std::getenv("NV_CUDAFOR_DEVICE_IS_MANAGED")}) {
+ char *end;
+ auto n{std::strtol(x, &end, 10)};
+ if (n >= 0 && n <= 1 && *end == '\0') {
+ cudaDeviceIsManaged = n != 0;
+ } else {
+ std::fprintf(stderr,
+ "Fortran runtime: NV_CUDAFOR_DEVICE_IS_MANAGED=%s is invalid; "
+ "ignored\n",
+ x);
+ }
+ }
+
// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
}
More information about the llvm-commits
mailing list