[llvm] [flang][cuda] Use the asyncId in device allocation (PR #135099)
Valentin Clement バレンタイン クレメン via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 9 16:00:37 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/135099
Use `cudaMallocAsync` in the `CUFAllocDevice` allocator when asyncId is provided.
More work is needed before `cudaFreeAsync` can be called, since the allocated address and its stream need to be tracked.
From 900f85639eb0a965eb92f7fe8a8c8f8d1d932fc7 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 9 Apr 2025 14:03:43 -0700
Subject: [PATCH] [flang][cuda] Use the asyncId in device allocation
---
flang-rt/lib/cuda/allocator.cpp | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index d606ab2d4313b..a1c3a2c1b2ea8 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -9,6 +9,7 @@
#include "flang/Runtime/CUDA/allocator.h"
#include "flang-rt/runtime/allocator-registry.h"
#include "flang-rt/runtime/derived.h"
+#include "flang-rt/runtime/descriptor.h"
#include "flang-rt/runtime/environment.h"
#include "flang-rt/runtime/stat.h"
#include "flang-rt/runtime/terminator.h"
@@ -43,14 +44,18 @@ void *CUFAllocPinned(
void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
-void *CUFAllocDevice(
- std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) {
+void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) {
void *p;
if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) {
CUDA_REPORT_IF_ERROR(
cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
} else {
- CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
+ if (asyncId == kNoAsyncId) {
+ CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
+ } else {
+ CUDA_REPORT_IF_ERROR(
+ cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId));
+ }
}
return p;
}
More information about the llvm-commits mailing list