[flang-commits] [flang] [flang][OpenMP][CUDA] Place privatized device allocatable descriptors in managed memory (PR #187114)
Zhen Wang via flang-commits
flang-commits at lists.llvm.org
Tue Mar 17 14:10:52 PDT 2026
https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/187114
From a8f84d5f6bc05235ebc37abdbc5c02cc6f6498d8 Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 17 Mar 2026 12:58:02 -0700
Subject: [PATCH 1/3] place descriptor for device data in managed memory
---
.../Lower/Support/PrivateReductionUtils.cpp | 26 +++++++++++++++++++
...elayed-privatization-cuda-device-array.cuf | 18 +++++++++----
2 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index 551d8bae41fd4..5b99b3140b480 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -115,6 +115,16 @@ static void createCleanupRegion(Fortran::lower::AbstractConverter &converter,
fir::FreeMemOp::create(builder, loc, cast);
builder.setInsertionPointAfter(ifOp);
+ // Free the managed descriptor if this is a CUDA device allocatable.
+ if (sym) {
+ unsigned idx = Fortran::lower::getAllocatorIdx(sym->GetUltimate());
+ if (idx != kDefaultAllocator) {
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(
+ builder.getContext(), sym->GetUltimate());
+ cuf::FreeOp::create(builder, loc, block->getArgument(0), dataAttr);
+ }
+ }
if (isDoConcurrent)
fir::YieldOp::create(builder, loc);
else
@@ -665,6 +675,22 @@ void PopulateInitAndCleanupRegionsHelper::populateByRefInitAndCleanupRegions() {
if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) {
builder.setInsertionPointToEnd(initBlock);
+ // For CUDA device allocatables, allocate the descriptor in managed
+ // memory so that CUF kernels can access it from the GPU.
+ if (sym && mlir::isa<fir::HeapType>(boxTy.getEleTy())) {
+ unsigned idx = Fortran::lower::getAllocatorIdx(sym->GetUltimate());
+ if (idx != kDefaultAllocator) {
+ cuf::DataAttributeAttr dataAttr =
+ Fortran::lower::translateSymbolCUFDataAttribute(
+ builder.getContext(), sym->GetUltimate());
+ auto managedDesc = cuf::AllocOp::create(
+ builder, loc, valTy, /*uniq_name=*/llvm::StringRef{},
+ /*bindc_name=*/llvm::StringRef{}, dataAttr,
+ /*typeparams=*/mlir::ValueRange{}, /*shape=*/mlir::ValueRange{});
+ allocatedPrivVarArg = managedDesc.getResult();
+ }
+ }
+
// TODO: don't do this unless it is needed
getLengthParameters(builder, loc, getLoadedMoldArg(), lenParams);
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf b/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
index cb9f9ea58d4cd..941e682c77068 100644
--- a/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
+++ b/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
@@ -1,6 +1,9 @@
-! Test that OpenMP privatization of CUDA Fortran allocatable device arrays
-! sets allocator_idx = 2 on the null descriptor so that user-written
-! allocate() (after cudaSetDevice) uses cudaMalloc on the correct GPU.
+! Test that OpenMP privatization of CUDA Fortran allocatable device arrays:
+! 1. Allocates the descriptor in managed memory (cuf.alloc) so CUF kernels
+! can access it from the GPU.
+! 2. Sets allocator_idx = 2 on the null descriptor so that allocate() uses
+! cudaMalloc.
+! 3. Frees the managed descriptor in the dealloc region (cuf.free).
! RUN: bbc -emit-hlfir -fcuda -fopenmp %s -o - | FileCheck %s
@@ -17,8 +20,13 @@ end subroutine
! CHECK-LABEL: omp.private {type = private}
! CHECK-SAME: @{{.*}} : !fir.box<!fir.heap<!fir.array<?xf64>>> init {
-! Null descriptor must carry allocator_idx = 2 so that a later
-! allocate() inside the parallel region calls cudaMalloc, not malloc.
+! Descriptor must be allocated in managed memory for GPU accessibility.
+! CHECK: cuf.alloc !fir.box<!fir.heap<!fir.array<?xf64>>> {data_attr = #cuf.cuda<device>}
+! Null descriptor must carry allocator_idx = 2.
! CHECK: fir.embox %{{.*}}(%{{.*}}) {allocator_idx = 2 : i32}
! CHECK: fir.store
+! CHECK: } dealloc {
+! Managed descriptor must be freed.
+! CHECK: cuf.free %{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf64>>>> {data_attr = #cuf.cuda<device>}
+! CHECK: }
! CHECK-LABEL: func.func
From 9ad00f21ac8e04e9222c6e0e08dd45f14620e033 Mon Sep 17 00:00:00 2001
From: Zhen Wang <37195552+wangzpgi at users.noreply.github.com>
Date: Tue, 17 Mar 2026 13:46:05 -0700
Subject: [PATCH 2/3] Update flang/lib/Lower/Support/PrivateReductionUtils.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Valentin Clement (バレンタイン クレメン) <clementval at gmail.com>
---
flang/lib/Lower/Support/PrivateReductionUtils.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index 5b99b3140b480..93375b7892427 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -683,11 +683,10 @@ void PopulateInitAndCleanupRegionsHelper::populateByRefInitAndCleanupRegions() {
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(
builder.getContext(), sym->GetUltimate());
- auto managedDesc = cuf::AllocOp::create(
+ allocatedPrivVarArg = cuf::AllocOp::create(
builder, loc, valTy, /*uniq_name=*/llvm::StringRef{},
/*bindc_name=*/llvm::StringRef{}, dataAttr,
- /*typeparams=*/mlir::ValueRange{}, /*shape=*/mlir::ValueRange{});
- allocatedPrivVarArg = managedDesc.getResult();
+ /*typeparams=*/mlir::ValueRange{}, /*shape=*/mlir::ValueRange{}).getResult();
}
}
From 73178cd0b185cb649260f96547e910cbf22bfecf Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 17 Mar 2026 14:01:01 -0700
Subject: [PATCH 3/3] format
---
flang/lib/Lower/Support/PrivateReductionUtils.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index 93375b7892427..aae433c023d01 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -683,10 +683,13 @@ void PopulateInitAndCleanupRegionsHelper::populateByRefInitAndCleanupRegions() {
cuf::DataAttributeAttr dataAttr =
Fortran::lower::translateSymbolCUFDataAttribute(
builder.getContext(), sym->GetUltimate());
- allocatedPrivVarArg = cuf::AllocOp::create(
- builder, loc, valTy, /*uniq_name=*/llvm::StringRef{},
- /*bindc_name=*/llvm::StringRef{}, dataAttr,
- /*typeparams=*/mlir::ValueRange{}, /*shape=*/mlir::ValueRange{}).getResult();
+ allocatedPrivVarArg =
+ cuf::AllocOp::create(builder, loc, valTy,
+ /*uniq_name=*/llvm::StringRef{},
+ /*bindc_name=*/llvm::StringRef{}, dataAttr,
+ /*typeparams=*/mlir::ValueRange{},
+ /*shape=*/mlir::ValueRange{})
+ .getResult();
}
}
More information about the flang-commits mailing list