[flang-commits] [flang] [flang][OpenMP][CUDA] Set allocator_idx on privatized allocatable device array descriptors (PR #186945)
Zhen Wang via flang-commits
flang-commits at lists.llvm.org
Tue Mar 17 09:09:23 PDT 2026
https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/186945
>From 16f090abdbc1a0f7a46bc4365e97009b3625cccd Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 16 Mar 2026 20:17:28 -0700
Subject: [PATCH 1/2] Set allocator_idx on privatized allocatable device array descriptors
---
.../Lower/Support/PrivateReductionUtils.cpp | 11 ++++++---
...elayed-privatization-cuda-device-array.cuf | 24 +++++++++++++++++++
2 files changed, 32 insertions(+), 3 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index f63fb6ecfe43f..551d8bae41fd4 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -14,6 +14,7 @@
#include "flang/Lower/AbstractConverter.h"
#include "flang/Lower/Allocatable.h"
+#include "flang/Lower/CUDA.h"
#include "flang/Lower/ConvertVariable.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/Character.h"
@@ -435,9 +436,13 @@ fir::IfOp PopulateInitAndCleanupRegionsHelper::handleNullAllocatable() {
// right rank. This returns an empty value if the types don't match.
mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg);
- mlir::Value nullBox =
- fir::EmboxOp::create(builder, loc, valType, addr, shape,
- /*slice=*/mlir::Value{}, lenParams);
+ auto nullBox = fir::EmboxOp::create(builder, loc, valType, addr, shape,
+ /*slice=*/mlir::Value{}, lenParams);
+ if (sym) {
+ unsigned idx = Fortran::lower::getAllocatorIdx(sym->GetUltimate());
+ if (idx != kDefaultAllocator)
+ nullBox.setAllocatorIdx(idx);
+ }
fir::StoreOp::create(builder, loc, nullBox, allocatedPrivVarArg);
return ifOp;
}
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf b/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
new file mode 100644
index 0000000000000..cb9f9ea58d4cd
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
@@ -0,0 +1,24 @@
+! Test that OpenMP privatization of CUDA Fortran allocatable device arrays
+! sets allocator_idx = 2 on the null descriptor so that user-written
+! allocate() (after cudaSetDevice) uses cudaMalloc on the correct GPU.
+
+! RUN: bbc -emit-hlfir -fcuda -fopenmp %s -o - | FileCheck %s
+
+subroutine omp_private_device_allocatable()
+ implicit none
+ real(8), device, allocatable :: adev(:)
+
+ !$omp parallel private(adev)
+ allocate(adev(10))
+ adev(1) = 1.0d0
+ deallocate(adev)
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @{{.*}} : !fir.box<!fir.heap<!fir.array<?xf64>>> init {
+! Null descriptor must carry allocator_idx = 2 so that a later
+! allocate() inside the parallel region calls cudaMalloc, not malloc.
+! CHECK: fir.embox %{{.*}}(%{{.*}}) {allocator_idx = 2 : i32}
+! CHECK: fir.store
+! CHECK-LABEL: func.func
>From b0fb3edb30c0bfa3ace2df8cafb91ad87339b85a Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 17 Mar 2026 09:08:53 -0700
Subject: [PATCH 2/2] setAllocatorIdx unconditionally
---
flang/lib/Lower/Support/PrivateReductionUtils.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index 551d8bae41fd4..d440ebd094894 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -440,8 +440,7 @@ fir::IfOp PopulateInitAndCleanupRegionsHelper::handleNullAllocatable() {
/*slice=*/mlir::Value{}, lenParams);
if (sym) {
unsigned idx = Fortran::lower::getAllocatorIdx(sym->GetUltimate());
- if (idx != kDefaultAllocator)
- nullBox.setAllocatorIdx(idx);
+ nullBox.setAllocatorIdx(idx);
}
fir::StoreOp::create(builder, loc, nullBox, allocatedPrivVarArg);
return ifOp;
More information about the flang-commits mailing list