[flang-commits] [flang] [flang][OpenMP][CUDA] Set allocator_idx on privatized allocatable device array descriptors (PR #186945)

Zhen Wang via flang-commits flang-commits at lists.llvm.org
Tue Mar 17 09:09:23 PDT 2026


https://github.com/wangzpgi updated https://github.com/llvm/llvm-project/pull/186945

>From 16f090abdbc1a0f7a46bc4365e97009b3625cccd Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Mon, 16 Mar 2026 20:17:28 -0700
Subject: [PATCH 1/2] Set allocator_idx on privatized allocatable device array
 descriptors

---
 .../Lower/Support/PrivateReductionUtils.cpp   | 11 ++++++---
 ...elayed-privatization-cuda-device-array.cuf | 24 +++++++++++++++++++
 2 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf

diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index f63fb6ecfe43f..551d8bae41fd4 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -14,6 +14,7 @@
 
 #include "flang/Lower/AbstractConverter.h"
 #include "flang/Lower/Allocatable.h"
+#include "flang/Lower/CUDA.h"
 #include "flang/Lower/ConvertVariable.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/Character.h"
@@ -435,9 +436,13 @@ fir::IfOp PopulateInitAndCleanupRegionsHelper::handleNullAllocatable() {
   // right rank. This returns an empty value if the types don't match.
   mlir::Value shape = generateZeroShapeForRank(builder, loc, moldArg);
 
-  mlir::Value nullBox =
-      fir::EmboxOp::create(builder, loc, valType, addr, shape,
-                           /*slice=*/mlir::Value{}, lenParams);
+  auto nullBox = fir::EmboxOp::create(builder, loc, valType, addr, shape,
+                                      /*slice=*/mlir::Value{}, lenParams);
+  if (sym) {
+    unsigned idx = Fortran::lower::getAllocatorIdx(sym->GetUltimate());
+    if (idx != kDefaultAllocator)
+      nullBox.setAllocatorIdx(idx);
+  }
   fir::StoreOp::create(builder, loc, nullBox, allocatedPrivVarArg);
   return ifOp;
 }
diff --git a/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf b/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
new file mode 100644
index 0000000000000..cb9f9ea58d4cd
--- /dev/null
+++ b/flang/test/Lower/OpenMP/delayed-privatization-cuda-device-array.cuf
@@ -0,0 +1,24 @@
+! Test that OpenMP privatization of CUDA Fortran allocatable device arrays
+! sets allocator_idx = 2 on the null descriptor so that user-written
+! allocate() (after cudaSetDevice) uses cudaMalloc on the correct GPU.
+
+! RUN: bbc -emit-hlfir -fcuda -fopenmp %s -o - | FileCheck %s
+
+subroutine omp_private_device_allocatable()  ! exercises private() on a device allocatable
+  implicit none
+  real(8), device, allocatable :: adev(:)  ! rank-1 allocatable with the CUDA device attribute
+
+  !$omp parallel private(adev)
+    allocate(adev(10))  ! user-written allocation of the privatized copy inside the region
+    adev(1) = 1.0d0
+    deallocate(adev)
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.private {type = private}
+! CHECK-SAME: @{{.*}} : !fir.box<!fir.heap<!fir.array<?xf64>>> init {
+! Null descriptor must carry allocator_idx = 2 so that a later
+! allocate() inside the parallel region calls cudaMalloc, not malloc.
+! CHECK:          fir.embox %{{.*}}(%{{.*}}) {allocator_idx = 2 : i32}
+! CHECK:          fir.store
+! CHECK-LABEL: func.func

>From b0fb3edb30c0bfa3ace2df8cafb91ad87339b85a Mon Sep 17 00:00:00 2001
From: Zhen Wang <zhenw at nvidia.com>
Date: Tue, 17 Mar 2026 09:08:53 -0700
Subject: [PATCH 2/2] setAllocatorIdx unconditionally

---
 flang/lib/Lower/Support/PrivateReductionUtils.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/flang/lib/Lower/Support/PrivateReductionUtils.cpp b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
index 551d8bae41fd4..d440ebd094894 100644
--- a/flang/lib/Lower/Support/PrivateReductionUtils.cpp
+++ b/flang/lib/Lower/Support/PrivateReductionUtils.cpp
@@ -440,8 +440,7 @@ fir::IfOp PopulateInitAndCleanupRegionsHelper::handleNullAllocatable() {
                                       /*slice=*/mlir::Value{}, lenParams);
   if (sym) {
     unsigned idx = Fortran::lower::getAllocatorIdx(sym->GetUltimate());
-    if (idx != kDefaultAllocator)
-      nullBox.setAllocatorIdx(idx);
+    nullBox.setAllocatorIdx(idx);
   }
   fir::StoreOp::create(builder, loc, nullBox, allocatedPrivVarArg);
   return ifOp;



More information about the flang-commits mailing list