[flang-commits] [flang] [mlir] [flang][acc] Add recipe populate testing through type interfaces (PR #163990)

Razvan Lupusoru via flang-commits flang-commits at lists.llvm.org
Fri Oct 17 09:48:33 PDT 2025


https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/163990

>From 80d23ce0ad2a27996b8bb78a46c35a8a18ac3aaf Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 17 Oct 2025 09:43:20 -0700
Subject: [PATCH 1/2] [flang][acc] Add recipe populate testing through type
 interfaces

This PR does the following:
- Updates createAndPopulate implementation so instead of first
  building detached blocks it directly builds them in
  appropriate place - because the MappableType implementation
  in FIR is relying on properly connected regions in order to
  look up module
- Updates createAndPopulate to properly create destroy region
  with newly introduced generatePrivateDestroy API for
  MappableType
- Adds recipe-populate-private.mlir test with comprehensive
  type coverage including scalars, arrays (1D/3D), derived
  types, and extensive box type tests (heap/ptr scalars,
  dynamic arrays in 1D/2D, ptr arrays, and boxed derived
  types)
- Adds recipe-populate-firstprivate.mlir test with coverage
  for types with well-supported copy operations (scalars,
  static arrays, derived types); intentionally limits box
  type testing as copy implementation for complex box types
  is not yet complete
---
 .../OpenACC/recipe-populate-firstprivate.mlir | 166 +++++++++++++
 .../Fir/OpenACC/recipe-populate-private.mlir  | 223 ++++++++++++++++++
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       | 152 ++++++------
 3 files changed, 457 insertions(+), 84 deletions(-)
 create mode 100644 flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir
 create mode 100644 flang/test/Fir/OpenACC/recipe-populate-private.mlir

diff --git a/flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir b/flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir
new file mode 100644
index 0000000000000..0c3f3fea13fa0
--- /dev/null
+++ b/flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir
@@ -0,0 +1,166 @@
+// RUN: fir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=firstprivate})" | FileCheck %s
+
+// The tests here use a synthetic hlfir.declare in order to ensure that the hlfir dialect is
+// loaded. This is required because the pass used is part of OpenACC test passes outside of
+// flang and the APIs being test may generate hlfir even when it does not appear.
+
+// Test scalar type (f32)
+// CHECK: acc.firstprivate.recipe @firstprivate_scalar : !fir.ref<f32> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca f32
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "scalar"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<f32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<f32>, %[[DST:.*]]: !fir.ref<f32>):
+// CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<f32>
+// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<f32>
+// CHECK:   acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+  %0 = fir.alloca f32 {test.var = "scalar"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test integer scalar
+// CHECK: acc.firstprivate.recipe @firstprivate_int : !fir.ref<i32> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<i32>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca i32
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "int"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<i32>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<i32>, %[[DST:.*]]: !fir.ref<i32>):
+// CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<i32>
+// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<i32>
+// CHECK:   acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_int() {
+  %0 = fir.alloca i32 {test.var = "int"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test logical type
+// CHECK: acc.firstprivate.recipe @firstprivate_logical : !fir.ref<!fir.logical<4>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.logical<4>>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.logical<4>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "logical"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.logical<4>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.logical<4>>, %[[DST:.*]]: !fir.ref<!fir.logical<4>>):
+// CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<!fir.logical<4>>
+// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<!fir.logical<4>>
+// CHECK:   acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_logical() {
+  %0 = fir.alloca !fir.logical<4> {test.var = "logical"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test complex type
+// CHECK: acc.firstprivate.recipe @firstprivate_complex : !fir.ref<complex<f32>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<complex<f32>>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca complex<f32>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "complex"} : (!fir.ref<complex<f32>>) -> (!fir.ref<complex<f32>>, !fir.ref<complex<f32>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<complex<f32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<complex<f32>>, %[[DST:.*]]: !fir.ref<complex<f32>>):
+// CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<complex<f32>>
+// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<complex<f32>>
+// CHECK:   acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_complex() {
+  %0 = fir.alloca complex<f32> {test.var = "complex"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test 1D static array
+// CHECK: acc.firstprivate.recipe @firstprivate_array_1d : !fir.ref<!fir.array<100xf32>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xf32>>):
+// CHECK:   %[[C100:.*]] = arith.constant 100 : index
+// CHECK:   %[[SHAPE:.*]] = fir.shape %[[C100]] : (index) -> !fir.shape<1>
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.array<100xf32>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = "array_1d"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.array<100xf32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.array<100xf32>>, %[[DST:.*]]: !fir.ref<!fir.array<100xf32>>):
+// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] : !fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>
+// CHECK:   acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_array_1d() {
+  %0 = fir.alloca !fir.array<100xf32> {test.var = "array_1d"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test 2D static array
+// CHECK: acc.firstprivate.recipe @firstprivate_array_2d : !fir.ref<!fir.array<10x20xi32>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<10x20xi32>>):
+// CHECK:   %[[C10:.*]] = arith.constant 10 : index
+// CHECK:   %[[C20:.*]] = arith.constant 20 : index
+// CHECK:   %[[SHAPE:.*]] = fir.shape %[[C10]], %[[C20]] : (index, index) -> !fir.shape<2>
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.array<10x20xi32>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = "array_2d"} : (!fir.ref<!fir.array<10x20xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<10x20xi32>>, !fir.ref<!fir.array<10x20xi32>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.array<10x20xi32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.array<10x20xi32>>, %[[DST:.*]]: !fir.ref<!fir.array<10x20xi32>>):
+// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] : !fir.ref<!fir.array<10x20xi32>>, !fir.ref<!fir.array<10x20xi32>>
+// CHECK:   acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_array_2d() {
+  %0 = fir.alloca !fir.array<10x20xi32> {test.var = "array_2d"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test derived type with multiple fields
+// CHECK: acc.firstprivate.recipe @firstprivate_derived : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.type<_QTpoint{x:f32,y:f32,z:f32}>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "derived"} : (!fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>) -> (!fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>, %[[DST:.*]]: !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>):
+// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>
+// CHECK:   acc.terminator
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_derived() {
+  %0 = fir.alloca !fir.type<_QTpoint{x:f32,y:f32,z:f32}> {test.var = "derived"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
diff --git a/flang/test/Fir/OpenACC/recipe-populate-private.mlir b/flang/test/Fir/OpenACC/recipe-populate-private.mlir
new file mode 100644
index 0000000000000..aeb60d6b4a37f
--- /dev/null
+++ b/flang/test/Fir/OpenACC/recipe-populate-private.mlir
@@ -0,0 +1,223 @@
+// RUN: fir-opt %s --split-input-file --pass-pipeline="builtin.module(test-acc-recipe-populate{recipe-type=private})" | FileCheck %s
+
+// The tests here use a synthetic hlfir.declare in order to ensure that the hlfir dialect is
+// loaded. This is required because the pass used is part of OpenACC test passes outside of
+// flang and the APIs being test may generate hlfir even when it does not appear.
+
+// Test scalar type (f32)
+// CHECK: acc.private.recipe @private_scalar : !fir.ref<f32> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca f32
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "scalar"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<f32>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_scalar() {
+  %0 = fir.alloca f32 {test.var = "scalar"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test logical type
+// CHECK: acc.private.recipe @private_logical : !fir.ref<!fir.logical<4>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.logical<4>>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.logical<4>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "logical"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.logical<4>>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_logical() {
+  %0 = fir.alloca !fir.logical<4> {test.var = "logical"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test complex type
+// CHECK: acc.private.recipe @private_complex : !fir.ref<complex<f32>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<complex<f32>>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca complex<f32>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "complex"} : (!fir.ref<complex<f32>>) -> (!fir.ref<complex<f32>>, !fir.ref<complex<f32>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<complex<f32>>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_complex() {
+  %0 = fir.alloca complex<f32> {test.var = "complex"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test 1D static array
+// CHECK: acc.private.recipe @private_array_1d : !fir.ref<!fir.array<100xf32>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xf32>>):
+// CHECK:   %[[C100:.*]] = arith.constant 100 : index
+// CHECK:   %[[SHAPE:.*]] = fir.shape %[[C100]] : (index) -> !fir.shape<1>
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.array<100xf32>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = "array_1d"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.array<100xf32>>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_array_1d() {
+  %0 = fir.alloca !fir.array<100xf32> {test.var = "array_1d"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test 3D static array
+// CHECK: acc.private.recipe @private_array_3d : !fir.ref<!fir.array<5x10x15xi32>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<5x10x15xi32>>):
+// CHECK:   %[[C5:.*]] = arith.constant 5 : index
+// CHECK:   %[[C10:.*]] = arith.constant 10 : index
+// CHECK:   %[[C15:.*]] = arith.constant 15 : index
+// CHECK:   %[[SHAPE:.*]] = fir.shape %[[C5]], %[[C10]], %[[C15]] : (index, index, index) -> !fir.shape<3>
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.array<5x10x15xi32>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]](%[[SHAPE]]) {uniq_name = "array_3d"} : (!fir.ref<!fir.array<5x10x15xi32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<5x10x15xi32>>, !fir.ref<!fir.array<5x10x15xi32>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.array<5x10x15xi32>>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_array_3d() {
+  %0 = fir.alloca !fir.array<5x10x15xi32> {test.var = "array_3d"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test derived type with multiple fields
+// CHECK: acc.private.recipe @private_derived : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>):
+// CHECK:   %[[ALLOC:.*]] = fir.alloca !fir.type<_QTpoint{x:f32,y:f32,z:f32}>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[ALLOC]] {uniq_name = "derived"} : (!fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>) -> (!fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>
+// CHECK: }
+// CHECK-NOT: destroy
+
+func.func @test_derived() {
+  %0 = fir.alloca !fir.type<_QTpoint{x:f32,y:f32,z:f32}> {test.var = "derived"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test box type with heap scalar (needs destroy)
+// CHECK: acc.private.recipe @private_box_heap_scalar : !fir.ref<!fir.box<!fir.heap<f64>>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.heap<f64>>>):
+// CHECK:   %[[BOXALLOC:.*]] = fir.alloca !fir.box<!fir.heap<f64>>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[BOXALLOC]] {uniq_name = "box_heap_scalar"} : (!fir.ref<!fir.box<!fir.heap<f64>>>) -> (!fir.ref<!fir.box<!fir.heap<f64>>>, !fir.ref<!fir.box<!fir.heap<f64>>>)
+// CHECK:   %[[SCALAR:.*]] = fir.allocmem f64
+// CHECK:   %[[EMBOX:.*]] = fir.embox %[[SCALAR]] : (!fir.heap<f64>) -> !fir.box<!fir.heap<f64>>
+// CHECK:   fir.store %[[EMBOX]] to %{{.*}}#0 : !fir.ref<!fir.box<!fir.heap<f64>>>
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.box<!fir.heap<f64>>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.heap<f64>>>, %{{.*}}: !fir.ref<!fir.box<!fir.heap<f64>>>):
+// CHECK:   acc.terminator
+// CHECK: }
+
+func.func @test_box_heap_scalar() {
+  %0 = fir.alloca !fir.box<!fir.heap<f64>> {test.var = "box_heap_scalar"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test box type with pointer scalar (needs destroy)
+// CHECK: acc.private.recipe @private_box_ptr_scalar : !fir.ref<!fir.box<!fir.ptr<i32>>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<i32>>>):
+// CHECK:   %[[BOXALLOC:.*]] = fir.alloca !fir.box<!fir.ptr<i32>>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[BOXALLOC]] {uniq_name = "box_ptr_scalar"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
+// CHECK:   %[[SCALAR:.*]] = fir.allocmem i32
+// CHECK:   %[[EMBOX:.*]] = fir.embox %[[SCALAR]] : (!fir.heap<i32>) -> !fir.box<!fir.ptr<i32>>
+// CHECK:   fir.store %[[EMBOX]] to %{{.*}}#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<i32>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<i32>>>):
+// CHECK:   acc.terminator
+// CHECK: }
+
+func.func @test_box_ptr_scalar() {
+  %0 = fir.alloca !fir.box<!fir.ptr<i32>> {test.var = "box_ptr_scalar"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test box type with 1D heap array (needs destroy)
+// CHECK: acc.private.recipe @private_box_heap_array_1d : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>):
+// CHECK:   %[[BOXALLOC:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[BOXALLOC]] {uniq_name = "box_heap_array_1d"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>):
+// CHECK:   acc.terminator
+// CHECK: }
+
+func.func @test_box_heap_array_1d() {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {test.var = "box_heap_array_1d"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test box type with 2D heap array (needs destroy)
+// CHECK: acc.private.recipe @private_box_heap_array_2d : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>>):
+// CHECK:   %[[BOXALLOC:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi64>>>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[BOXALLOC]] {uniq_name = "box_heap_array_2d"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xi64>>>>):
+// CHECK:   acc.terminator
+// CHECK: }
+
+func.func @test_box_heap_array_2d() {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?x?xi64>>> {test.var = "box_heap_array_2d"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
+
+// -----
+
+// Test box type with pointer array (needs destroy)
+// CHECK: acc.private.recipe @private_box_ptr_array : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
+// CHECK:   %[[BOXALLOC:.*]] = fir.alloca !fir.box<!fir.ptr<!fir.array<?xf32>>>
+// CHECK:   %{{.*}}:2 = hlfir.declare %[[BOXALLOC]] {uniq_name = "box_ptr_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+// CHECK:   acc.yield %{{.*}}#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %{{.*}}: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>):
+// CHECK:   acc.terminator
+// CHECK: }
+
+func.func @test_box_ptr_array() {
+  %0 = fir.alloca !fir.box<!fir.ptr<!fir.array<?xf32>>> {test.var = "box_ptr_array"}
+  %var = fir.alloca f32
+  %1:2 = hlfir.declare %var {uniq_name = "load_hlfir"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  return
+}
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 90cbbd86dc002..a4931c30cf119 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1030,12 +1030,12 @@ struct RemoveConstantIfConditionWithRegion : public OpRewritePattern<OpTy> {
 //===----------------------------------------------------------------------===//
 
 /// Create and populate an init region for privatization recipes.
-/// Returns the init block on success, or nullptr on failure.
+/// Returns success if the region is populated, failure otherwise.
 /// Sets needsFree to indicate if the allocated memory requires deallocation.
-static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
-                                               Type varType, StringRef varName,
-                                               ValueRange bounds,
-                                               bool &needsFree) {
+static LogicalResult createInitRegion(OpBuilder &builder, Location loc,
+                                      Region &initRegion, Type varType,
+                                      StringRef varName, ValueRange bounds,
+                                      bool &needsFree) {
   // Create init block with arguments: original value + bounds
   SmallVector<Type> argTypes{varType};
   SmallVector<Location> argLocs{loc};
@@ -1044,9 +1044,9 @@ static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
     argLocs.push_back(loc);
   }
 
-  auto initBlock = std::make_unique<Block>();
+  Block *initBlock = builder.createBlock(&initRegion);
   initBlock->addArguments(argTypes, argLocs);
-  builder.setInsertionPointToStart(initBlock.get());
+  builder.setInsertionPointToStart(initBlock);
 
   Value privatizedValue;
 
@@ -1060,7 +1060,7 @@ static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
     privatizedValue = mappableTy.generatePrivateInit(
         builder, loc, typedVar, varName, bounds, {}, needsFree);
     if (!privatizedValue)
-      return nullptr;
+      return failure();
   } else {
     assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
     auto pointerLikeTy = cast<PointerLikeType>(varType);
@@ -1068,21 +1068,21 @@ static std::unique_ptr<Block> createInitRegion(OpBuilder &builder, Location loc,
     privatizedValue = pointerLikeTy.genAllocate(builder, loc, varName, varType,
                                                 blockArgVar, needsFree);
     if (!privatizedValue)
-      return nullptr;
+      return failure();
   }
 
   // Add yield operation to init block
   acc::YieldOp::create(builder, loc, privatizedValue);
 
-  return initBlock;
+  return success();
 }
 
 /// Create and populate a copy region for firstprivate recipes.
-/// Returns the copy block on success, or nullptr on failure.
+/// Returns success if the region is populated, failure otherwise.
 /// TODO: Handle MappableType - it does not yet have a copy API.
-static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc,
-                                               Type varType,
-                                               ValueRange bounds) {
+static LogicalResult createCopyRegion(OpBuilder &builder, Location loc,
+                                      Region &copyRegion, Type varType,
+                                      ValueRange bounds) {
   // Create copy block with arguments: original value + privatized value +
   // bounds
   SmallVector<Type> copyArgTypes{varType, varType};
@@ -1092,16 +1092,16 @@ static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc,
     copyArgLocs.push_back(loc);
   }
 
-  auto copyBlock = std::make_unique<Block>();
+  Block *copyBlock = builder.createBlock(&copyRegion);
   copyBlock->addArguments(copyArgTypes, copyArgLocs);
-  builder.setInsertionPointToStart(copyBlock.get());
+  builder.setInsertionPointToStart(copyBlock);
 
   bool isMappable = isa<MappableType>(varType);
   bool isPointerLike = isa<PointerLikeType>(varType);
   // TODO: Handle MappableType - it does not yet have a copy API.
   // Otherwise, for now just fallback to pointer-like behavior.
   if (isMappable && !isPointerLike)
-    return nullptr;
+    return failure();
 
   // Generate copy region body based on variable type
   if (isPointerLike) {
@@ -1113,21 +1113,20 @@ static std::unique_ptr<Block> createCopyRegion(OpBuilder &builder, Location loc,
     if (!pointerLikeTy.genCopy(
             builder, loc, cast<TypedValue<PointerLikeType>>(privatizedArg),
             cast<TypedValue<PointerLikeType>>(originalArg), varType))
-      return nullptr;
+      return failure();
   }
 
   // Add terminator to copy block
   acc::TerminatorOp::create(builder, loc);
 
-  return copyBlock;
+  return success();
 }
 
 /// Create and populate a destroy region for privatization recipes.
-/// Returns the destroy block on success, or nullptr if not needed.
-static std::unique_ptr<Block> createDestroyRegion(OpBuilder &builder,
-                                                  Location loc, Type varType,
-                                                  Value allocRes,
-                                                  ValueRange bounds) {
+/// Returns success if the region is populated, failure otherwise.
+static LogicalResult createDestroyRegion(OpBuilder &builder, Location loc,
+                                         Region &destroyRegion, Type varType,
+                                         Value allocRes, ValueRange bounds) {
   // Create destroy block with arguments: original value + privatized value +
   // bounds
   SmallVector<Type> destroyArgTypes{varType, varType};
@@ -1137,28 +1136,25 @@ static std::unique_ptr<Block> createDestroyRegion(OpBuilder &builder,
     destroyArgLocs.push_back(loc);
   }
 
-  auto destroyBlock = std::make_unique<Block>();
+  Block *destroyBlock = builder.createBlock(&destroyRegion);
   destroyBlock->addArguments(destroyArgTypes, destroyArgLocs);
-  builder.setInsertionPointToStart(destroyBlock.get());
+  builder.setInsertionPointToStart(destroyBlock);
 
-  bool isMappable = isa<MappableType>(varType);
-  bool isPointerLike = isa<PointerLikeType>(varType);
-  // TODO: Handle MappableType - it does not yet have a deallocation API.
-  // Otherwise, for now just fallback to pointer-like behavior.
-  if (isMappable && !isPointerLike)
-    return nullptr;
-
-  assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
-  auto pointerLikeTy = cast<PointerLikeType>(varType);
-  auto privatizedArg =
+  auto varToFree =
       cast<TypedValue<PointerLikeType>>(destroyBlock->getArgument(1));
-  // Pass allocRes to help determine the allocation type
-  if (!pointerLikeTy.genFree(builder, loc, privatizedArg, allocRes, varType))
-    return nullptr;
+  if (isa<MappableType>(varType)) {
+    auto mappableTy = cast<MappableType>(varType);
+    if (!mappableTy.generatePrivateDestroy(builder, loc, varToFree))
+      return failure();
+  } else {
+    assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
+    auto pointerLikeTy = cast<PointerLikeType>(varType);
+    if (!pointerLikeTy.genFree(builder, loc, varToFree, allocRes, varType))
+      return failure();
+  }
 
   acc::TerminatorOp::create(builder, loc);
-
-  return destroyBlock;
+  return success();
 }
 
 } // namespace
@@ -1220,40 +1216,33 @@ PrivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
   if (!isMappable && !isPointerLike)
     return std::nullopt;
 
-  // Create init and destroy blocks using shared helpers
   OpBuilder::InsertionGuard guard(builder);
 
-  // Save the original insertion point for creating the recipe operation later
-  auto originalInsertionPoint = builder.saveInsertionPoint();
+  // Create the recipe operation first so regions have proper parent context
+  auto recipe = PrivateRecipeOp::create(builder, loc, recipeName, varType);
 
+  // Populate the init region
   bool needsFree = false;
-  auto initBlock =
-      createInitRegion(builder, loc, varType, varName, bounds, needsFree);
-  if (!initBlock)
+  if (failed(createInitRegion(builder, loc, recipe.getInitRegion(), varType,
+                               varName, bounds, needsFree))) {
+    recipe.erase();
     return std::nullopt;
+  }
 
   // Only create destroy region if the allocation needs deallocation
-  std::unique_ptr<Block> destroyBlock;
   if (needsFree) {
     // Extract the allocated value from the init block's yield operation
-    auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator());
+    auto yieldOp = cast<acc::YieldOp>(
+        recipe.getInitRegion().front().getTerminator());
     Value allocRes = yieldOp.getOperand(0);
 
-    destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds);
-    if (!destroyBlock)
+    if (failed(createDestroyRegion(builder, loc, recipe.getDestroyRegion(),
+                                    varType, allocRes, bounds))) {
+      recipe.erase();
       return std::nullopt;
+    }
   }
 
-  // Now create the recipe operation at the original insertion point and attach
-  // the blocks
-  builder.restoreInsertionPoint(originalInsertionPoint);
-  auto recipe = PrivateRecipeOp::create(builder, loc, recipeName, varType);
-
-  // Move the blocks into the recipe's regions
-  recipe.getInitRegion().push_back(initBlock.release());
-  if (destroyBlock)
-    recipe.getDestroyRegion().push_back(destroyBlock.release());
-
   return recipe;
 }
 
@@ -1299,45 +1288,40 @@ FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
   if (!isMappable && !isPointerLike)
     return std::nullopt;
 
-  // Create init, copy, and destroy blocks using shared helpers
   OpBuilder::InsertionGuard guard(builder);
 
-  // Save the original insertion point for creating the recipe operation later
-  auto originalInsertionPoint = builder.saveInsertionPoint();
+  // Create the recipe operation first so regions have proper parent context
+  auto recipe = FirstprivateRecipeOp::create(builder, loc, recipeName, varType);
 
+  // Populate the init region
   bool needsFree = false;
-  auto initBlock =
-      createInitRegion(builder, loc, varType, varName, bounds, needsFree);
-  if (!initBlock)
+  if (failed(createInitRegion(builder, loc, recipe.getInitRegion(), varType,
+                               varName, bounds, needsFree))) {
+    recipe.erase();
     return std::nullopt;
+  }
 
-  auto copyBlock = createCopyRegion(builder, loc, varType, bounds);
-  if (!copyBlock)
+  // Populate the copy region
+  if (failed(createCopyRegion(builder, loc, recipe.getCopyRegion(), varType,
+                               bounds))) {
+    recipe.erase();
     return std::nullopt;
+  }
 
   // Only create destroy region if the allocation needs deallocation
-  std::unique_ptr<Block> destroyBlock;
   if (needsFree) {
     // Extract the allocated value from the init block's yield operation
-    auto yieldOp = cast<acc::YieldOp>(initBlock->getTerminator());
+    auto yieldOp = cast<acc::YieldOp>(
+        recipe.getInitRegion().front().getTerminator());
     Value allocRes = yieldOp.getOperand(0);
 
-    destroyBlock = createDestroyRegion(builder, loc, varType, allocRes, bounds);
-    if (!destroyBlock)
+    if (failed(createDestroyRegion(builder, loc, recipe.getDestroyRegion(),
+                                    varType, allocRes, bounds))) {
+      recipe.erase();
       return std::nullopt;
+    }
   }
 
-  // Now create the recipe operation at the original insertion point and attach
-  // the blocks
-  builder.restoreInsertionPoint(originalInsertionPoint);
-  auto recipe = FirstprivateRecipeOp::create(builder, loc, recipeName, varType);
-
-  // Move the blocks into the recipe's regions
-  recipe.getInitRegion().push_back(initBlock.release());
-  recipe.getCopyRegion().push_back(copyBlock.release());
-  if (destroyBlock)
-    recipe.getDestroyRegion().push_back(destroyBlock.release());
-
   return recipe;
 }
 

>From 3e067420c56602a1a991784f0f7d1c9b3ffa0c14 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Fri, 17 Oct 2025 09:48:22 -0700
Subject: [PATCH 2/2] Fix format

---
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index a4931c30cf119..dcfe2c742407e 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1224,7 +1224,7 @@ PrivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
   // Populate the init region
   bool needsFree = false;
   if (failed(createInitRegion(builder, loc, recipe.getInitRegion(), varType,
-                               varName, bounds, needsFree))) {
+                              varName, bounds, needsFree))) {
     recipe.erase();
     return std::nullopt;
   }
@@ -1232,12 +1232,12 @@ PrivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
   // Only create destroy region if the allocation needs deallocation
   if (needsFree) {
     // Extract the allocated value from the init block's yield operation
-    auto yieldOp = cast<acc::YieldOp>(
-        recipe.getInitRegion().front().getTerminator());
+    auto yieldOp =
+        cast<acc::YieldOp>(recipe.getInitRegion().front().getTerminator());
     Value allocRes = yieldOp.getOperand(0);
 
     if (failed(createDestroyRegion(builder, loc, recipe.getDestroyRegion(),
-                                    varType, allocRes, bounds))) {
+                                   varType, allocRes, bounds))) {
       recipe.erase();
       return std::nullopt;
     }
@@ -1296,14 +1296,14 @@ FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
   // Populate the init region
   bool needsFree = false;
   if (failed(createInitRegion(builder, loc, recipe.getInitRegion(), varType,
-                               varName, bounds, needsFree))) {
+                              varName, bounds, needsFree))) {
     recipe.erase();
     return std::nullopt;
   }
 
   // Populate the copy region
   if (failed(createCopyRegion(builder, loc, recipe.getCopyRegion(), varType,
-                               bounds))) {
+                              bounds))) {
     recipe.erase();
     return std::nullopt;
   }
@@ -1311,12 +1311,12 @@ FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
   // Only create destroy region if the allocation needs deallocation
   if (needsFree) {
     // Extract the allocated value from the init block's yield operation
-    auto yieldOp = cast<acc::YieldOp>(
-        recipe.getInitRegion().front().getTerminator());
+    auto yieldOp =
+        cast<acc::YieldOp>(recipe.getInitRegion().front().getTerminator());
     Value allocRes = yieldOp.getOperand(0);
 
     if (failed(createDestroyRegion(builder, loc, recipe.getDestroyRegion(),
-                                    varType, allocRes, bounds))) {
+                                   varType, allocRes, bounds))) {
       recipe.erase();
       return std::nullopt;
     }



More information about the flang-commits mailing list