[Mlir-commits] [flang] [mlir] [acc] Support for Optional arguments in firstprivate recipes (PR #190079)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Apr 2 08:37:52 PDT 2026


https://github.com/nvptm updated https://github.com/llvm/llvm-project/pull/190079

>From 302f24f5336812d9190a22cc8e78ce657aabc80a Mon Sep 17 00:00:00 2001
From: nvpm <pmathew at nvidia.com>
Date: Wed, 1 Apr 2026 15:41:45 -0700
Subject: [PATCH 1/4] Support for Optional arguments in firstprivate recipes

---
 .../Support/FIROpenACCTypeInterfaces.cpp      | 187 +++++++++++-------
 .../OpenACC/recipe-populate-firstprivate.mlir |  14 +-
 .../OpenACC/acc-implicit-firstprivate.fir     |  18 +-
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       |  37 ++--
 4 files changed, 154 insertions(+), 102 deletions(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 25af35513d18d..517dc8fa50898 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -741,14 +741,31 @@ template <typename Ty>
 mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
     mlir::Type type, mlir::OpBuilder &mlirBuilder, mlir::Location loc,
     mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
-    mlir::ValueRange bounds, mlir::Value initVal, mlir::acc::VariableInfoAttr,
-    bool &needsDestroy) const {
+    mlir::ValueRange bounds, mlir::Value initVal,
+    mlir::acc::VariableInfoAttr varInfo, bool &needsDestroy) const {
   mlir::ModuleOp mod = mlirBuilder.getInsertionBlock()
                            ->getParent()
                            ->getParentOfType<mlir::ModuleOp>();
   assert(mod && "failed to retrieve ModuleOp");
   fir::FirOpBuilder builder(mlirBuilder, mod);
 
+  // When variable is optional: use fir.is_present to check. When non-optional,
+  // skip the conditional to avoid unnecessary branches.
+  std::optional<fir::IfOp> optIfOp;
+  bool isOptional = false;
+  if (auto fortranVarInfo =
+          mlir::dyn_cast_if_present<fir::OpenACCFortranVariableInfoAttr>(
+              varInfo)) {
+    isOptional = fortranVarInfo.getMayBeOptional();
+    if (isOptional) {
+      mlir::Value cond =
+          fir::IsPresentOp::create(builder, loc, builder.getI1Type(), var);
+      optIfOp = fir::IfOp::create(builder, loc, mlir::TypeRange{type}, cond,
+                                  /*withElseRegion=*/true);
+      builder.setInsertionPointToStart(&optIfOp->getThenRegion().front());
+    }
+  }
+
   hlfir::Entity inputVar = hlfir::Entity{var};
   if (inputVar.isPolymorphic())
     TODO(loc, "OpenACC: polymorphic variable privatization");
@@ -844,80 +861,91 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
          "dynamic allocation of the whole base in case of section is not "
          "expected");
 
-  if (inputVar.getType() == alloc.getType() && !allocateSection)
-    return alloc;
-
-  // Step4: reconstruct the input variable from the privatized part:
-  // - get a mock base address if the privatized part is a section (so that any
-  // addressing of the input variable can be replaced by the same addressing of
-  // the privatized part even though the allocated part for the private does not
-  // cover all the input variable storage. This is relying on OpenACC
-  // constraint that any addressing of such privatized variable inside the
-  // construct region can only address the variable inside the privatized
-  // section).
-  // - reconstruct a descriptor with the same bounds and type parameters as the
-  // input if needed.
-  // - store this new descriptor in a temporary allocation if the input variable
-  // is a POINTER/ALLOCATABLE.
-  llvm::SmallVector<mlir::Value> inputVarLowerBounds, inputVarExtents;
-  if (dereferencedVar.isArray()) {
-    for (int dim = 0; dim < dereferencedVar.getRank(); ++dim) {
-      inputVarLowerBounds.push_back(
-          hlfir::genLBound(loc, builder, dereferencedVar, dim));
-      inputVarExtents.push_back(
-          hlfir::genExtent(loc, builder, dereferencedVar, dim));
+  mlir::Value retVal;
+  if (inputVar.getType() == alloc.getType() && !allocateSection) {
+    retVal = alloc;
+  } else {
+    // Step4: reconstruct the input variable from the privatized part:
+    // - get a mock base address if the privatized part is a section (so that
+    // any addressing of the input variable can be replaced by the same
+    // addressing of the privatized part even though the allocated part for the
+    // private does not cover all the input variable storage. This is relying on
+    // OpenACC constraint that any addressing of such privatized variable inside
+    // the construct region can only address the variable inside the privatized
+    // section).
+    // - reconstruct a descriptor with the same bounds and type parameters as
+    // the input if needed.
+    // - store this new descriptor in a temporary allocation if the input
+    // variable is a POINTER/ALLOCATABLE.
+    llvm::SmallVector<mlir::Value> inputVarLowerBounds, inputVarExtents;
+    if (dereferencedVar.isArray()) {
+      for (int dim = 0; dim < dereferencedVar.getRank(); ++dim) {
+        inputVarLowerBounds.push_back(
+            hlfir::genLBound(loc, builder, dereferencedVar, dim));
+        inputVarExtents.push_back(
+            hlfir::genExtent(loc, builder, dereferencedVar, dim));
+      }
     }
-  }
 
-  mlir::Value privateVarBaseAddr = alloc;
-  if (allocateSection) {
-    // To compute the mock base address without doing pointer arithmetic,
-    // compute: TYPE, TEMP(ZERO_BASED_SECTION_LB:) MOCK_BASE = TEMP(0)
-    // This addresses the section "backwards" (0 <= ZERO_BASED_SECTION_LB). This
-    // is currently OK, but care should be taken to avoid tripping bound checks
-    // if added in the future.
-    mlir::Type inputBaseAddrType =
-        dereferencedVar.getBoxType().getBaseAddressType();
-    mlir::Value tempBaseAddr =
-        builder.createConvert(loc, inputBaseAddrType, alloc);
-    mlir::Value zero =
-        builder.createIntegerConstant(loc, builder.getIndexType(), 0);
-    llvm::SmallVector<mlir::Value> lowerBounds;
-    llvm::SmallVector<mlir::Value> zeros;
-    for (unsigned i = 0; i < bounds.size(); i += 3) {
-      lowerBounds.push_back(bounds[i]);
-      zeros.push_back(zero);
+    mlir::Value privateVarBaseAddr = alloc;
+    if (allocateSection) {
+      // To compute the mock base address without doing pointer arithmetic,
+      // compute: TYPE, TEMP(ZERO_BASED_SECTION_LB:) MOCK_BASE = TEMP(0)
+      // This addresses the section "backwards" (0 <= ZERO_BASED_SECTION_LB).
+      // This is currently OK, but care should be taken to avoid tripping bound
+      // checks if added in the future.
+      mlir::Type inputBaseAddrType =
+          dereferencedVar.getBoxType().getBaseAddressType();
+      mlir::Value tempBaseAddr =
+          builder.createConvert(loc, inputBaseAddrType, alloc);
+      mlir::Value zero =
+          builder.createIntegerConstant(loc, builder.getIndexType(), 0);
+      llvm::SmallVector<mlir::Value> lowerBounds;
+      llvm::SmallVector<mlir::Value> zeros;
+      for (unsigned i = 0; i < bounds.size(); i += 3) {
+        lowerBounds.push_back(bounds[i]);
+        zeros.push_back(zero);
+      }
+      mlir::Value offsetShapeShift =
+          builder.genShape(loc, lowerBounds, inputVarExtents);
+      mlir::Type eleRefType =
+          builder.getRefType(privatizedVar.getFortranElementType());
+      mlir::Value mockBase = fir::ArrayCoorOp::create(
+          builder, loc, eleRefType, tempBaseAddr, offsetShapeShift,
+          /*slice=*/mlir::Value{}, /*indices=*/zeros,
+          /*typeParams=*/mlir::ValueRange{});
+      privateVarBaseAddr =
+          builder.createConvert(loc, inputBaseAddrType, mockBase);
     }
-    mlir::Value offsetShapeShift =
-        builder.genShape(loc, lowerBounds, inputVarExtents);
-    mlir::Type eleRefType =
-        builder.getRefType(privatizedVar.getFortranElementType());
-    mlir::Value mockBase = fir::ArrayCoorOp::create(
-        builder, loc, eleRefType, tempBaseAddr, offsetShapeShift,
-        /*slice=*/mlir::Value{}, /*indices=*/zeros,
-        /*typeParams=*/mlir::ValueRange{});
-    privateVarBaseAddr =
-        builder.createConvert(loc, inputBaseAddrType, mockBase);
-  }
 
-  mlir::Value retVal = privateVarBaseAddr;
-  if (inputVar.isBoxAddressOrValue()) {
-    // Recreate descriptor with same bounds as the input variable.
-    mlir::Value shape;
-    if (!inputVarExtents.empty())
-      shape = builder.genShape(loc, inputVarLowerBounds, inputVarExtents);
-    mlir::Value box = fir::EmboxOp::create(builder, loc, inputVar.getBoxType(),
-                                           privateVarBaseAddr, shape,
-                                           /*slice=*/mlir::Value{}, typeParams);
-    if (inputVar.isMutableBox()) {
-      mlir::Value boxAlloc =
-          fir::AllocaOp::create(builder, loc, inputVar.getBoxType());
-      fir::StoreOp::create(builder, loc, box, boxAlloc);
-      retVal = boxAlloc;
-    } else {
-      retVal = box;
+    retVal = privateVarBaseAddr;
+    if (inputVar.isBoxAddressOrValue()) {
+      // Recreate descriptor with same bounds as the input variable.
+      mlir::Value shape;
+      if (!inputVarExtents.empty())
+        shape = builder.genShape(loc, inputVarLowerBounds, inputVarExtents);
+      mlir::Value box = fir::EmboxOp::create(
+          builder, loc, inputVar.getBoxType(), privateVarBaseAddr, shape,
+          /*slice=*/mlir::Value{}, typeParams);
+      if (inputVar.isMutableBox()) {
+        mlir::Value boxAlloc =
+            fir::AllocaOp::create(builder, loc, inputVar.getBoxType());
+        fir::StoreOp::create(builder, loc, box, boxAlloc);
+        retVal = boxAlloc;
+      } else {
+        retVal = box;
+      }
     }
   }
+
+  if (isOptional) {
+    fir::ResultOp::create(builder, loc, retVal);
+    builder.setInsertionPointToStart(&optIfOp->getElseRegion().front());
+    mlir::Value absent = fir::AbsentOp::create(builder, loc, type);
+    fir::ResultOp::create(builder, loc, absent);
+    return optIfOp->getResult(0);
+  }
+
   return retVal;
 }
 
@@ -964,6 +992,25 @@ bool OpenACCMappableModel<Ty>::generateCopy(
   source = hlfir::derefPointersAndAllocatables(loc, builder, source);
   destination = hlfir::derefPointersAndAllocatables(loc, builder, destination);
 
+  // When optional: only copy when source is present (fir.is_present). When
+  // absent, destination is already null from init. When non-optional, copy
+  // directly without the conditional.
+  std::optional<fir::IfOp> optIfOp;
+  if (auto fortranVarInfo =
+          mlir::dyn_cast_if_present<fir::OpenACCFortranVariableInfoAttr>(
+              varInfo)) {
+    if (fortranVarInfo.getMayBeOptional()) {
+      // When variable is optional: use fir.is_present to check. When
+      // non-optional, skip the conditional to avoid unnecessary branches.
+      std::optional<fir::IfOp> optIfOp;
+      mlir::Value cond =
+          fir::IsPresentOp::create(builder, loc, builder.getI1Type(), src);
+      optIfOp = fir::IfOp::create(builder, loc, mlir::TypeRange{}, cond,
+                                  /*withElseRegion=*/false);
+      builder.setInsertionPointToStart(&optIfOp->getThenRegion().front());
+    }
+  }
+
   if (!bounds.empty())
     std::tie(source, destination) =
         genArraySectionsInRecipe(builder, loc, bounds, source, destination);
diff --git a/flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir b/flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir
index a033734d2ff0e..3eb850d2f468e 100644
--- a/flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir
+++ b/flang/test/Fir/OpenACC/recipe-populate-firstprivate.mlir
@@ -12,7 +12,7 @@
 // CHECK: } copy {
 // CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<f32>, %[[DST:.*]]: !fir.ref<f32>):
 // CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<f32>
-// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<f32>
+// CHECK:   hlfir.assign %[[LOAD]] to %[[DST]] temporary_lhs : f32, !fir.ref<f32>
 // CHECK:   acc.terminator
 // CHECK: }
 // CHECK-NOT: destroy
@@ -34,7 +34,7 @@ func.func @test_scalar() {
 // CHECK: } copy {
 // CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<i32>, %[[DST:.*]]: !fir.ref<i32>):
 // CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<i32>
-// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<i32>
+// CHECK:   hlfir.assign %[[LOAD]] to %[[DST]] temporary_lhs : i32, !fir.ref<i32>
 // CHECK:   acc.terminator
 // CHECK: }
 // CHECK-NOT: destroy
@@ -56,7 +56,7 @@ func.func @test_int() {
 // CHECK: } copy {
 // CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.logical<4>>, %[[DST:.*]]: !fir.ref<!fir.logical<4>>):
 // CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<!fir.logical<4>>
-// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<!fir.logical<4>>
+// CHECK:   hlfir.assign %[[LOAD]] to %[[DST]] temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 // CHECK:   acc.terminator
 // CHECK: }
 // CHECK-NOT: destroy
@@ -78,7 +78,7 @@ func.func @test_logical() {
 // CHECK: } copy {
 // CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<complex<f32>>, %[[DST:.*]]: !fir.ref<complex<f32>>):
 // CHECK:   %[[LOAD:.*]] = fir.load %[[SRC]] : !fir.ref<complex<f32>>
-// CHECK:   fir.store %[[LOAD]] to %[[DST]] : !fir.ref<complex<f32>>
+// CHECK:   hlfir.assign %[[LOAD]] to %[[DST]] temporary_lhs : complex<f32>, !fir.ref<complex<f32>>
 // CHECK:   acc.terminator
 // CHECK: }
 // CHECK-NOT: destroy
@@ -99,7 +99,7 @@ func.func @test_complex() {
 // CHECK:   acc.yield %[[ALLOC]] : !fir.ref<!fir.array<100xf32>>
 // CHECK: } copy {
 // CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.array<100xf32>>, %[[DST:.*]]: !fir.ref<!fir.array<100xf32>>):
-// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] : !fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>
+// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] temporary_lhs : !fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>
 // CHECK:   acc.terminator
 // CHECK: }
 // CHECK-NOT: destroy
@@ -120,7 +120,7 @@ func.func @test_array_1d() {
 // CHECK:   acc.yield %[[ALLOC]] : !fir.ref<!fir.array<10x20xi32>>
 // CHECK: } copy {
 // CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.array<10x20xi32>>, %[[DST:.*]]: !fir.ref<!fir.array<10x20xi32>>):
-// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] : !fir.ref<!fir.array<10x20xi32>>, !fir.ref<!fir.array<10x20xi32>>
+// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] temporary_lhs : !fir.ref<!fir.array<10x20xi32>>, !fir.ref<!fir.array<10x20xi32>>
 // CHECK:   acc.terminator
 // CHECK: }
 // CHECK-NOT: destroy
@@ -141,7 +141,7 @@ func.func @test_array_2d() {
 // CHECK:   acc.yield %[[ALLOC]] : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>
 // CHECK: } copy {
 // CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>, %[[DST:.*]]: !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>):
-// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>
+// CHECK:   hlfir.assign %[[SRC]] to %[[DST]] temporary_lhs : !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QTpoint{x:f32,y:f32,z:f32}>>
 // CHECK:   acc.terminator
 // CHECK: }
 // CHECK-NOT: destroy
diff --git a/flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir b/flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir
index d1033d1b580ff..48f3ac2e7fb8f 100644
--- a/flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir
+++ b/flang/test/Transforms/OpenACC/acc-implicit-firstprivate.fir
@@ -12,7 +12,7 @@
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<i32>, %[[DST:.*]]: !fir.ref<i32>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i32>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i32>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : i32, !fir.ref<i32>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -38,7 +38,7 @@ func.func @test_i32_scalar_in_parallel() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<i64>, %[[DST:.*]]: !fir.ref<i64>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i64>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i64>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : i64, !fir.ref<i64>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -64,7 +64,7 @@ func.func @test_i64_scalar_in_parallel() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<f32>, %[[DST:.*]]: !fir.ref<f32>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<f32>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<f32>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : f32, !fir.ref<f32>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -90,7 +90,7 @@ func.func @test_f32_scalar_in_parallel() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<f64>, %[[DST:.*]]: !fir.ref<f64>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<f64>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<f64>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : f64, !fir.ref<f64>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -116,7 +116,7 @@ func.func @test_f64_scalar_in_parallel() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<!fir.logical<4>>, %[[DST:.*]]: !fir.ref<!fir.logical<4>>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<!fir.logical<4>>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<!fir.logical<4>>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : !fir.logical<4>, !fir.ref<!fir.logical<4>>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -142,7 +142,7 @@ func.func @test_logical_scalar_in_parallel() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<complex<f32>>, %[[DST:.*]]: !fir.ref<complex<f32>>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<complex<f32>>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<complex<f32>>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : complex<f32>, !fir.ref<complex<f32>>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -168,7 +168,7 @@ func.func @test_complex_scalar_in_parallel() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<complex<f64>>, %[[DST:.*]]: !fir.ref<complex<f64>>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<complex<f64>>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<complex<f64>>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : complex<f64>, !fir.ref<complex<f64>>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -230,7 +230,7 @@ func.func @test_f64_scalar_in_serial() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<i8>, %[[DST:.*]]: !fir.ref<i8>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i8>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i8>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : i8, !fir.ref<i8>
 // CHECK:         acc.terminator
 // CHECK:       }
 
@@ -256,7 +256,7 @@ func.func @test_i8_scalar_in_parallel() {
 // CHECK:       } copy {
 // CHECK:       ^bb0(%[[SRC:.*]]: !fir.ref<i16>, %[[DST:.*]]: !fir.ref<i16>):
 // CHECK:         %[[LOADED:.*]] = fir.load %[[SRC]] : !fir.ref<i16>
-// CHECK:         fir.store %[[LOADED]] to %[[DST]] : !fir.ref<i16>
+// CHECK:         hlfir.assign %[[LOADED]] to %[[DST]] temporary_lhs : i16, !fir.ref<i16>
 // CHECK:         acc.terminator
 // CHECK:       }
 
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 28138fd906d69..bd26c70eb1831 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -1598,10 +1598,12 @@ static LogicalResult createInitRegion(OpBuilder &builder, Location loc,
 
 /// Create and populate a copy region for firstprivate recipes.
 /// Returns success if the region is populated, failure otherwise.
-/// TODO: Handle MappableType - it does not yet have a copy API.
+/// `varInfo` must be the attribute produced by `createInitRegion` for
+/// `MappableType` (it is unused for `PointerLikeType` copy paths).
 static LogicalResult createCopyRegion(OpBuilder &builder, Location loc,
                                       Region &copyRegion, Type varType,
-                                      ValueRange bounds) {
+                                      ValueRange bounds,
+                                      acc::VariableInfoAttr varInfo) {
   // Create copy block with arguments: original value + privatized value +
   // bounds
   SmallVector<Type> copyArgTypes{varType, varType};
@@ -1615,20 +1617,20 @@ static LogicalResult createCopyRegion(OpBuilder &builder, Location loc,
   copyBlock->addArguments(copyArgTypes, copyArgLocs);
   builder.setInsertionPointToStart(copyBlock);
 
-  bool isMappable = isa<MappableType>(varType);
-  bool isPointerLike = isa<PointerLikeType>(varType);
-  // TODO: Handle MappableType - it does not yet have a copy API.
-  // Otherwise, for now just fallback to pointer-like behavior.
-  if (isMappable && !isPointerLike)
-    return failure();
+  Value originalArg = copyBlock->getArgument(0);
+  Value privatizedArg = copyBlock->getArgument(1);
 
-  // Generate copy region body based on variable type
-  if (isPointerLike) {
+  if (isa<MappableType>(varType)) {
+    auto mappableTy = cast<MappableType>(varType);
+    // generateCopy(src, dest): copy from original (arg0) into privatized
+    // (arg1).
+    if (!mappableTy.generateCopy(
+            builder, loc, cast<TypedValue<MappableType>>(originalArg),
+            cast<TypedValue<MappableType>>(privatizedArg), bounds, varInfo))
+      return failure();
+  } else {
+    assert(isa<PointerLikeType>(varType) && "Expected PointerLikeType");
     auto pointerLikeTy = cast<PointerLikeType>(varType);
-    Value originalArg = copyBlock->getArgument(0);
-    Value privatizedArg = copyBlock->getArgument(1);
-
-    // Generate copy operation using PointerLikeType interface
     if (!pointerLikeTy.genCopy(
             builder, loc, cast<TypedValue<PointerLikeType>>(privatizedArg),
             cast<TypedValue<PointerLikeType>>(originalArg), varType))
@@ -1845,6 +1847,9 @@ FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
 
   // Populate the init region
   bool needsFree = false;
+  // Filled by createInitRegion for mappable variables (genPrivateVariableInfo);
+  // then passed through to copy/destroy so generateCopy /
+  // generatePrivateDestroy receive the same metadata as generatePrivateInit.
   acc::VariableInfoAttr varInfo;
   if (failed(createInitRegion(builder, loc, recipe.getInitRegion(), hostVar,
                               varName, bounds, needsFree, varInfo))) {
@@ -1852,9 +1857,9 @@ FirstprivateRecipeOp::createAndPopulate(OpBuilder &builder, Location loc,
     return std::nullopt;
   }
 
-  // Populate the copy region
+  // Populate the copy region (uses varInfo for MappableType::generateCopy).
   if (failed(createCopyRegion(builder, loc, recipe.getCopyRegion(), varType,
-                              bounds))) {
+                              bounds, varInfo))) {
     recipe.erase();
     return std::nullopt;
   }

>From 3d45fcd94afb7726baf8d47887d7c7b7cd7db372 Mon Sep 17 00:00:00 2001
From: nvpm <pmathew at nvidia.com>
Date: Wed, 1 Apr 2026 23:17:03 -0700
Subject: [PATCH 2/4] rename var; remove redundant else

---
 .../Support/FIROpenACCTypeInterfaces.cpp      | 141 +++++++++---------
 1 file changed, 70 insertions(+), 71 deletions(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 517dc8fa50898..74ccdd624d3fb 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -752,12 +752,12 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
   // When variable is optional: use fir.is_present to check. When non-optional,
   // skip the conditional to avoid unnecessary branches.
   std::optional<fir::IfOp> optIfOp;
-  bool isOptional = false;
+  bool mayBeOptional = false;
   if (auto fortranVarInfo =
           mlir::dyn_cast_if_present<fir::OpenACCFortranVariableInfoAttr>(
               varInfo)) {
-    isOptional = fortranVarInfo.getMayBeOptional();
-    if (isOptional) {
+    mayBeOptional = fortranVarInfo.getMayBeOptional();
+    if (mayBeOptional) {
       mlir::Value cond =
           fir::IsPresentOp::create(builder, loc, builder.getI1Type(), var);
       optIfOp = fir::IfOp::create(builder, loc, mlir::TypeRange{type}, cond,
@@ -864,81 +864,80 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
   mlir::Value retVal;
   if (inputVar.getType() == alloc.getType() && !allocateSection) {
     retVal = alloc;
-  } else {
-    // Step4: reconstruct the input variable from the privatized part:
-    // - get a mock base address if the privatized part is a section (so that
-    // any addressing of the input variable can be replaced by the same
-    // addressing of the privatized part even though the allocated part for the
-    // private does not cover all the input variable storage. This is relying on
-    // OpenACC constraint that any addressing of such privatized variable inside
-    // the construct region can only address the variable inside the privatized
-    // section).
-    // - reconstruct a descriptor with the same bounds and type parameters as
-    // the input if needed.
-    // - store this new descriptor in a temporary allocation if the input
-    // variable is a POINTER/ALLOCATABLE.
-    llvm::SmallVector<mlir::Value> inputVarLowerBounds, inputVarExtents;
-    if (dereferencedVar.isArray()) {
-      for (int dim = 0; dim < dereferencedVar.getRank(); ++dim) {
-        inputVarLowerBounds.push_back(
-            hlfir::genLBound(loc, builder, dereferencedVar, dim));
-        inputVarExtents.push_back(
-            hlfir::genExtent(loc, builder, dereferencedVar, dim));
-      }
+  }
+  // Step4: reconstruct the input variable from the privatized part:
+  // - get a mock base address if the privatized part is a section (so that
+  // any addressing of the input variable can be replaced by the same
+  // addressing of the privatized part even though the allocated part for the
+  // private does not cover all the input variable storage. This is relying on
+  // OpenACC constraint that any addressing of such privatized variable inside
+  // the construct region can only address the variable inside the privatized
+  // section).
+  // - reconstruct a descriptor with the same bounds and type parameters as
+  // the input if needed.
+  // - store this new descriptor in a temporary allocation if the input
+  // variable is a POINTER/ALLOCATABLE.
+  llvm::SmallVector<mlir::Value> inputVarLowerBounds, inputVarExtents;
+  if (dereferencedVar.isArray()) {
+    for (int dim = 0; dim < dereferencedVar.getRank(); ++dim) {
+      inputVarLowerBounds.push_back(
+          hlfir::genLBound(loc, builder, dereferencedVar, dim));
+      inputVarExtents.push_back(
+          hlfir::genExtent(loc, builder, dereferencedVar, dim));
     }
+  }
 
-    mlir::Value privateVarBaseAddr = alloc;
-    if (allocateSection) {
-      // To compute the mock base address without doing pointer arithmetic,
-      // compute: TYPE, TEMP(ZERO_BASED_SECTION_LB:) MOCK_BASE = TEMP(0)
-      // This addresses the section "backwards" (0 <= ZERO_BASED_SECTION_LB).
-      // This is currently OK, but care should be taken to avoid tripping bound
-      // checks if added in the future.
-      mlir::Type inputBaseAddrType =
-          dereferencedVar.getBoxType().getBaseAddressType();
-      mlir::Value tempBaseAddr =
-          builder.createConvert(loc, inputBaseAddrType, alloc);
-      mlir::Value zero =
-          builder.createIntegerConstant(loc, builder.getIndexType(), 0);
-      llvm::SmallVector<mlir::Value> lowerBounds;
-      llvm::SmallVector<mlir::Value> zeros;
-      for (unsigned i = 0; i < bounds.size(); i += 3) {
-        lowerBounds.push_back(bounds[i]);
-        zeros.push_back(zero);
-      }
-      mlir::Value offsetShapeShift =
-          builder.genShape(loc, lowerBounds, inputVarExtents);
-      mlir::Type eleRefType =
-          builder.getRefType(privatizedVar.getFortranElementType());
-      mlir::Value mockBase = fir::ArrayCoorOp::create(
-          builder, loc, eleRefType, tempBaseAddr, offsetShapeShift,
-          /*slice=*/mlir::Value{}, /*indices=*/zeros,
-          /*typeParams=*/mlir::ValueRange{});
-      privateVarBaseAddr =
-          builder.createConvert(loc, inputBaseAddrType, mockBase);
+  mlir::Value privateVarBaseAddr = alloc;
+  if (allocateSection) {
+    // To compute the mock base address without doing pointer arithmetic,
+    // compute: TYPE, TEMP(ZERO_BASED_SECTION_LB:) MOCK_BASE = TEMP(0)
+    // This addresses the section "backwards" (0 <= ZERO_BASED_SECTION_LB).
+    // This is currently OK, but care should be taken to avoid tripping bound
+    // checks if added in the future.
+    mlir::Type inputBaseAddrType =
+        dereferencedVar.getBoxType().getBaseAddressType();
+    mlir::Value tempBaseAddr =
+        builder.createConvert(loc, inputBaseAddrType, alloc);
+    mlir::Value zero =
+        builder.createIntegerConstant(loc, builder.getIndexType(), 0);
+    llvm::SmallVector<mlir::Value> lowerBounds;
+    llvm::SmallVector<mlir::Value> zeros;
+    for (unsigned i = 0; i < bounds.size(); i += 3) {
+      lowerBounds.push_back(bounds[i]);
+      zeros.push_back(zero);
     }
+    mlir::Value offsetShapeShift =
+        builder.genShape(loc, lowerBounds, inputVarExtents);
+    mlir::Type eleRefType =
+        builder.getRefType(privatizedVar.getFortranElementType());
+    mlir::Value mockBase = fir::ArrayCoorOp::create(
+        builder, loc, eleRefType, tempBaseAddr, offsetShapeShift,
+        /*slice=*/mlir::Value{}, /*indices=*/zeros,
+        /*typeParams=*/mlir::ValueRange{});
+    privateVarBaseAddr =
+        builder.createConvert(loc, inputBaseAddrType, mockBase);
+  }
 
-    retVal = privateVarBaseAddr;
-    if (inputVar.isBoxAddressOrValue()) {
-      // Recreate descriptor with same bounds as the input variable.
-      mlir::Value shape;
-      if (!inputVarExtents.empty())
-        shape = builder.genShape(loc, inputVarLowerBounds, inputVarExtents);
-      mlir::Value box = fir::EmboxOp::create(
-          builder, loc, inputVar.getBoxType(), privateVarBaseAddr, shape,
-          /*slice=*/mlir::Value{}, typeParams);
-      if (inputVar.isMutableBox()) {
-        mlir::Value boxAlloc =
-            fir::AllocaOp::create(builder, loc, inputVar.getBoxType());
-        fir::StoreOp::create(builder, loc, box, boxAlloc);
-        retVal = boxAlloc;
-      } else {
-        retVal = box;
-      }
+  retVal = privateVarBaseAddr;
+  if (inputVar.isBoxAddressOrValue()) {
+    // Recreate descriptor with same bounds as the input variable.
+    mlir::Value shape;
+    if (!inputVarExtents.empty())
+      shape = builder.genShape(loc, inputVarLowerBounds, inputVarExtents);
+    mlir::Value box = fir::EmboxOp::create(builder, loc, inputVar.getBoxType(),
+                                           privateVarBaseAddr, shape,
+                                           /*slice=*/mlir::Value{}, typeParams);
+    if (inputVar.isMutableBox()) {
+      mlir::Value boxAlloc =
+          fir::AllocaOp::create(builder, loc, inputVar.getBoxType());
+      fir::StoreOp::create(builder, loc, box, boxAlloc);
+      retVal = boxAlloc;
+    } else {
+      retVal = box;
     }
   }
 
-  if (isOptional) {
+  if (mayBeOptional) {
     fir::ResultOp::create(builder, loc, retVal);
     builder.setInsertionPointToStart(&optIfOp->getElseRegion().front());
     mlir::Value absent = fir::AbsentOp::create(builder, loc, type);

>From 58ded97345416f3876df3aedc33a8332823cce34 Mon Sep 17 00:00:00 2001
From: nvpm <pmathew at nvidia.com>
Date: Thu, 2 Apr 2026 07:35:36 -0700
Subject: [PATCH 3/4] Remove unused variable

---
 flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 74ccdd624d3fb..086abcc5bc940 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -994,7 +994,6 @@ bool OpenACCMappableModel<Ty>::generateCopy(
   // When optional: only copy when source is present (fir.is_present). When
   // absent, destination is already null from init. When non-optional, copy
   // directly without the conditional.
-  std::optional<fir::IfOp> optIfOp;
   if (auto fortranVarInfo =
           mlir::dyn_cast_if_present<fir::OpenACCFortranVariableInfoAttr>(
               varInfo)) {

>From b063ce17e1d82bca3313dc5532ea31dbcb0116ba Mon Sep 17 00:00:00 2001
From: nvpm <pmathew at nvidia.com>
Date: Thu, 2 Apr 2026 08:20:46 -0700
Subject: [PATCH 4/4] Add tests

---
 .../OpenACC/optional-firstprivate-recipe.fir  | 39 +++++++++++++++++++
 .../OpenACC/optional-firstprivate.fir         | 34 ++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 flang/test/Transforms/OpenACC/optional-firstprivate-recipe.fir
 create mode 100644 flang/test/Transforms/OpenACC/optional-firstprivate.fir

diff --git a/flang/test/Transforms/OpenACC/optional-firstprivate-recipe.fir b/flang/test/Transforms/OpenACC/optional-firstprivate-recipe.fir
new file mode 100644
index 0000000000000..5085105ae8a15
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/optional-firstprivate-recipe.fir
@@ -0,0 +1,39 @@
+// RUN: fir-opt %s --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data)" -split-input-file | FileCheck %s
+
+// Optional REAL dummy referenced inside acc.parallel (Fortran OPTIONAL) must get
+// @firstprivatization_optional_ref_f32 from acc-implicit-data
+
+// -----
+
+func.func @test_optional_firstprivate_f32(%arg0: !fir.ref<f32> {fir.bindc_name = "copt", fir.optional}) {
+  acc.parallel {
+    %load = fir.load %arg0 : !fir.ref<f32>
+    acc.yield
+  }
+  return
+}
+
+// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_optional_ref_f32 : !fir.ref<f32> init {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<f32>):
+// CHECK:   %{{.*}} = fir.is_present %{{.*}} : (!fir.ref<f32>) -> i1
+// CHECK:   fir.if %{{.*}} -> (!fir.ref<f32>) {
+// CHECK:     %{{.*}} = fir.alloca f32
+// CHECK:     fir.result %{{.*}} : !fir.ref<f32>
+// CHECK:   } else {
+// CHECK:     %{{.*}} = fir.absent !fir.ref<f32>
+// CHECK:     fir.result %{{.*}} : !fir.ref<f32>
+// CHECK:   }
+// CHECK:   acc.yield %{{.*}} : !fir.ref<f32>
+// CHECK: } copy {
+// CHECK: ^bb0(%{{.*}}: !fir.ref<f32>, %{{.*}}: !fir.ref<f32>):
+// CHECK:   %{{.*}} = fir.is_present %{{.*}} : (!fir.ref<f32>) -> i1
+// CHECK:   fir.if %{{.*}} {
+// CHECK:     %{{.*}} = fir.load %{{.*}} : !fir.ref<f32>
+// CHECK:     hlfir.assign %{{.*}} to %{{.*}} temporary_lhs : f32, !fir.ref<f32>
+// CHECK:   }
+// CHECK:   acc.terminator
+// CHECK: }
+
+// bindc_name on the arg does not always flow into acc.firstprivate's name=; minimal FIR gets name = "".
+// CHECK: acc.firstprivate varPtr({{.*}} : !fir.ref<f32>) recipe(@firstprivatization_optional_ref_f32) -> !fir.ref<f32> {implicit = true, name = ""}
+// CHECK: acc.parallel firstprivate(
diff --git a/flang/test/Transforms/OpenACC/optional-firstprivate.fir b/flang/test/Transforms/OpenACC/optional-firstprivate.fir
new file mode 100644
index 0000000000000..44a59d33291f5
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/optional-firstprivate.fir
@@ -0,0 +1,34 @@
+// RUN: fir-opt %s --pass-pipeline="builtin.module(acc-initialize-fir-analyses,acc-implicit-data,acc-recipe-materialization)" -split-input-file | FileCheck %s
+
+// Verify that optional scalar (Fortran OPTIONAL dummy) gets the optional
+// firstprivate recipe from acc-implicit-data and that acc-recipe-materialization
+// inlines the fir.is_present / fir.if / fir.load / hlfir.assign body.
+
+// -----
+
+func.func @test_optional_firstprivate(%arg0: !fir.ref<i32> {fir.bindc_name = "x", fir.optional}) {
+  acc.parallel {
+    %load = fir.load %arg0 : !fir.ref<i32>
+    acc.yield
+  }
+  return
+}
+
+// Recipe exists right after acc-implicit-data (before acc-recipe-materialization)
+// IMPLICIT: acc.firstprivate.recipe @firstprivatization_optional_ref_i32
+
+// After acc-recipe-materialization: recipe is materialized, check the inlined body
+// CHECK: acc.firstprivate_map varPtr
+// CHECK: acc.parallel {
+// CHECK: fir.is_present
+// CHECK: fir.if %{{.*}} -> (!fir.ref<i32>) {
+// CHECK: fir.alloca i32
+// CHECK: fir.result
+// CHECK: } else {
+// CHECK: fir.absent !fir.ref<i32>
+// CHECK: fir.result
+// CHECK: fir.is_present
+// CHECK: fir.if %{{.*}} {
+// CHECK: fir.load
+// CHECK: hlfir.assign
+// CHECK: acc.yield



More information about the Mlir-commits mailing list