[Mlir-commits] [mlir] f951f63 - [flang][acc] Add ACCOptimizeFirstprivateMap pass (#178546)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Jan 29 11:02:30 PST 2026


Author: Razvan Lupusoru
Date: 2026-01-29T19:02:22Z
New Revision: f951f6305ecc8b2406efb1a1a57193655a9aa6de

URL: https://github.com/llvm/llvm-project/commit/f951f6305ecc8b2406efb1a1a57193655a9aa6de
DIFF: https://github.com/llvm/llvm-project/commit/f951f6305ecc8b2406efb1a1a57193655a9aa6de.diff

LOG: [flang][acc] Add ACCOptimizeFirstprivateMap pass (#178546)

This pass optimizes acc.firstprivate_map operations generated during
OpenACC recipe materialization, when acc.firstprivate is materialized
into a mapping and a private allocation inside the region. The
optimization applies to scalar variables of trivial types (integers,
reals, logicals) as long as they are not optional.

The pass hoists loads from the firstprivate variable to before the
compute region, converting the firstprivate copy to a pass-by-value
pattern. This eliminates the need to copy the firstprivate variable at
runtime, since only its value is needed for initializing private copies.

Added: 
    flang/lib/Optimizer/OpenACC/Transforms/ACCOptimizeFirstprivateMap.cpp
    flang/test/Transforms/OpenACC/acc-optimize-firstprivate-map.fir

Modified: 
    flang/include/flang/Optimizer/OpenACC/Passes.h
    flang/include/flang/Optimizer/OpenACC/Passes.td
    flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
    flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h
    flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
    flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
    flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
    mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td
    mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/OpenACC/Passes.h b/flang/include/flang/Optimizer/OpenACC/Passes.h
index 64ddb84e63c3e..fd1c4db8d78c0 100644
--- a/flang/include/flang/Optimizer/OpenACC/Passes.h
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.h
@@ -29,6 +29,7 @@ namespace acc {
 #include "flang/Optimizer/OpenACC/Passes.h.inc"
 
 std::unique_ptr<mlir::Pass> createACCInitializeFIRAnalysesPass();
+std::unique_ptr<mlir::Pass> createACCOptimizeFirstprivateMapPass();
 std::unique_ptr<mlir::Pass> createACCRecipeBufferizationPass();
 std::unique_ptr<mlir::Pass> createACCUseDeviceCanonicalizerPass();
 

diff  --git a/flang/include/flang/Optimizer/OpenACC/Passes.td b/flang/include/flang/Optimizer/OpenACC/Passes.td
index 8579a471d9a56..ea324b0ae5f7f 100644
--- a/flang/include/flang/Optimizer/OpenACC/Passes.td
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.td
@@ -70,4 +70,16 @@ def ACCUseDeviceCanonicalizer
   let dependentDialects = ["mlir::acc::OpenACCDialect", "fir::FIROpsDialect"];
 }
 
+def ACCOptimizeFirstprivateMap
+    : Pass<"acc-optimize-firstprivate-map", "mlir::func::FuncOp"> {
+  let summary = "Optimize firstprivate mapping";
+  let description = [{
+    This pass optimizes acc firstprivate mapping operations by hoisting
+    loads from the mapped variable to before the compute region. This enables
+    pass-by-value instead of using global memory mapping through the
+    runtime.
+  }];
+  let dependentDialects = ["mlir::acc::OpenACCDialect", "fir::FIROpsDialect"];
+}
+
 #endif // FORTRAN_OPTIMIZER_OPENACC_PASSES

diff  --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
index 4847f3920eec1..7a68ee6234ece 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCOpsInterfaces.h
@@ -94,7 +94,11 @@ struct OutlineRematerializationModel
 template <typename Op>
 struct OffloadRegionModel
     : public mlir::acc::OffloadRegionOpInterface::ExternalModel<
-          OffloadRegionModel<Op>, Op> {};
+          OffloadRegionModel<Op>, Op> {
+  mlir::Region &getOffloadRegion(mlir::Operation *op) const {
+    return mlir::cast<Op>(op).getRegion();
+  }
+};
 
 /// External model for fir::OperationMoveOpInterface.
 /// This interface provides methods to identify whether

diff  --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h
index 79f5626df4d24..360f4eb000a9e 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h
@@ -88,6 +88,14 @@ createOrGetReductionRecipe(mlir::OpBuilder &builder, mlir::Location loc,
                            llvm::SmallVector<mlir::Value> &dataBoundOps,
                            mlir::Attribute fastMathAttr = {});
 
+/// Walks through operations that forward or view their operand and returns
+/// the original defining value. This strips operations like fir.convert,
+/// ViewLikeOpInterface, and optionally fir.declare/hlfir.declare.
+/// \param value The value to trace back to its origin
+/// \param stripDeclare If true (default), also strips declare operations
+/// \return The original value after stripping all intermediate operations
+mlir::Value getOriginalDef(mlir::Value value, bool stripDeclare = true);
+
 } // namespace acc
 } // namespace fir
 

diff  --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 922985152a905..d1239946940dc 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -23,6 +23,7 @@
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h"
 #include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
@@ -1211,41 +1212,6 @@ template mlir::Value OpenACCPointerLikeModel<fir::LLVMPointerType>::genAllocate(
     llvm::StringRef varName, mlir::Type varType, mlir::Value originalVar,
     bool &needsFree) const;
 
-static mlir::Value stripCasts(mlir::Value value, bool stripDeclare = true) {
-  mlir::Value currentValue = value;
-
-  while (currentValue) {
-    auto *definingOp = currentValue.getDefiningOp();
-    if (!definingOp)
-      break;
-
-    if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(definingOp)) {
-      currentValue = convertOp.getValue();
-      continue;
-    }
-
-    if (auto viewLike = mlir::dyn_cast<mlir::ViewLikeOpInterface>(definingOp)) {
-      currentValue = viewLike.getViewSource();
-      continue;
-    }
-
-    if (stripDeclare) {
-      if (auto declareOp = mlir::dyn_cast<hlfir::DeclareOp>(definingOp)) {
-        currentValue = declareOp.getMemref();
-        continue;
-      }
-
-      if (auto declareOp = mlir::dyn_cast<fir::DeclareOp>(definingOp)) {
-        currentValue = declareOp.getMemref();
-        continue;
-      }
-    }
-    break;
-  }
-
-  return currentValue;
-}
-
 template <typename Ty>
 bool OpenACCPointerLikeModel<Ty>::genFree(
     mlir::Type pointer, mlir::OpBuilder &builder, mlir::Location loc,
@@ -1273,7 +1239,7 @@ bool OpenACCPointerLikeModel<Ty>::genFree(
   mlir::Value valueToInspect = allocRes ? allocRes : varToFree;
 
   // Strip casts and declare operations to find the original allocation
-  mlir::Value strippedValue = stripCasts(valueToInspect);
+  mlir::Value strippedValue = fir::acc::getOriginalDef(valueToInspect);
   mlir::Operation *originalAlloc = strippedValue.getDefiningOp();
 
   // If we found an AllocMemOp (heap allocation), free it
@@ -1511,7 +1477,8 @@ static bool hasCUDADeviceAttrOnFuncArg(mlir::BlockArgument blockArg) {
 /// Shared implementation for checking if a value represents device data.
 static bool isDeviceDataImpl(mlir::Value var) {
   // Strip casts to find the underlying value.
-  mlir::Value currentVal = stripCasts(var, /*stripDeclare=*/false);
+  mlir::Value currentVal =
+      fir::acc::getOriginalDef(var, /*stripDeclare=*/false);
 
   if (auto blockArg = mlir::dyn_cast<mlir::BlockArgument>(currentVal))
     return hasCUDADeviceAttrOnFuncArg(blockArg);

diff  --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
index d66a9702ec4d4..a53ea9216f7ab 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCUtils.cpp
@@ -618,3 +618,38 @@ mlir::SymbolRefAttr fir::acc::createOrGetReductionRecipe(
   mlir::acc::YieldOp::create(builder, loc, dest);
   return mlir::SymbolRefAttr::get(builder.getContext(), recipe.getSymName());
 }
+
+mlir::Value fir::acc::getOriginalDef(mlir::Value value, bool stripDeclare) {
+  mlir::Value currentValue = value;
+
+  while (currentValue) {
+    auto *definingOp = currentValue.getDefiningOp();
+    if (!definingOp)
+      break;
+
+    if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(definingOp)) {
+      currentValue = convertOp.getValue();
+      continue;
+    }
+
+    if (auto viewLike = mlir::dyn_cast<mlir::ViewLikeOpInterface>(definingOp)) {
+      currentValue = viewLike.getViewSource();
+      continue;
+    }
+
+    if (stripDeclare) {
+      if (auto declareOp = mlir::dyn_cast<hlfir::DeclareOp>(definingOp)) {
+        currentValue = declareOp.getMemref();
+        continue;
+      }
+
+      if (auto declareOp = mlir::dyn_cast<fir::DeclareOp>(definingOp)) {
+        currentValue = declareOp.getMemref();
+        continue;
+      }
+    }
+    break;
+  }
+
+  return currentValue;
+}

diff  --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCOptimizeFirstprivateMap.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCOptimizeFirstprivateMap.cpp
new file mode 100644
index 0000000000000..ec40e1209f97a
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCOptimizeFirstprivateMap.cpp
@@ -0,0 +1,193 @@
+//===- ACCOptimizeFirstprivateMap.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes firstprivate mapping operations (acc.firstprivate_map).
+// The optimization hoists loads from the firstprivate variable to before the
+// compute region, effectively converting the firstprivate copy to a
+// pass-by-value pattern. This eliminates the need for runtime copying into
+// global memory.
+//
+// Example transformation:
+//
+//   Before:
+//     %decl = fir.declare %alloca : !fir.ref<i32>
+//     %fp = acc.firstprivate_map varPtr(%decl) -> !fir.ref<i32>
+//     acc.parallel {
+//       %val = fir.load %fp : !fir.ref<i32>  // load inside region
+//       ...
+//     }
+//
+//   After:
+//     %decl = fir.declare %alloca : !fir.ref<i32>
+//     %val = fir.load %decl : !fir.ref<i32>  // load hoisted before region
+//     acc.parallel {
+//       ...  // uses %val directly
+//     }
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
+#include "flang/Optimizer/OpenACC/Support/FIROpenACCUtils.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir::acc {
+#define GEN_PASS_DEF_ACCOPTIMIZEFIRSTPRIVATEMAP
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+} // namespace fir::acc
+
+using namespace mlir;
+
+namespace {
+
+/// Returns the enclosing offload region interface, or nullptr if not inside
+/// one.
+static acc::OffloadRegionOpInterface getEnclosingOffloadRegion(Operation *op) {
+  return op->getParentOfType<acc::OffloadRegionOpInterface>();
+}
+
+/// Returns true if the value is defined by an OpenACC data clause operation.
+static bool isDefinedByDataClause(Value value) {
+  Operation *defOp = value.getDefiningOp();
+  if (!defOp)
+    return false;
+  return acc::getDataClause(defOp).has_value();
+}
+
+/// Returns true if the value is defined inside the given offload region.
+/// This handles both operation results and block arguments.
+static bool isDefinedInsideRegion(Value value,
+                                  acc::OffloadRegionOpInterface offloadOp) {
+  Region *valueRegion = value.getParentRegion();
+  if (!valueRegion)
+    return false;
+  return offloadOp.getOffloadRegion().isAncestor(valueRegion);
+}
+
+/// Returns true if the variable may be optional.
+static bool mayBeOptionalVariable(Value var) {
+  // Don't strip declare ops - we need to check the optional attribute on them.
+  Value originalDef = fir::acc::getOriginalDef(var, /*stripDeclare=*/false);
+  if (auto varIface = dyn_cast_or_null<fir::FortranVariableOpInterface>(
+          originalDef.getDefiningOp()))
+    return varIface.isOptional();
+  // An alloca or allocmem result is a fresh allocation and is never optional.
+  if (isa_and_nonnull<fir::AllocaOp, fir::AllocMemOp>(
+          originalDef.getDefiningOp()))
+    return false;
+  // Conservative: if we can't determine, assume it may be optional.
+  return true;
+}
+
+/// Returns true if the type is a reference to a trivial type.
+/// Note that this does not allow fir.heap, fir.ptr, or fir.llvm_ptr
+/// types - since we would need to check if the load is valid via
+/// a null-check to enable the optimization.
+static bool isRefToTrivialType(Type type) {
+  if (!mlir::isa<fir::ReferenceType>(type))
+    return false;
+  return fir::isa_trivial(fir::unwrapRefType(type));
+}
+
+/// Attempts to hoist loads from accVar to before firstprivateInitOp.
+/// Returns true if all uses of accVar are loads and they were hoisted.
+static bool hoistLoads(acc::FirstprivateMapInitialOp firstprivateInitOp,
+                       Value var, Value accVar) {
+  // Check if all uses are loads - only hoist if we can optimize all uses.
+  bool allLoads = llvm::all_of(accVar.getUsers(), [](Operation *user) {
+    return isa<fir::LoadOp>(user);
+  });
+  if (!allLoads)
+    return false;
+
+  // Hoist all loads before the firstprivate_map operation.
+  for (Operation *user : llvm::make_early_inc_range(accVar.getUsers())) {
+    auto loadOp = cast<fir::LoadOp>(user);
+    loadOp.getMemrefMutable().assign(var);
+    loadOp->moveBefore(firstprivateInitOp);
+  }
+  return true;
+}
+
+class ACCOptimizeFirstprivateMap
+    : public fir::acc::impl::ACCOptimizeFirstprivateMapBase<
+          ACCOptimizeFirstprivateMap> {
+public:
+  void runOnOperation() override {
+    func::FuncOp funcOp = getOperation();
+
+    // Collect all firstprivate_map ops first to avoid modifying IR during walk.
+    llvm::SmallVector<acc::FirstprivateMapInitialOp> firstprivateOps;
+    funcOp.walk([&](acc::FirstprivateMapInitialOp op) {
+      firstprivateOps.push_back(op);
+    });
+
+    llvm::SmallVector<acc::FirstprivateMapInitialOp> opsToErase;
+
+    for (acc::FirstprivateMapInitialOp firstprivateInitOp : firstprivateOps) {
+      Value var = firstprivateInitOp.getVar();
+
+      if (auto offloadOp = getEnclosingOffloadRegion(firstprivateInitOp)) {
+        // Inside an offload region.
+        if (isDefinedByDataClause(var) ||
+            isDefinedInsideRegion(var, offloadOp)) {
+          // The variable is already mapped or defined locally - just replace
+          // uses and erase.
+          firstprivateInitOp.getAccVar().replaceAllUsesWith(var);
+          opsToErase.push_back(firstprivateInitOp);
+        } else {
+          // Variable is defined outside - hoist the op out of the region,
+          // then apply optimization.
+          firstprivateInitOp->moveBefore(offloadOp);
+          if (optimizeFirstprivateMapping(firstprivateInitOp))
+            opsToErase.push_back(firstprivateInitOp);
+        }
+      } else {
+        // Outside offload region, apply type-restricted optimization
+        // to pre-load before the compute region.
+        if (optimizeFirstprivateMapping(firstprivateInitOp))
+          opsToErase.push_back(firstprivateInitOp);
+      }
+    }
+
+    for (auto op : opsToErase)
+      op.erase();
+  }
+
+private:
+  /// Returns true if the operation was optimized and can be erased.
+  static bool optimizeFirstprivateMapping(
+      acc::FirstprivateMapInitialOp firstprivateInitOp) {
+    Value var = firstprivateInitOp.getVar();
+    Value accVar = firstprivateInitOp.getAccVar();
+
+    // If there are no uses, we can erase the operation.
+    if (accVar.use_empty())
+      return true;
+
+    // Only optimize references to trivial types.
+    if (!isRefToTrivialType(var.getType()))
+      return false;
+
+    // Avoid hoisting optional variables as they may be
+    // null and thus not safe to access.
+    if (mayBeOptionalVariable(var))
+      return false;
+
+    return hoistLoads(firstprivateInitOp, var, accVar);
+  }
+};
+
+} // namespace
+
+std::unique_ptr<Pass> fir::acc::createACCOptimizeFirstprivateMapPass() {
+  return std::make_unique<ACCOptimizeFirstprivateMap>();
+}

diff  --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
index f122fae461a38..27c5ee64aea27 100644
--- a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
@@ -1,7 +1,8 @@
 add_flang_library(FIROpenACCTransforms
-  ACCUseDeviceCanonicalizer.cpp
   ACCInitializeFIRAnalyses.cpp
+  ACCOptimizeFirstprivateMap.cpp
   ACCRecipeBufferization.cpp
+  ACCUseDeviceCanonicalizer.cpp
 
   DEPENDS
   FIROpenACCPassesIncGen
@@ -12,6 +13,7 @@ add_flang_library(FIROpenACCTransforms
   FIRDialect
   FIRDialectSupport
   FIROpenACCAnalysis
+  FIROpenACCSupport
   HLFIRDialect
 
   MLIR_LIBS

diff  --git a/flang/test/Transforms/OpenACC/acc-optimize-firstprivate-map.fir b/flang/test/Transforms/OpenACC/acc-optimize-firstprivate-map.fir
new file mode 100644
index 0000000000000..bce575e752866
--- /dev/null
+++ b/flang/test/Transforms/OpenACC/acc-optimize-firstprivate-map.fir
@@ -0,0 +1,241 @@
+// RUN: fir-opt %s --acc-optimize-firstprivate-map -split-input-file | FileCheck %s
+
+// Test: Integer variable - should optimize
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_trivial_scalar_hoist
+func.func @test_trivial_scalar_hoist() {
+  %scalar = fir.alloca i32 {bindc_name = "scalar_var"}
+  %decl = fir.declare %scalar {uniq_name = "_QFtest_trivial_scalarEscalar_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  %fpmap = acc.firstprivate_map varPtr(%decl : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[DECL:.*]] = fir.declare
+  // CHECK: %[[LOAD:.*]] = fir.load %[[DECL]] : !fir.ref<i32>
+  // CHECK-NOT: acc.firstprivate_map
+  // CHECK: acc.parallel
+  acc.parallel {
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: fir.call @use_i32(%[[LOAD]])
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Inside offload region - should optimize by getting rid of firstprivate_map
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_inside_offload_region
+func.func @test_inside_offload_region() {
+  %scalar = fir.alloca i32 {bindc_name = "scalar_var"}
+  %decl = fir.declare %scalar {uniq_name = "_QFtest_inside_offloadEscalar_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  %copyin = acc.copyin varPtr(%decl : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[DECL:.*]] = fir.declare
+  // CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[DECL]] : !fir.ref<i32>) -> !fir.ref<i32>
+  acc.parallel dataOperands(%copyin : !fir.ref<i32>) {
+    %fpmap = acc.firstprivate_map varPtr(%copyin : !fir.ref<i32>) -> !fir.ref<i32>
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: acc.parallel dataOperands(%[[COPYIN]] : !fir.ref<i32>)
+    // CHECK-NOT: acc.firstprivate_map
+    // CHECK: %[[LOAD:.*]] = fir.load %[[COPYIN]] : !fir.ref<i32>
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Local alloca inside offload region - should optimize (erase)
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_local_alloca_inside_offload
+func.func @test_local_alloca_inside_offload() {
+  acc.parallel {
+    %local = fir.alloca i32 {bindc_name = "local_var"}
+    %decl = fir.declare %local {uniq_name = "_QFtest_local_allocaElocal_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %fpmap = acc.firstprivate_map varPtr(%decl : !fir.ref<i32>) -> !fir.ref<i32>
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: acc.parallel
+    // CHECK: %[[LOCAL:.*]] = fir.alloca i32
+    // CHECK: %[[DECL:.*]] = fir.declare %[[LOCAL]]
+    // CHECK-NOT: acc.firstprivate_map
+    // CHECK: %[[LOAD:.*]] = fir.load %[[DECL]] : !fir.ref<i32>
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: firstprivate_map with acc.private input - should optimize (erase)
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_private_input
+func.func @test_private_input() {
+  %scalar = fir.alloca i32 {bindc_name = "scalar_var"}
+  %decl = fir.declare %scalar {uniq_name = "_QFtest_private_inputEscalar_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  %private = acc.private varPtr(%decl : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[DECL:.*]] = fir.declare
+  // CHECK: %[[PRIVATE:.*]] = acc.private varPtr(%[[DECL]] : !fir.ref<i32>) -> !fir.ref<i32>
+  acc.parallel private(%private : !fir.ref<i32>) {
+    %fpmap = acc.firstprivate_map varPtr(%private : !fir.ref<i32>) -> !fir.ref<i32>
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: acc.parallel private(%[[PRIVATE]] : !fir.ref<i32>)
+    // CHECK-NOT: acc.firstprivate_map
+    // CHECK: %[[LOAD:.*]] = fir.load %[[PRIVATE]] : !fir.ref<i32>
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Variable defined outside offload region but firstprivate_map inside -
+// should hoist out and then optimize
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_hoist_from_offload_region
+func.func @test_hoist_from_offload_region() {
+  %scalar = fir.alloca i32 {bindc_name = "scalar_var"}
+  %decl = fir.declare %scalar {uniq_name = "_QFtest_hoistEscalar_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[DECL:.*]] = fir.declare
+  // CHECK: %[[LOAD:.*]] = fir.load %[[DECL]] : !fir.ref<i32>
+  // CHECK-NOT: acc.firstprivate_map
+  // CHECK: acc.parallel
+  acc.parallel {
+    %fpmap = acc.firstprivate_map varPtr(%decl : !fir.ref<i32>) -> !fir.ref<i32>
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: fir.call @use_i32(%[[LOAD]])
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Optional variable - should NOT optimize
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_optional_no_hoist
+func.func @test_optional_no_hoist(%arg0: !fir.ref<i32>) {
+  %decl = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtest_optionalEopt_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  %fpmap = acc.firstprivate_map varPtr(%decl : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[DECL:.*]] = fir.declare {{.*}} {fortran_attrs = #fir.var_attrs<optional>
+  // CHECK: %[[FPMAP:.*]] = acc.firstprivate_map varPtr(%[[DECL]] : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: acc.parallel
+  acc.parallel {
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: %[[LOAD:.*]] = fir.load %[[FPMAP]] : !fir.ref<i32>
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Array type (non-trivial) - should NOT optimize
+
+func.func private @use_array(!fir.array<10xi32>)
+
+// CHECK-LABEL: func.func @test_array_no_hoist
+func.func @test_array_no_hoist() {
+  %c10 = arith.constant 10 : index
+  %array = fir.alloca !fir.array<10xi32> {bindc_name = "array_var"}
+  %shape = fir.shape %c10 : (index) -> !fir.shape<1>
+  %decl = fir.declare %array(%shape) {uniq_name = "_QFtest_arrayEarray_var"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.ref<!fir.array<10xi32>>
+  %fpmap = acc.firstprivate_map varPtr(%decl : !fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<10xi32>>
+  // CHECK: %[[DECL:.*]] = fir.declare
+  // CHECK: %[[FPMAP:.*]] = acc.firstprivate_map varPtr(%[[DECL]] : !fir.ref<!fir.array<10xi32>>) -> !fir.ref<!fir.array<10xi32>>
+  // CHECK: acc.parallel
+  acc.parallel {
+    %load = fir.load %fpmap : !fir.ref<!fir.array<10xi32>>
+    // CHECK: %[[LOAD:.*]] = fir.load %[[FPMAP]] : !fir.ref<!fir.array<10xi32>>
+    fir.call @use_array(%load) : (!fir.array<10xi32>) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Multiple loads from same firstprivate_map - should optimize and hoist all
+
+func.func private @use_i32_i32(i32, i32)
+
+// CHECK-LABEL: func.func @test_multiple_loads_hoist
+func.func @test_multiple_loads_hoist() {
+  %scalar = fir.alloca i32 {bindc_name = "scalar_var"}
+  %decl = fir.declare %scalar {uniq_name = "_QFtest_multiple_loadsEscalar_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  %fpmap = acc.firstprivate_map varPtr(%decl : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[DECL:.*]] = fir.declare
+  // CHECK-DAG: %[[LOAD1:.*]] = fir.load %[[DECL]] : !fir.ref<i32>
+  // CHECK-DAG: %[[LOAD2:.*]] = fir.load %[[DECL]] : !fir.ref<i32>
+  // CHECK-NOT: acc.firstprivate_map
+  // CHECK: acc.parallel
+  acc.parallel {
+    %load1 = fir.load %fpmap : !fir.ref<i32>
+    %load2 = fir.load %fpmap : !fir.ref<i32>
+    fir.call @use_i32_i32(%load1, %load2) : (i32, i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Variable through fir.convert - should optimize
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_through_convert
+func.func @test_through_convert() {
+  %scalar = fir.alloca i32 {bindc_name = "scalar_var"}
+  %decl = fir.declare %scalar {uniq_name = "_QFtest_convertEscalar_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  %convert = fir.convert %decl : (!fir.ref<i32>) -> !fir.ref<i32>
+  %fpmap = acc.firstprivate_map varPtr(%convert : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[DECL:.*]] = fir.declare
+  // CHECK: %[[CONVERT:.*]] = fir.convert %[[DECL]]
+  // CHECK: %[[LOAD:.*]] = fir.load %[[CONVERT]] : !fir.ref<i32>
+  // CHECK-NOT: acc.firstprivate_map
+  // CHECK: acc.parallel
+  acc.parallel {
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: fir.call @use_i32(%[[LOAD]])
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}
+
+// -----
+
+// Test: Block argument (unknown origin) - should NOT optimize
+
+func.func private @use_i32(i32)
+
+// CHECK-LABEL: func.func @test_block_arg_no_hoist
+func.func @test_block_arg_no_hoist(%arg0: !fir.ref<i32>) {
+  // No declare op, so we can't determine if it's optional - conservative no-op
+  %fpmap = acc.firstprivate_map varPtr(%arg0 : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: %[[FPMAP:.*]] = acc.firstprivate_map varPtr(%arg0 : !fir.ref<i32>) -> !fir.ref<i32>
+  // CHECK: acc.parallel
+  acc.parallel {
+    %load = fir.load %fpmap : !fir.ref<i32>
+    // CHECK: %[[LOAD:.*]] = fir.load %[[FPMAP]] : !fir.ref<i32>
+    fir.call @use_i32(%load) : (i32) -> ()
+    acc.yield
+  }
+  return
+}

diff  --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td
index 95a8f22a3ddfa..2b585d8d0db90 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsInterfaces.td
@@ -135,6 +135,14 @@ def OffloadRegionOpInterface : OpInterface<"OffloadRegionOpInterface"> {
     their regions will be extracted and compiled separately (e.g., as
     device kernels or outlined functions).
   }];
+
+  let methods = [
+    InterfaceMethod<"Get the offload region", "::mlir::Region&",
+      "getOffloadRegion",
+      (ins), [{
+        return $_op.getRegion();
+      }]>,
+  ];
 }
 
 #endif // OPENACC_OPS_INTERFACES

diff  --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 1d397fabef83c..f405fbbbd838d 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -306,7 +306,11 @@ struct MemrefGlobalVariableModel
 
 struct GPULaunchOffloadRegionModel
     : public acc::OffloadRegionOpInterface::ExternalModel<
-          GPULaunchOffloadRegionModel, gpu::LaunchOp> {};
+          GPULaunchOffloadRegionModel, gpu::LaunchOp> {
+  mlir::Region &getOffloadRegion(mlir::Operation *op) const {
+    return cast<gpu::LaunchOp>(op).getBody();
+  }
+};
 
 /// Helper function for any of the times we need to modify an ArrayAttr based on
 /// a device type list.  Returns a new ArrayAttr with all of the


        


More information about the Mlir-commits mailing list