[flang-commits] [flang] 1fd1d63 - [MLIR][OpenMP] Add a new AutomapToTargetData conversion pass in FIR (#153048)

Akash Banerjee via flang-commits flang-commits at lists.llvm.org
Fri Aug 15 07:41:50 PDT 2025


Author: Akash Banerjee
Date: 2025-08-15T15:41:41+01:00
New Revision: 1fd1d634630754cc9b9c4b5526961d5856f64ff9

URL: https://github.com/llvm/llvm-project/commit/1fd1d634630754cc9b9c4b5526961d5856f64ff9
DIFF: https://github.com/llvm/llvm-project/commit/1fd1d634630754cc9b9c4b5526961d5856f64ff9.diff

LOG: [MLIR][OpenMP] Add a new AutomapToTargetData conversion pass in FIR (#153048)

Add a new AutomapToTargetData pass. It gathers the declare target enter
variables that have the AUTOMAP modifier and inserts
omp.target_enter_data/omp.target_exit_data mapping operations for the
fir.allocmem and fir.freemem operations on those AUTOMAP-enabled variables.

Automap Ref: OpenMP 6.0 section 7.9.7.

Added: 
    flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
    flang/test/Transforms/omp-automap-to-target-data.fir
    offload/test/offloading/fortran/declare-target-automap.f90

Modified: 
    flang/include/flang/Optimizer/OpenMP/Passes.td
    flang/lib/Optimizer/OpenMP/CMakeLists.txt
    flang/lib/Optimizer/Passes/Pipelines.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/OpenMP/Passes.td b/flang/include/flang/Optimizer/OpenMP/Passes.td
index e06289cfa8229..99202f6ee81e7 100644
--- a/flang/include/flang/Optimizer/OpenMP/Passes.td
+++ b/flang/include/flang/Optimizer/OpenMP/Passes.td
@@ -117,4 +117,15 @@ def SimdOnlyPass : Pass<"omp-simd-only", "mlir::ModuleOp"> {
   let dependentDialects = ["mlir::omp::OpenMPDialect"];
 }
 
+def AutomapToTargetDataPass
+    : Pass<"omp-automap-to-target-data", "::mlir::ModuleOp"> {
+  let summary = "Insert OpenMP target data operations for AUTOMAP variables";
+  let description = [{
+    Inserts `omp.target_enter_data` and `omp.target_exit_data` operations to
+    map variables marked with the `AUTOMAP` modifier when their allocation
+    or deallocation is detected in the FIR.
+  }];
+  let dependentDialects = ["mlir::omp::OpenMPDialect"];
+}
+
 #endif //FORTRAN_OPTIMIZER_OPENMP_PASSES

diff  --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
new file mode 100644
index 0000000000000..8b9991301aae8
--- /dev/null
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -0,0 +1,159 @@
+//===- AutomapToTargetData.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/DirectivesCommon.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Pass/Pass.h"
+
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+
+namespace flangomp {
+#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+using namespace mlir;
+
+namespace {
+class AutomapToTargetDataPass
+    : public flangomp::impl::AutomapToTargetDataPassBase<
+          AutomapToTargetDataPass> {
+
+  // Returns true if the variable has a dynamic size and therefore requires
+  // bounds operations to describe its extents.
+  inline bool needsBoundsOps(mlir::Value var) {
+    assert(mlir::isa<mlir::omp::PointerLikeType>(var.getType()) &&
+           "only pointer like types expected");
+    mlir::Type t = fir::unwrapRefType(var.getType());
+    if (mlir::Type inner = fir::dyn_cast_ptrOrBoxEleTy(t))
+      return fir::hasDynamicSize(inner);
+    return fir::hasDynamicSize(t);
+  }
+
+  // Generate MapBoundsOp operations for the variable if required.
+  inline void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
+                           llvm::SmallVectorImpl<mlir::Value> &boundsOps) {
+    mlir::Location loc = var.getLoc();
+    fir::factory::AddrAndBoundsInfo info =
+        fir::factory::getDataOperandBaseAddr(builder, var,
+                                             /*isOptional=*/false, loc);
+    fir::ExtendedValue exv =
+        hlfir::translateToExtendedValue(loc, builder, hlfir::Entity{info.addr},
+                                        /*contiguousHint=*/true)
+            .first;
+    llvm::SmallVector<mlir::Value> tmp =
+        fir::factory::genImplicitBoundsOps<mlir::omp::MapBoundsOp,
+                                           mlir::omp::MapBoundsType>(
+            builder, info, exv, /*dataExvIsAssumedSize=*/false, loc);
+    llvm::append_range(boundsOps, tmp);
+  }
+
+  void findRelatedAllocmemFreemem(fir::AddrOfOp addressOfOp,
+                                  llvm::DenseSet<fir::StoreOp> &allocmems,
+                                  llvm::DenseSet<fir::LoadOp> &freemems) {
+    assert(addressOfOp->hasOneUse() && "op must have single use");
+
+    auto declaredRef =
+        cast<hlfir::DeclareOp>(*addressOfOp->getUsers().begin())->getResult(0);
+
+    for (Operation *refUser : declaredRef.getUsers()) {
+      if (auto storeOp = dyn_cast<fir::StoreOp>(refUser))
+        if (auto emboxOp = storeOp.getValue().getDefiningOp<fir::EmboxOp>())
+          if (auto allocmemOp =
+                  emboxOp.getOperand(0).getDefiningOp<fir::AllocMemOp>())
+            allocmems.insert(storeOp);
+
+      if (auto loadOp = dyn_cast<fir::LoadOp>(refUser))
+        for (Operation *loadUser : loadOp.getResult().getUsers())
+          if (auto boxAddrOp = dyn_cast<fir::BoxAddrOp>(loadUser))
+            for (Operation *boxAddrUser : boxAddrOp.getResult().getUsers())
+              if (auto freememOp = dyn_cast<fir::FreeMemOp>(boxAddrUser))
+                freemems.insert(loadOp);
+    }
+  }
+
+  void runOnOperation() override {
+    ModuleOp module = getOperation()->getParentOfType<ModuleOp>();
+    if (!module)
+      module = dyn_cast<ModuleOp>(getOperation());
+    if (!module)
+      return;
+
+    // Build FIR builder for helper utilities.
+    fir::KindMapping kindMap = fir::getKindMapping(module);
+    fir::FirOpBuilder builder{module, std::move(kindMap)};
+
+    // Collect global variables with AUTOMAP flag.
+    llvm::DenseSet<fir::GlobalOp> automapGlobals;
+    module.walk([&](fir::GlobalOp globalOp) {
+      if (auto iface =
+              dyn_cast<omp::DeclareTargetInterface>(globalOp.getOperation()))
+        if (iface.isDeclareTarget() && iface.getDeclareTargetAutomap() &&
+            iface.getDeclareTargetDeviceType() !=
+                omp::DeclareTargetDeviceType::host)
+          automapGlobals.insert(globalOp);
+    });
+
+    auto addMapInfo = [&](auto globalOp, auto memOp) {
+      builder.setInsertionPointAfter(memOp);
+      SmallVector<Value> bounds;
+      if (needsBoundsOps(memOp.getMemref()))
+        genBoundsOps(builder, memOp.getMemref(), bounds);
+
+      omp::TargetEnterExitUpdateDataOperands clauses;
+      mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create(
+          builder, memOp.getLoc(), memOp.getMemref().getType(),
+          memOp.getMemref(),
+          TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
+          builder.getIntegerAttr(
+              builder.getIntegerType(64, false),
+              static_cast<unsigned>(
+                  isa<fir::StoreOp>(memOp)
+                      ? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO
+                      : llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_DELETE)),
+          builder.getAttr<omp::VariableCaptureKindAttr>(
+              omp::VariableCaptureKind::ByCopy),
+          /*var_ptr_ptr=*/mlir::Value{},
+          /*members=*/SmallVector<Value>{},
+          /*members_index=*/ArrayAttr{}, bounds,
+          /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
+          builder.getBoolAttr(false));
+      clauses.mapVars.push_back(mapInfo);
+      isa<fir::StoreOp>(memOp)
+          ? builder.create<omp::TargetEnterDataOp>(memOp.getLoc(), clauses)
+          : builder.create<omp::TargetExitDataOp>(memOp.getLoc(), clauses);
+    };
+
+    for (fir::GlobalOp globalOp : automapGlobals) {
+      if (auto uses = globalOp.getSymbolUses(module.getOperation())) {
+        llvm::DenseSet<fir::StoreOp> allocmemStores;
+        llvm::DenseSet<fir::LoadOp> freememLoads;
+        for (auto &x : *uses)
+          if (auto addrOp = dyn_cast<fir::AddrOfOp>(x.getUser()))
+            findRelatedAllocmemFreemem(addrOp, allocmemStores, freememLoads);
+
+        for (auto storeOp : allocmemStores)
+          addMapInfo(globalOp, storeOp);
+
+        for (auto loadOp : freememLoads)
+          addMapInfo(globalOp, loadOp);
+      }
+    }
+  }
+};
+} // namespace

diff  --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index 3fb0bac05ce0d..e0aebd0714c8f 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -1,6 +1,7 @@
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_flang_library(FlangOpenMPTransforms
+  AutomapToTargetData.cpp
   DoConcurrentConversion.cpp
   FunctionFiltering.cpp
   GenericLoopConversion.cpp

diff  --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 5a870928f8413..98f947a1f635d 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -319,13 +319,13 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
     pm.addPass(flangomp::createDoConcurrentConversionPass(
         opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device));
 
-  // The MapsForPrivatizedSymbols pass needs to run before
-  // MapInfoFinalizationPass because the former creates new
-  // MapInfoOp instances, typically for descriptors.
-  // MapInfoFinalizationPass adds MapInfoOp instances for the descriptors
-  // underlying data which is necessary to access the data on the offload
-  // target device.
+  // The MapsForPrivatizedSymbols and AutomapToTargetData passes need to run
+  // before MapInfoFinalizationPass because they create new MapInfoOp
+  // instances, typically for descriptors. MapInfoFinalizationPass adds
+  // MapInfoOp instances for the descriptors' underlying data, which is
+  // necessary to access the data on the offload target device.
   pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass());
+  pm.addPass(flangomp::createAutomapToTargetDataPass());
   pm.addPass(flangomp::createMapInfoFinalizationPass());
   pm.addPass(flangomp::createMarkDeclareTargetPass());
   pm.addPass(flangomp::createGenericLoopConversionPass());

diff  --git a/flang/test/Transforms/omp-automap-to-target-data.fir b/flang/test/Transforms/omp-automap-to-target-data.fir
new file mode 100644
index 0000000000000..7a19705a248b4
--- /dev/null
+++ b/flang/test/Transforms/omp-automap-to-target-data.fir
@@ -0,0 +1,58 @@
+// RUN: fir-opt --omp-automap-to-target-data %s | FileCheck %s
+// Test OMP AutomapToTargetData pass.
+
+module {
+  fir.global
+      @_QMtestEarr{omp.declare_target = #omp.declaretarget<device_type = (any),
+                       capture_clause = (enter), automap = true>} target
+                       : !fir.box<!fir.heap<!fir.array<?xi32>>>
+
+  func.func @automap() {
+    %c0 = arith.constant 0 : index
+    %c10 = arith.constant 10 : i32
+    %addr = fir.address_of(@_QMtestEarr) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    %decl:2 = hlfir.declare %addr {fortran_attrs = #fir.var_attrs<allocatable, target>, uniq_name = "_QMtestEarr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+    %idx = fir.convert %c10 : (i32) -> index
+    %cond = arith.cmpi sgt, %idx, %c0 : index
+    %n = arith.select %cond, %idx, %c0 : index
+    %mem = fir.allocmem !fir.array<?xi32>, %n {fir.must_be_heap = true}
+    %shape = fir.shape %n : (index) -> !fir.shape<1>
+    %box = fir.embox %mem(%shape) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+    fir.store %box to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    %ld = fir.load %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    %base = fir.box_addr %ld : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+    fir.freemem %base : !fir.heap<!fir.array<?xi32>>
+    %undef = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+    %sh0 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %empty = fir.embox %undef(%sh0) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+    fir.store %empty to %decl#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    return
+  }
+}
+
+// CHECK:         fir.global @[[AUTOMAP:.*]] {{{.*}} automap = true
+// CHECK-LABEL:   func.func @automap()
+// CHECK:           %[[AUTOMAP_ADDR:.*]] = fir.address_of(@[[AUTOMAP]])
+// CHECK:           %[[AUTOMAP_DECL:.*]]:2 = hlfir.declare %[[AUTOMAP_ADDR]]
+// CHECK:           %[[ALLOC_MEM:.*]] = fir.allocmem
+// CHECK-NEXT:      fir.shape
+// CHECK-NEXT:      %[[ARR_BOXED:.*]] = fir.embox %[[ALLOC_MEM]]
+// CHECK-NEXT:      fir.store %[[ARR_BOXED]]
+// CHECK-NEXT:      %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
+// CHECK-NEXT:      %[[ARR_HEAP_PTR:.*]] = fir.box_addr %[[ARR_BOXED_LOADED]]
+// CHECK-NEXT:      %[[DIM0:.*]] = arith.constant 0 : index
+// CHECK-NEXT:      %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
+// CHECK-NEXT:      %[[ONE:.*]] = arith.constant 1 : index
+// CHECK-NEXT:      %[[ZERO:.*]] = arith.constant 0 : index
+// CHECK-NEXT:      %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
+// CHECK-NEXT:      %[[LOWER_BOUND:.*]] = arith.constant 0 : index
+// CHECK-NEXT:      %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
+// CHECK-NEXT:      omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
+// CHECK-NEXT:      arith.muli %[[BOX_DIMS2]]#2, %[[BOX_DIMS2]]#1 : index
+// CHECK-NEXT:      %[[MAP_INFO:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(to) capture(ByCopy)
+// CHECK-NEXT:      omp.target_enter_data map_entries(%[[MAP_INFO]]
+// CHECK:           %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0
+// CHECK:           %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(delete) capture(ByCopy)
+// CHECK-NEXT:      omp.target_exit_data map_entries(%[[EXIT_MAP]]
+// CHECK-NEXT:      %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]]
+// CHECK-NEXT:      fir.freemem %[[BOXADDR]]

diff  --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90
new file mode 100644
index 0000000000000..50e8c124c25fc
--- /dev/null
+++ b/offload/test/offloading/fortran/declare-target-automap.f90
@@ -0,0 +1,36 @@
+!Offloading test for AUTOMAP modifier in declare target enter
+! REQUIRES: flang, amdgpu
+
+program automap_program
+   use iso_c_binding, only: c_loc
+   use omp_lib, only: omp_get_default_device, omp_target_is_present
+   integer, parameter :: N = 10
+   integer :: i
+   integer, allocatable, target :: automap_array(:)
+   !$omp declare target enter(automap:automap_array)
+
+   ! false since the storage is not present even though the descriptor is present
+   write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
+   ! CHECK: 0
+
+   allocate (automap_array(N))
+   ! true since the storage should be allocated and reference count incremented by the allocate
+   write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
+   ! CHECK: 1
+
+   ! since storage is present this should not be a runtime error
+   !$omp target teams loop
+   do i = 1, N
+      automap_array(i) = i
+   end do
+
+   !$omp target update from(automap_array)
+   write (*, *) automap_array
+   ! CHECK: 1 2 3 4 5 6 7 8 9 10
+
+   deallocate (automap_array)
+
+   ! automap_array should have its storage unmapped on device here
+   write (*, *) omp_target_is_present(c_loc(automap_array), omp_get_default_device())
+   ! CHECK: 0
+end program


        


More information about the flang-commits mailing list