[flang-commits] [flang] [llvm] [mlir] [Flang][OpenMP] Fix Fortran automap handling (PR #162501)

Akash Banerjee via flang-commits flang-commits at lists.llvm.org
Tue Jun 16 07:47:16 PDT 2026


https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/162501

>From ba1c14f9f56a8b97e603eaa15ea8ab5cf139beb0 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Wed, 8 Oct 2025 16:51:51 +0100
Subject: [PATCH 1/4] [Flang][OpenMP] Fix Fortran automap handling

	- Move automapped Fortran descriptor mappings from map() to has_device_addr in target regions, updating block args and uses accordingly.
	- In libomptarget, detect CFI descriptors during pointer attachment and compute the correct descriptor size to transfer full metadata (including bounds).
	- Resolves lost bounds for automapped Fortran arrays on device; no change for C/C++.

This fixes the test offload/test/offloading/fortran/declare-target-automap.f90, which was reported broken in #161265.
---
 .../Optimizer/OpenMP/AutomapToTargetData.cpp  | 102 ++++++++++++++++++
 offload/libomptarget/omptarget.cpp            |  32 ++++++
 .../fortran/declare-target-automap.f90        |   3 -
 3 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index eeb08ebf51191..5159158f53339 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -18,8 +18,12 @@
 #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/IR/SymbolTable.h"
 #include "mlir/Pass/Pass.h"
 
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include <algorithm>
+
 namespace flangomp {
 #define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
 #include "flang/Optimizer/OpenMP/Passes.h.inc"
@@ -150,6 +154,104 @@ class AutomapToTargetDataPass
           addMapInfo(globalOp, loadOp);
       }
     }
+
+    // Move automapped descriptors from map() to has_device_addr in target ops.
+    auto originatesFromAutomapGlobal = [&](mlir::Value varPtr) -> bool {
+      if (auto decl = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
+              varPtr.getDefiningOp())) {
+        if (auto addrOp = mlir::dyn_cast_or_null<fir::AddrOfOp>(
+                decl.getMemref().getDefiningOp())) {
+          if (auto g =
+                  mlir::SymbolTable::lookupNearestSymbolFrom<fir::GlobalOp>(
+                      decl, addrOp.getSymbol()))
+            return automapGlobals.contains(g);
+        }
+      }
+      return false;
+    };
+
+    module.walk([&](mlir::omp::TargetOp target) {
+      // Collect candidates to move: descriptor maps of automapped globals.
+      llvm::SmallVector<mlir::Value> newMapOps;
+      llvm::SmallVector<unsigned> removedIndices;
+      llvm::SmallVector<mlir::Value> movedToHDA;
+      llvm::SmallVector<mlir::BlockArgument> oldMapArgsForMoved;
+
+      auto mapRange = target.getMapVars();
+      newMapOps.reserve(mapRange.size());
+
+      auto argIface = llvm::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(
+          target.getOperation());
+      llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs =
+          argIface.getMapBlockArgs();
+
+      for (auto [idx, mapVal] : llvm::enumerate(mapRange)) {
+        auto mapOp =
+            mlir::dyn_cast<mlir::omp::MapInfoOp>(mapVal.getDefiningOp());
+        if (!mapOp) {
+          newMapOps.push_back(mapVal);
+          continue;
+        }
+
+        mlir::Type varTy = fir::unwrapRefType(mapOp.getVarType());
+        bool isDescriptor = mlir::isa<fir::BaseBoxType>(varTy);
+        if (isDescriptor && originatesFromAutomapGlobal(mapOp.getVarPtr())) {
+          movedToHDA.push_back(mapVal);
+          removedIndices.push_back(idx);
+          oldMapArgsForMoved.push_back(mapBlockArgs[idx]);
+        } else {
+          newMapOps.push_back(mapVal);
+        }
+      }
+
+      if (movedToHDA.empty())
+        return;
+
+      // Update map vars to exclude moved entries.
+      mlir::MutableOperandRange mapMutable = target.getMapVarsMutable();
+      mapMutable.assign(newMapOps);
+
+      // Append moved entries to has_device_addr and insert corresponding block
+      // arguments.
+      mlir::MutableOperandRange hdaMutable =
+          target.getHasDeviceAddrVarsMutable();
+      llvm::SmallVector<mlir::Value> newHDA;
+      newHDA.reserve(hdaMutable.size() + movedToHDA.size());
+      llvm::for_each(hdaMutable.getAsOperandRange(),
+                     [&](mlir::Value v) { newHDA.push_back(v); });
+
+      unsigned hdaStart = argIface.getHasDeviceAddrBlockArgsStart();
+      unsigned oldHdaCount = argIface.numHasDeviceAddrBlockArgs();
+      llvm::SmallVector<mlir::BlockArgument> newHDAArgsForMoved;
+      unsigned insertIndex = hdaStart + oldHdaCount;
+      for (mlir::Value v : movedToHDA) {
+        newHDA.push_back(v);
+        target->getRegion(0).insertArgument(insertIndex, v.getType(),
+                                            v.getLoc());
+        // Capture the newly inserted block argument.
+        newHDAArgsForMoved.push_back(
+            target->getRegion(0).getArgument(insertIndex));
+        insertIndex++;
+      }
+      hdaMutable.assign(newHDA);
+
+      // Redirect uses in the region: replace old map block args with the
+      // corresponding new has_device_addr block args.
+      for (auto [oldArg, newArg] :
+           llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved))
+        oldArg.replaceAllUsesWith(newArg);
+
+      // Finally, erase corresponding map block arguments (descending order).
+      unsigned mapStart = argIface.getMapBlockArgsStart();
+      // Convert indices to absolute argument numbers before erasing.
+      llvm::SmallVector<unsigned> absArgNos;
+      absArgNos.reserve(removedIndices.size());
+      for (unsigned idx : removedIndices)
+        absArgNos.push_back(mapStart + idx);
+      std::sort(absArgNos.begin(), absArgNos.end(), std::greater<>());
+      for (unsigned absNo : absArgNos)
+        target->getRegion(0).eraseArgument(absNo);
+    });
   }
 };
 } // namespace
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 17b215732d51b..2479fc489d051 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -33,6 +33,9 @@
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Object/ObjectFile.h"
 
+#include "flang/ISO_Fortran_binding.h"
+
+#include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <vector>
@@ -381,6 +384,34 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
   return TgtPteeBase;
 }
 
+// Fortran pointer attachments treated descriptors as plain pointers, so
+// automapped arrays lose their declared bounds on the device. Recognize
+// CFI descriptors to compute their actual size before copying, ensuring the
+// full descriptor (including bounds) is transferred during attachment.
+static int64_t getFortranDescriptorSize(void **HstPtrAddr,
+                                        int64_t ReportedSize) {
+  constexpr int64_t VoidPtrSize = sizeof(void *);
+
+  if (!HstPtrAddr || ReportedSize > VoidPtrSize)
+    return ReportedSize;
+
+  const CFI_cdesc_t *Desc = reinterpret_cast<const CFI_cdesc_t *>(HstPtrAddr);
+
+  if (Desc->version != CFI_VERSION)
+    return ReportedSize;
+
+  if (Desc->rank > CFI_MAX_RANK)
+    return ReportedSize;
+
+  const char *RawDesc = reinterpret_cast<const char *>(Desc);
+  const char *DimsAddr = reinterpret_cast<const char *>(&Desc->dim);
+  size_t HeaderBytes = static_cast<size_t>(DimsAddr - RawDesc);
+  size_t DimsBytes = static_cast<size_t>(Desc->rank) * sizeof(CFI_dim_t);
+  size_t TotalBytes = HeaderBytes + DimsBytes;
+
+  return std::max<int64_t>(ReportedSize, static_cast<int64_t>(TotalBytes));
+}
+
 /// Utility function to perform a pointer attachment operation.
 ///
 /// For something like:
@@ -448,6 +479,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
          "Need a valid pointer entry to perform pointer-attachment");
 
   constexpr int64_t VoidPtrSize = sizeof(void *);
+  HstPtrSize = getFortranDescriptorSize(HstPtrAddr, HstPtrSize);
   assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
 
   void *TgtPteeBase =
diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90
index b44c0b2815274..b9c2d34c834fa 100644
--- a/offload/test/offloading/fortran/declare-target-automap.f90
+++ b/offload/test/offloading/fortran/declare-target-automap.f90
@@ -1,9 +1,6 @@
 !Offloading test for AUTOMAP modifier in declare target enter
 ! REQUIRES: flang, amdgpu
 
-! FIXME: https://github.com/llvm/llvm-project/issues/161265
-! XFAIL: amdgpu
-
 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
 program automap_program
    use iso_c_binding, only: c_loc

>From ab202f9e708037b4c33c5ae8ac630c4595525927 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Thu, 9 Oct 2025 16:30:31 +0100
Subject: [PATCH 2/4] Address copilot suggestions.

---
 flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp | 5 ++++-
 offload/libomptarget/omptarget.cpp                 | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index 5159158f53339..f0e0eefa7e220 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -241,7 +241,10 @@ class AutomapToTargetDataPass
            llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved))
         oldArg.replaceAllUsesWith(newArg);
 
-      // Finally, erase corresponding map block arguments (descending order).
+      // Finally, erase corresponding map block arguments in descending order.
+      // Descending order is necessary to avoid index invalidation: erasing
+      // arguments from highest to lowest index ensures that earlier erases do
+      // not shift the indices of arguments yet to be erased.
       unsigned mapStart = argIface.getMapBlockArgsStart();
       // Convert indices to absolute argument numbers before erasing.
       llvm::SmallVector<unsigned> absArgNos;
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 2479fc489d051..6b8ba9e5390c1 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -384,7 +384,7 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
   return TgtPteeBase;
 }
 
-// Fortran pointer attachments treated descriptors as plain pointers, so
+// Fortran pointer attachments treat descriptors as plain pointers, so
 // automapped arrays lose their declared bounds on the device. Recognize
 // CFI descriptors to compute their actual size before copying, ensuring the
 // full descriptor (including bounds) is transferred during attachment.

>From edf7bec4603aef064d3a10b8c26b57ec16de1602 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Thu, 30 Oct 2025 14:46:11 +0000
Subject: [PATCH 3/4] Remove omptarget changes.

---
 offload/libomptarget/omptarget.cpp | 32 ------------------------------
 1 file changed, 32 deletions(-)

diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 6b8ba9e5390c1..17b215732d51b 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -33,9 +33,6 @@
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Object/ObjectFile.h"
 
-#include "flang/ISO_Fortran_binding.h"
-
-#include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <vector>
@@ -384,34 +381,6 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
   return TgtPteeBase;
 }
 
-// Fortran pointer attachments treat descriptors as plain pointers, so
-// automapped arrays lose their declared bounds on the device. Recognize
-// CFI descriptors to compute their actual size before copying, ensuring the
-// full descriptor (including bounds) is transferred during attachment.
-static int64_t getFortranDescriptorSize(void **HstPtrAddr,
-                                        int64_t ReportedSize) {
-  constexpr int64_t VoidPtrSize = sizeof(void *);
-
-  if (!HstPtrAddr || ReportedSize > VoidPtrSize)
-    return ReportedSize;
-
-  const CFI_cdesc_t *Desc = reinterpret_cast<const CFI_cdesc_t *>(HstPtrAddr);
-
-  if (Desc->version != CFI_VERSION)
-    return ReportedSize;
-
-  if (Desc->rank > CFI_MAX_RANK)
-    return ReportedSize;
-
-  const char *RawDesc = reinterpret_cast<const char *>(Desc);
-  const char *DimsAddr = reinterpret_cast<const char *>(&Desc->dim);
-  size_t HeaderBytes = static_cast<size_t>(DimsAddr - RawDesc);
-  size_t DimsBytes = static_cast<size_t>(Desc->rank) * sizeof(CFI_dim_t);
-  size_t TotalBytes = HeaderBytes + DimsBytes;
-
-  return std::max<int64_t>(ReportedSize, static_cast<int64_t>(TotalBytes));
-}
-
 /// Utility function to perform a pointer attachment operation.
 ///
 /// For something like:
@@ -479,7 +448,6 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
          "Need a valid pointer entry to perform pointer-attachment");
 
   constexpr int64_t VoidPtrSize = sizeof(void *);
-  HstPtrSize = getFortranDescriptorSize(HstPtrAddr, HstPtrSize);
   assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
 
   void *TgtPteeBase =

>From 18af0fac40306e1fe1f3c2dca64090e7b73390bd Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Wed, 6 May 2026 15:30:12 +0100
Subject: [PATCH 4/4] Reworked PR to avoid kernel changes.

---
 .../Optimizer/OpenMP/AutomapToTargetData.cpp  | 197 +++++++-----------
 .../Optimizer/OpenMP/MapInfoFinalization.cpp  |  31 ++-
 .../Lower/OpenMP/declare-target-automap.f90   |  27 +++
 .../Transforms/omp-automap-to-target-data.fir |  35 ++--
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  51 ++++-
 .../omptarget-declare-target-llvm-host.mlir   |  10 +
 mlir/test/Target/LLVMIR/omptarget-llvm.mlir   |  31 +++
 7 files changed, 239 insertions(+), 143 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/declare-target-automap.f90

diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index f0e0eefa7e220..dd006e046fa74 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -15,14 +15,11 @@
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
-#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Operation.h"
-#include "mlir/IR/SymbolTable.h"
 #include "mlir/Pass/Pass.h"
 
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
-#include <algorithm>
 
 namespace flangomp {
 #define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
@@ -90,11 +87,7 @@ class AutomapToTargetDataPass
   }
 
   void runOnOperation() override {
-    ModuleOp module = getOperation()->getParentOfType<ModuleOp>();
-    if (!module)
-      module = dyn_cast<ModuleOp>(getOperation());
-    if (!module)
-      return;
+    ModuleOp module = getOperation();
 
     // Build FIR builder for helper utilities.
     fir::KindMapping kindMap = fir::getKindMapping(module);
@@ -117,16 +110,61 @@ class AutomapToTargetDataPass
       if (needsBoundsOps(memOp.getMemref()))
         genBoundsOps(builder, memOp.getMemref(), bounds);
 
+      mlir::Value boxValue;
+      if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(memOp.getOperation()))
+        boxValue = storeOp.getValue();
+      else
+        boxValue = mlir::cast<fir::LoadOp>(memOp.getOperation()).getResult();
+
+      mlir::Value baseAddr =
+          fir::BoxAddrOp::create(builder, memOp.getLoc(), boxValue);
+      mlir::Value dataAddr = builder.createConvert(
+          memOp.getLoc(),
+          builder.getRefType(fir::unwrapRefType(baseAddr.getType())), baseAddr);
+      mlir::Type baseTy = fir::unwrapRefType(dataAddr.getType());
+      if (mlir::Type eleTy = fir::dyn_cast_ptrOrBoxEleTy(baseTy))
+        baseTy = eleTy;
+      if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy))
+        if (seqTy.hasDynamicExtents())
+          baseTy = seqTy.getEleTy();
+
       omp::TargetEnterExitUpdateDataOperands clauses;
+      bool isAlloc = isa<fir::StoreOp>(memOp);
+
+      auto createDescriptorMap =
+          [&](mlir::omp::ClauseMapFlags mapType) -> mlir::omp::MapInfoOp {
+        mlir::Type descriptorTy =
+            fir::unwrapRefType(memOp.getMemref().getType());
+        // The attach entry expects the descriptor object to already have a
+        // device mapping, but this raw object map must not be expanded as a
+        // Fortran descriptor member map.
+        return mlir::omp::MapInfoOp::create(
+            builder, memOp.getLoc(), memOp.getMemref().getType(),
+            memOp.getMemref(), TypeAttr::get(descriptorTy),
+            builder.getAttr<omp::ClauseMapFlagsAttr>(mapType),
+            builder.getAttr<omp::VariableCaptureKindAttr>(
+                omp::VariableCaptureKind::ByRef),
+            /*var_ptr_ptr=*/mlir::Value{},
+            /*var_ptr_ptr_type=*/mlir::TypeAttr{},
+            /*members=*/SmallVector<Value>{},
+            /*members_index=*/ArrayAttr{},
+            /*bounds=*/SmallVector<Value>{},
+            /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
+            builder.getBoolAttr(true));
+      };
+
+      if (isAlloc)
+        clauses.mapVars.push_back(createDescriptorMap(
+            omp::ClauseMapFlags::to | omp::ClauseMapFlags::always));
+
+      mlir::omp::ClauseMapFlags mapType =
+          isAlloc ? omp::ClauseMapFlags::storage : omp::ClauseMapFlags::del;
       mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create(
-          builder, memOp.getLoc(), memOp.getMemref().getType(),
-          memOp.getMemref(),
-          TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
-          builder.getAttr<omp::ClauseMapFlagsAttr>(
-              isa<fir::StoreOp>(memOp) ? omp::ClauseMapFlags::to
-                                       : omp::ClauseMapFlags::del),
+          builder, memOp.getLoc(), dataAddr.getType(), dataAddr,
+          TypeAttr::get(baseTy),
+          builder.getAttr<omp::ClauseMapFlagsAttr>(mapType),
           builder.getAttr<omp::VariableCaptureKindAttr>(
-              omp::VariableCaptureKind::ByCopy),
+              omp::VariableCaptureKind::ByRef),
           /*var_ptr_ptr=*/mlir::Value{},
           /*var_ptr_ptr_type=*/mlir::TypeAttr{},
           /*members=*/SmallVector<Value>{},
@@ -134,9 +172,31 @@ class AutomapToTargetDataPass
           /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
           builder.getBoolAttr(false));
       clauses.mapVars.push_back(mapInfo);
-      isa<fir::StoreOp>(memOp)
-          ? omp::TargetEnterDataOp::create(builder, memOp.getLoc(), clauses)
-          : omp::TargetExitDataOp::create(builder, memOp.getLoc(), clauses);
+
+      if (isAlloc) {
+        mlir::omp::MapInfoOp attachInfo = mlir::omp::MapInfoOp::create(
+            builder, memOp.getLoc(), dataAddr.getType(), dataAddr,
+            TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
+            builder.getAttr<omp::ClauseMapFlagsAttr>(
+                omp::ClauseMapFlags::attach),
+            builder.getAttr<omp::VariableCaptureKindAttr>(
+                omp::VariableCaptureKind::ByRef),
+            /*var_ptr_ptr=*/memOp.getMemref(),
+            /*var_ptr_ptr_type=*/TypeAttr::get(
+                fir::unwrapRefType(memOp.getMemref().getType())),
+            /*members=*/SmallVector<Value>{},
+            /*members_index=*/ArrayAttr{},
+            /*bounds=*/SmallVector<Value>{},
+            /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
+            builder.getBoolAttr(false));
+        clauses.mapVars.push_back(attachInfo);
+      } else {
+        clauses.mapVars.push_back(
+            createDescriptorMap(omp::ClauseMapFlags::del));
+      }
+
+      isAlloc ? omp::TargetEnterDataOp::create(builder, memOp.getLoc(), clauses)
+              : omp::TargetExitDataOp::create(builder, memOp.getLoc(), clauses);
     };
 
     for (fir::GlobalOp globalOp : automapGlobals) {
@@ -154,107 +214,6 @@ class AutomapToTargetDataPass
           addMapInfo(globalOp, loadOp);
       }
     }
-
-    // Move automapped descriptors from map() to has_device_addr in target ops.
-    auto originatesFromAutomapGlobal = [&](mlir::Value varPtr) -> bool {
-      if (auto decl = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
-              varPtr.getDefiningOp())) {
-        if (auto addrOp = mlir::dyn_cast_or_null<fir::AddrOfOp>(
-                decl.getMemref().getDefiningOp())) {
-          if (auto g =
-                  mlir::SymbolTable::lookupNearestSymbolFrom<fir::GlobalOp>(
-                      decl, addrOp.getSymbol()))
-            return automapGlobals.contains(g);
-        }
-      }
-      return false;
-    };
-
-    module.walk([&](mlir::omp::TargetOp target) {
-      // Collect candidates to move: descriptor maps of automapped globals.
-      llvm::SmallVector<mlir::Value> newMapOps;
-      llvm::SmallVector<unsigned> removedIndices;
-      llvm::SmallVector<mlir::Value> movedToHDA;
-      llvm::SmallVector<mlir::BlockArgument> oldMapArgsForMoved;
-
-      auto mapRange = target.getMapVars();
-      newMapOps.reserve(mapRange.size());
-
-      auto argIface = llvm::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(
-          target.getOperation());
-      llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs =
-          argIface.getMapBlockArgs();
-
-      for (auto [idx, mapVal] : llvm::enumerate(mapRange)) {
-        auto mapOp =
-            mlir::dyn_cast<mlir::omp::MapInfoOp>(mapVal.getDefiningOp());
-        if (!mapOp) {
-          newMapOps.push_back(mapVal);
-          continue;
-        }
-
-        mlir::Type varTy = fir::unwrapRefType(mapOp.getVarType());
-        bool isDescriptor = mlir::isa<fir::BaseBoxType>(varTy);
-        if (isDescriptor && originatesFromAutomapGlobal(mapOp.getVarPtr())) {
-          movedToHDA.push_back(mapVal);
-          removedIndices.push_back(idx);
-          oldMapArgsForMoved.push_back(mapBlockArgs[idx]);
-        } else {
-          newMapOps.push_back(mapVal);
-        }
-      }
-
-      if (movedToHDA.empty())
-        return;
-
-      // Update map vars to exclude moved entries.
-      mlir::MutableOperandRange mapMutable = target.getMapVarsMutable();
-      mapMutable.assign(newMapOps);
-
-      // Append moved entries to has_device_addr and insert corresponding block
-      // arguments.
-      mlir::MutableOperandRange hdaMutable =
-          target.getHasDeviceAddrVarsMutable();
-      llvm::SmallVector<mlir::Value> newHDA;
-      newHDA.reserve(hdaMutable.size() + movedToHDA.size());
-      llvm::for_each(hdaMutable.getAsOperandRange(),
-                     [&](mlir::Value v) { newHDA.push_back(v); });
-
-      unsigned hdaStart = argIface.getHasDeviceAddrBlockArgsStart();
-      unsigned oldHdaCount = argIface.numHasDeviceAddrBlockArgs();
-      llvm::SmallVector<mlir::BlockArgument> newHDAArgsForMoved;
-      unsigned insertIndex = hdaStart + oldHdaCount;
-      for (mlir::Value v : movedToHDA) {
-        newHDA.push_back(v);
-        target->getRegion(0).insertArgument(insertIndex, v.getType(),
-                                            v.getLoc());
-        // Capture the newly inserted block argument.
-        newHDAArgsForMoved.push_back(
-            target->getRegion(0).getArgument(insertIndex));
-        insertIndex++;
-      }
-      hdaMutable.assign(newHDA);
-
-      // Redirect uses in the region: replace old map block args with the
-      // corresponding new has_device_addr block args.
-      for (auto [oldArg, newArg] :
-           llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved))
-        oldArg.replaceAllUsesWith(newArg);
-
-      // Finally, erase corresponding map block arguments in descending order.
-      // Descending order is necessary to avoid index invalidation: erasing
-      // arguments from highest to lowest index ensures that earlier erases do
-      // not shift the indices of arguments yet to be erased.
-      unsigned mapStart = argIface.getMapBlockArgsStart();
-      // Convert indices to absolute argument numbers before erasing.
-      llvm::SmallVector<unsigned> absArgNos;
-      absArgNos.reserve(removedIndices.size());
-      for (unsigned idx : removedIndices)
-        absArgNos.push_back(mapStart + idx);
-      std::sort(absArgNos.begin(), absArgNos.end(), std::greater<>());
-      for (unsigned absNo : absArgNos)
-        target->getRegion(0).eraseArgument(absNo);
-    });
   }
 };
 } // namespace
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index d4b343de988f2..66afbed93df7a 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -1351,6 +1351,34 @@ class MapInfoFinalizationPass
     return false;
   }
 
+  static bool isAttachMap(mlir::omp::MapInfoOp op) {
+    return (op.getMapType() & mlir::omp::ClauseMapFlags::attach) ==
+           mlir::omp::ClauseMapFlags::attach;
+  }
+
+  static bool isDescriptorOnlyMap(mlir::omp::MapInfoOp op) {
+    // A descriptor-only map keeps the descriptor object present so that a
+    // subsequent attach map can update its base address. It intentionally does
+    // not map the descriptor's pointee data, so descriptor member expansion
+    // must leave it alone.
+    if (!op.getPartialMap() || !op.getMembers().empty() ||
+        !op.getBounds().empty() || op.getVarPtrPtr() ||
+        op.getMapCaptureType() != mlir::omp::VariableCaptureKind::ByRef)
+      return false;
+
+    mlir::omp::ClauseMapFlags mapType = op.getMapType();
+    if ((mapType & mlir::omp::ClauseMapFlags::from) ==
+            mlir::omp::ClauseMapFlags::from ||
+        (mapType & mlir::omp::ClauseMapFlags::storage) ==
+            mlir::omp::ClauseMapFlags::storage)
+      return false;
+
+    return (mapType & mlir::omp::ClauseMapFlags::del) ==
+               mlir::omp::ClauseMapFlags::del ||
+           (mapType & mlir::omp::ClauseMapFlags::to) ==
+               mlir::omp::ClauseMapFlags::to;
+  }
+
   // This pass executes on omp::MapInfoOp's containing descriptor based types
   // (allocatables, pointers, assumed shape etc.) and expanding them into
   // multiple omp::MapInfoOp's for each pointer member contained within the
@@ -1612,7 +1640,8 @@ class MapInfoFinalizationPass
                "single users or up to two users when those users"
                "are a MapInfoOp and Target mapping directive");
 
-        if (hasADescriptor(op.getVarPtr().getDefiningOp(),
+        if (!isAttachMap(op) && !isDescriptorOnlyMap(op) &&
+            hasADescriptor(op.getVarPtr().getDefiningOp(),
                            fir::unwrapRefType(op.getVarPtrType()))) {
           builder.setInsertionPoint(op);
           mlir::Operation *targetUser = getFirstTargetUser(op);
diff --git a/flang/test/Lower/OpenMP/declare-target-automap.f90 b/flang/test/Lower/OpenMP/declare-target-automap.f90
new file mode 100644
index 0000000000000..fe46a3ab75c59
--- /dev/null
+++ b/flang/test/Lower/OpenMP/declare-target-automap.f90
@@ -0,0 +1,27 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+program automap_program
+   integer, allocatable, target :: automap_array(:)
+   !$omp declare target enter(automap:automap_array)
+
+   allocate (automap_array(10))
+
+   !$omp target
+      automap_array(1) = 1
+   !$omp end target
+
+   deallocate (automap_array)
+end program
+
+! CHECK-LABEL: func.func @_QQmain()
+! CHECK-NOT: has_device_addr
+! CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array", partial_map = true}
+! CHECK: %[[STORAGE_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(storage) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
+! CHECK: %[[ATTACH_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(attach) capture(ByRef) var_ptr_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
+! CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]], %[[ATTACH_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
+! CHECK-NOT: has_device_addr
+! CHECK: omp.target {{.*}}map_entries(
+! CHECK-NOT: has_device_addr
+! CHECK: %[[DELETE_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(delete) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
+! CHECK: %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array", partial_map = true}
+! CHECK: omp.target_exit_data map_entries(%[[DELETE_MAP]], %[[DESC_DELETE]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
diff --git a/flang/test/Transforms/omp-automap-to-target-data.fir b/flang/test/Transforms/omp-automap-to-target-data.fir
index 7a19705a248b4..7ed3cfa129e51 100644
--- a/flang/test/Transforms/omp-automap-to-target-data.fir
+++ b/flang/test/Transforms/omp-automap-to-target-data.fir
@@ -38,21 +38,26 @@ module {
 // CHECK-NEXT:      fir.shape
 // CHECK-NEXT:      %[[ARR_BOXED:.*]] = fir.embox %[[ALLOC_MEM]]
 // CHECK-NEXT:      fir.store %[[ARR_BOXED]]
-// CHECK-NEXT:      %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
-// CHECK-NEXT:      %[[ARR_HEAP_PTR:.*]] = fir.box_addr %[[ARR_BOXED_LOADED]]
-// CHECK-NEXT:      %[[DIM0:.*]] = arith.constant 0 : index
-// CHECK-NEXT:      %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
-// CHECK-NEXT:      %[[ONE:.*]] = arith.constant 1 : index
-// CHECK-NEXT:      %[[ZERO:.*]] = arith.constant 0 : index
-// CHECK-NEXT:      %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
-// CHECK-NEXT:      %[[LOWER_BOUND:.*]] = arith.constant 0 : index
-// CHECK-NEXT:      %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
-// CHECK-NEXT:      omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
-// CHECK-NEXT:      arith.muli %[[BOX_DIMS2]]#2, %[[BOX_DIMS2]]#1 : index
-// CHECK-NEXT:      %[[MAP_INFO:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(to) capture(ByCopy)
-// CHECK-NEXT:      omp.target_enter_data map_entries(%[[MAP_INFO]]
+// CHECK:           %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
+// CHECK:           %[[DIM0:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
+// CHECK:           %[[ONE:.*]] = arith.constant 1 : index
+// CHECK:           %[[ZERO:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
+// CHECK:           %[[LOWER_BOUND:.*]] = arith.constant 0 : index
+// CHECK:           %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
+// CHECK:           %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
+// CHECK:           %[[DATA_ADDR:.*]] = fir.box_addr %[[ARR_BOXED]]
+// CHECK-NEXT:      %[[DATA_REF:.*]] = fir.convert %[[DATA_ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+// CHECK-NEXT:      %[[DESC_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr", partial_map = true}
+// CHECK-NEXT:      %[[STORAGE_MAP:.*]] = omp.map.info var_ptr(%[[DATA_REF]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(storage) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "_QMtestEarr"}
+// CHECK-NEXT:      %[[ATTACH_MAP:.*]] = omp.map.info var_ptr(%[[DATA_REF]] : !fir.ref<!fir.array<?xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(attach) capture(ByRef) var_ptr_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "_QMtestEarr"}
+// CHECK-NEXT:      omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]], %[[ATTACH_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
 // CHECK:           %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0
-// CHECK:           %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(delete) capture(ByCopy)
-// CHECK-NEXT:      omp.target_exit_data map_entries(%[[EXIT_MAP]]
+// CHECK:           %[[EXIT_ADDR:.*]] = fir.box_addr %[[LOAD]]
+// CHECK-NEXT:      %[[EXIT_REF:.*]] = fir.convert %[[EXIT_ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+// CHECK-NEXT:      %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[EXIT_REF]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(delete) capture(ByRef)
+// CHECK-NEXT:      %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr", partial_map = true}
+// CHECK-NEXT:      omp.target_exit_data map_entries(%[[EXIT_MAP]], %[[DESC_DELETE]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 // CHECK-NEXT:      %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]]
 // CHECK-NEXT:      fir.freemem %[[BOXADDR]]
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d35e8612e158b..2dde7ab1614c0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -5719,16 +5719,18 @@ static void collectMapDataFromMapOperands(
   // Process MapOperands
   for (Value mapValue : mapVars) {
     auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
+    bool isAttachMap =
+        bitEnumContainsAll(mapOp.getMapType(), omp::ClauseMapFlags::attach);
     bool isRefPtrOrPteeMapWithAttach =
         checkRefPtrOrPteeMapWithAttach(mapOp.getMapType());
     Value offloadPtr = (mapOp.getVarPtrPtr() && !isRefPtrOrPteeMapWithAttach)
                            ? mapOp.getVarPtrPtr()
                            : mapOp.getVarPtr();
+    Value pointeePtr = isRefPtrOrPteeMapWithAttach
+                           ? mapOp.getVarPtrPtr()
+                           : (isAttachMap ? mapOp.getVarPtr() : offloadPtr);
     mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
-    mapData.Pointers.push_back(
-        isRefPtrOrPteeMapWithAttach
-            ? moduleTranslation.lookupValue(mapOp.getVarPtrPtr())
-            : mapData.OriginalValue.back());
+    mapData.Pointers.push_back(moduleTranslation.lookupValue(pointeePtr));
 
     if (llvm::Value *refPtr =
             getRefPtrIfDeclareTarget(offloadPtr, moduleTranslation)) {
@@ -6543,6 +6545,10 @@ createAlteredByCaptureMap(MapInfoData &mapData,
         ((convertClauseMapFlags(mapOp.getMapType()) &
           llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
          llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+    bool isRefPtrOrPteeMapWithAttach =
+        isAttachMap &&
+        (bitEnumContainsAll(mapOp.getMapType(), omp::ClauseMapFlags::ref_ptr) ||
+         bitEnumContainsAll(mapOp.getMapType(), omp::ClauseMapFlags::ref_ptee));
 
     // If it's declare target, skip it, it's handled separately. However, if
     // it's declare target, and an attach map, we want to calculate the exact
@@ -6565,7 +6571,7 @@ createAlteredByCaptureMap(MapInfoData &mapData,
         std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
             moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
             mapOp.getBounds());
-        if (isPtrTy)
+        if (isPtrTy && (!isAttachMap || isRefPtrOrPteeMapWithAttach))
           newV = builder.CreateLoad(builder.getPtrTy(), newV);
 
         if (!offsetIdx.empty())
@@ -8182,6 +8188,35 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
           convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
       auto deviceClause =
           convertToDeviceClauseKind(attribute.getDeviceType().getValue());
+      llvm::StringRef entryMangledName = mangledName;
+      llvm::Constant *entryAddr = llvm::cast<llvm::Constant>(gVal);
+      std::function<llvm::GlobalValue::LinkageTypes()> variableLinkage;
+      llvm::SmallString<128> entryNameStorage;
+      if ((attribute.getCaptureClause().getValue() ==
+               mlir::omp::DeclareTargetCaptureClause::to ||
+           attribute.getCaptureClause().getValue() ==
+               mlir::omp::DeclareTargetCaptureClause::enter) &&
+          !isDeclaration &&
+          (gVal->hasLocalLinkage() || gVal->hasHiddenVisibility())) {
+        // Keep the original symbol local so target code can address it using
+        // target-specific local relocations, but create a weak,
+        // default-visibility alias for the offload entry so libomptarget can
+        // associate the host global with the actual device global.
+        entryNameStorage = (mangledName + llvm::Twine("_decl_tgt_entry")).str();
+        entryMangledName = entryNameStorage;
+        if (llvm::GlobalValue *existing =
+                llvmModule->getNamedValue(entryMangledName)) {
+          entryAddr = llvm::cast<llvm::Constant>(existing);
+        } else {
+          entryAddr = llvm::GlobalAlias::create(
+              gVal->getValueType(), gVal->getAddressSpace(),
+              llvm::GlobalValue::WeakAnyLinkage, entryMangledName, entryAddr,
+              llvmModule);
+          llvm::cast<llvm::GlobalAlias>(entryAddr)->setVisibility(
+              llvm::GlobalValue::DefaultVisibility);
+        }
+        variableLinkage = [] { return llvm::GlobalValue::WeakAnyLinkage; };
+      }
       // unused for MLIR at the moment, required in Clang for book
       // keeping
       std::vector<llvm::GlobalVariable *> generatedRefs;
@@ -8211,9 +8246,9 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
       ompBuilder->registerTargetGlobalVariable(
           captureClause, deviceClause, isDeclaration, isExternallyVisible,
           ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack, vfs),
-          mangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
-          /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
-          gVal->getType(), gVal);
+          entryMangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
+          /*GlobalInitializer*/ nullptr, variableLinkage, gVal->getType(),
+          entryAddr);
 
       bool requiresUSM = ompBuilder->Config.hasRequiresUnifiedSharedMemory();
       if (ompBuilder->Config.isTargetDevice() &&
diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
index 92c85738dbc72..3343c96962f6c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
@@ -133,6 +133,16 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe
     llvm.return %0 : i32
   }
 
+  // CHECK-DAG: @_QFEinternal_enter = internal global i32 7
+  // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [34 x i8] c"_QFEinternal_enter_decl_tgt_entry\00"
+  // CHECK-DAG: @.offloading.entry._QFEinternal_enter_decl_tgt_entry = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @_QFEinternal_enter_decl_tgt_entry, ptr @.offloading.entry_name{{.*}}, i64 4, i64 0, ptr null }, section "llvm_offload_entries"
+  // CHECK-DAG: @_QFEinternal_enter_decl_tgt_entry = weak alias i32, ptr @_QFEinternal_enter
+  // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QFEinternal_enter_decl_tgt_entry", i32 {{.*}}, i32 {{.*}}}
+  llvm.mlir.global internal @_QFEinternal_enter() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>} : i32 {
+    %0 = llvm.mlir.constant(7 : i32) : i32
+    llvm.return %0 : i32
+  }
+
   // CHECK-DAG: @_QMtest_0Ept1 = global { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 4, i32 20180515, i8 0, i8 9, i8 1, i8 0 }
   // CHECK-DAG: @_QMtest_0Ept1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept1
   // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [31 x i8] c"_QMtest_0Ept1_decl_tgt_ref_ptr\00"
diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index b80e6220e6646..bc08fb9414e27 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -634,3 +634,34 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
 // CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 8, i64 0]
 // CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
 // CHECK-LABEL: define void @_QPomp_target_is_device_ptr
+
+// -----
+
+module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
+  llvm.func @_QPomp_target_enter_attach(%desc : !llvm.ptr,
+                                        %pointee : !llvm.ptr) {
+    %map = omp.map.info
+        var_ptr(%pointee : !llvm.ptr, !llvm.struct<(ptr, i64)>)
+        map_clauses(attach) capture(ByRef)
+        var_ptr_ptr(%desc : !llvm.ptr, !llvm.struct<(ptr, i64)>)
+        -> !llvm.ptr {name = ""}
+    omp.target_enter_data map_entries(%map : !llvm.ptr)
+    llvm.return
+  }
+}
+
+// CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 16384]
+// CHECK-LABEL: define void @_QPomp_target_enter_attach
+// CHECK-SAME: (ptr %[[DESC:.*]], ptr %[[POINTEE:.*]]) {
+// CHECK:         %[[BASEPTRS:.*]] = alloca [1 x ptr], align 8
+// CHECK:         %[[PTRS:.*]] = alloca [1 x ptr], align 8
+// CHECK:         %[[SIZES:.*]] = alloca [1 x i64], align 8
+// CHECK:         %[[IS_NULL:.*]] = icmp eq ptr %[[POINTEE]], null
+// CHECK-NEXT:    %[[SIZE:.*]] = select i1 %[[IS_NULL]], i64 0, i64 16
+// CHECK:         %[[BASEPTR:.*]] = getelementptr inbounds [1 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT:    store ptr %[[DESC]], ptr %[[BASEPTR]], align 8
+// CHECK-NEXT:    %[[PTR:.*]] = getelementptr inbounds [1 x ptr], ptr %[[PTRS]], i32 0, i32 0
+// CHECK-NEXT:    store ptr %[[POINTEE]], ptr %[[PTR]], align 8
+// CHECK-NEXT:    %[[SIZEPTR:.*]] = getelementptr inbounds [1 x i64], ptr %[[SIZES]], i32 0, i32 0
+// CHECK-NEXT:    store i64 %[[SIZE]], ptr %[[SIZEPTR]], align 8
+// CHECK:         call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null)



More information about the flang-commits mailing list