[flang-commits] [flang] [llvm] [mlir] [Flang][OpenMP] Fix Fortran automap handling (PR #162501)
Akash Banerjee via flang-commits
flang-commits at lists.llvm.org
Thu Jun 18 07:46:06 PDT 2026
https://github.com/TIFitis updated https://github.com/llvm/llvm-project/pull/162501
>From ba1c14f9f56a8b97e603eaa15ea8ab5cf139beb0 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Wed, 8 Oct 2025 16:51:51 +0100
Subject: [PATCH 1/6] [Flang][OpenMP] Fix Fortran automap handling
- Move automapped Fortran descriptor mappings from map() to has_device_addr in target regions, updating block args and uses accordingly.
- In libomptarget, detect CFI descriptors during pointer attachment and compute the correct descriptor size to transfer full metadata (including bounds).
- Resolves lost bounds for automapped Fortran arrays on device; no change for C/C++.
This fixes the test offload/test/offloading/fortran/declare-target-automap.f90, which was reported broken in #161265.
---
.../Optimizer/OpenMP/AutomapToTargetData.cpp | 102 ++++++++++++++++++
offload/libomptarget/omptarget.cpp | 32 ++++++
.../fortran/declare-target-automap.f90 | 3 -
3 files changed, 134 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index eeb08ebf51191..5159158f53339 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -18,8 +18,12 @@
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"
+#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include <algorithm>
+
namespace flangomp {
#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
@@ -150,6 +154,104 @@ class AutomapToTargetDataPass
addMapInfo(globalOp, loadOp);
}
}
+
+ // Move automapped descriptors from map() to has_device_addr in target ops.
+ auto originatesFromAutomapGlobal = [&](mlir::Value varPtr) -> bool {
+ if (auto decl = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
+ varPtr.getDefiningOp())) {
+ if (auto addrOp = mlir::dyn_cast_or_null<fir::AddrOfOp>(
+ decl.getMemref().getDefiningOp())) {
+ if (auto g =
+ mlir::SymbolTable::lookupNearestSymbolFrom<fir::GlobalOp>(
+ decl, addrOp.getSymbol()))
+ return automapGlobals.contains(g);
+ }
+ }
+ return false;
+ };
+
+ module.walk([&](mlir::omp::TargetOp target) {
+ // Collect candidates to move: descriptor maps of automapped globals.
+ llvm::SmallVector<mlir::Value> newMapOps;
+ llvm::SmallVector<unsigned> removedIndices;
+ llvm::SmallVector<mlir::Value> movedToHDA;
+ llvm::SmallVector<mlir::BlockArgument> oldMapArgsForMoved;
+
+ auto mapRange = target.getMapVars();
+ newMapOps.reserve(mapRange.size());
+
+ auto argIface = llvm::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(
+ target.getOperation());
+ llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs =
+ argIface.getMapBlockArgs();
+
+ for (auto [idx, mapVal] : llvm::enumerate(mapRange)) {
+ auto mapOp =
+ mlir::dyn_cast<mlir::omp::MapInfoOp>(mapVal.getDefiningOp());
+ if (!mapOp) {
+ newMapOps.push_back(mapVal);
+ continue;
+ }
+
+ mlir::Type varTy = fir::unwrapRefType(mapOp.getVarType());
+ bool isDescriptor = mlir::isa<fir::BaseBoxType>(varTy);
+ if (isDescriptor && originatesFromAutomapGlobal(mapOp.getVarPtr())) {
+ movedToHDA.push_back(mapVal);
+ removedIndices.push_back(idx);
+ oldMapArgsForMoved.push_back(mapBlockArgs[idx]);
+ } else {
+ newMapOps.push_back(mapVal);
+ }
+ }
+
+ if (movedToHDA.empty())
+ return;
+
+ // Update map vars to exclude moved entries.
+ mlir::MutableOperandRange mapMutable = target.getMapVarsMutable();
+ mapMutable.assign(newMapOps);
+
+ // Append moved entries to has_device_addr and insert corresponding block
+ // arguments.
+ mlir::MutableOperandRange hdaMutable =
+ target.getHasDeviceAddrVarsMutable();
+ llvm::SmallVector<mlir::Value> newHDA;
+ newHDA.reserve(hdaMutable.size() + movedToHDA.size());
+ llvm::for_each(hdaMutable.getAsOperandRange(),
+ [&](mlir::Value v) { newHDA.push_back(v); });
+
+ unsigned hdaStart = argIface.getHasDeviceAddrBlockArgsStart();
+ unsigned oldHdaCount = argIface.numHasDeviceAddrBlockArgs();
+ llvm::SmallVector<mlir::BlockArgument> newHDAArgsForMoved;
+ unsigned insertIndex = hdaStart + oldHdaCount;
+ for (mlir::Value v : movedToHDA) {
+ newHDA.push_back(v);
+ target->getRegion(0).insertArgument(insertIndex, v.getType(),
+ v.getLoc());
+ // Capture the newly inserted block argument.
+ newHDAArgsForMoved.push_back(
+ target->getRegion(0).getArgument(insertIndex));
+ insertIndex++;
+ }
+ hdaMutable.assign(newHDA);
+
+ // Redirect uses in the region: replace old map block args with the
+ // corresponding new has_device_addr block args.
+ for (auto [oldArg, newArg] :
+ llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved))
+ oldArg.replaceAllUsesWith(newArg);
+
+ // Finally, erase corresponding map block arguments (descending order).
+ unsigned mapStart = argIface.getMapBlockArgsStart();
+ // Convert indices to absolute argument numbers before erasing.
+ llvm::SmallVector<unsigned> absArgNos;
+ absArgNos.reserve(removedIndices.size());
+ for (unsigned idx : removedIndices)
+ absArgNos.push_back(mapStart + idx);
+ std::sort(absArgNos.begin(), absArgNos.end(), std::greater<>());
+ for (unsigned absNo : absArgNos)
+ target->getRegion(0).eraseArgument(absNo);
+ });
}
};
} // namespace
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 17b215732d51b..2479fc489d051 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -33,6 +33,9 @@
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Object/ObjectFile.h"
+#include "flang/ISO_Fortran_binding.h"
+
+#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>
@@ -381,6 +384,34 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
return TgtPteeBase;
}
+// Fortran pointer attachments treated descriptors as plain pointers, so
+// automapped arrays lose their declared bounds on the device. Recognize
+// CFI descriptors to compute their actual size before copying, ensuring the
+// full descriptor (including bounds) is transferred during attachment.
+static int64_t getFortranDescriptorSize(void **HstPtrAddr,
+ int64_t ReportedSize) {
+ constexpr int64_t VoidPtrSize = sizeof(void *);
+
+ if (!HstPtrAddr || ReportedSize > VoidPtrSize)
+ return ReportedSize;
+
+ const CFI_cdesc_t *Desc = reinterpret_cast<const CFI_cdesc_t *>(HstPtrAddr);
+
+ if (Desc->version != CFI_VERSION)
+ return ReportedSize;
+
+ if (Desc->rank > CFI_MAX_RANK)
+ return ReportedSize;
+
+ const char *RawDesc = reinterpret_cast<const char *>(Desc);
+ const char *DimsAddr = reinterpret_cast<const char *>(&Desc->dim);
+ size_t HeaderBytes = static_cast<size_t>(DimsAddr - RawDesc);
+ size_t DimsBytes = static_cast<size_t>(Desc->rank) * sizeof(CFI_dim_t);
+ size_t TotalBytes = HeaderBytes + DimsBytes;
+
+ return std::max<int64_t>(ReportedSize, static_cast<int64_t>(TotalBytes));
+}
+
/// Utility function to perform a pointer attachment operation.
///
/// For something like:
@@ -448,6 +479,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
"Need a valid pointer entry to perform pointer-attachment");
constexpr int64_t VoidPtrSize = sizeof(void *);
+ HstPtrSize = getFortranDescriptorSize(HstPtrAddr, HstPtrSize);
assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
void *TgtPteeBase =
diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90
index b44c0b2815274..b9c2d34c834fa 100644
--- a/offload/test/offloading/fortran/declare-target-automap.f90
+++ b/offload/test/offloading/fortran/declare-target-automap.f90
@@ -1,9 +1,6 @@
!Offloading test for AUTOMAP modifier in declare target enter
! REQUIRES: flang, amdgpu
-! FIXME: https://github.com/llvm/llvm-project/issues/161265
-! XFAIL: amdgpu
-
! RUN: %libomptarget-compile-fortran-run-and-check-generic
program automap_program
use iso_c_binding, only: c_loc
>From ab202f9e708037b4c33c5ae8ac630c4595525927 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Thu, 9 Oct 2025 16:30:31 +0100
Subject: [PATCH 2/6] Address Copilot suggestions.
---
flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp | 5 ++++-
offload/libomptarget/omptarget.cpp | 2 +-
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index 5159158f53339..f0e0eefa7e220 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -241,7 +241,10 @@ class AutomapToTargetDataPass
llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved))
oldArg.replaceAllUsesWith(newArg);
- // Finally, erase corresponding map block arguments (descending order).
+ // Finally, erase corresponding map block arguments in descending order.
+ // Descending order is necessary to avoid index invalidation: erasing
+ // arguments from highest to lowest index ensures that earlier erases do
+ // not shift the indices of arguments yet to be erased.
unsigned mapStart = argIface.getMapBlockArgsStart();
// Convert indices to absolute argument numbers before erasing.
llvm::SmallVector<unsigned> absArgNos;
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 2479fc489d051..6b8ba9e5390c1 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -384,7 +384,7 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
return TgtPteeBase;
}
-// Fortran pointer attachments treated descriptors as plain pointers, so
+// Fortran pointer attachments treat descriptors as plain pointers, so
// automapped arrays lose their declared bounds on the device. Recognize
// CFI descriptors to compute their actual size before copying, ensuring the
// full descriptor (including bounds) is transferred during attachment.
>From edf7bec4603aef064d3a10b8c26b57ec16de1602 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Thu, 30 Oct 2025 14:46:11 +0000
Subject: [PATCH 3/6] Remove omptarget changes.
---
offload/libomptarget/omptarget.cpp | 32 ------------------------------
1 file changed, 32 deletions(-)
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 6b8ba9e5390c1..17b215732d51b 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -33,9 +33,6 @@
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Object/ObjectFile.h"
-#include "flang/ISO_Fortran_binding.h"
-
-#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>
@@ -384,34 +381,6 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
return TgtPteeBase;
}
-// Fortran pointer attachments treat descriptors as plain pointers, so
-// automapped arrays lose their declared bounds on the device. Recognize
-// CFI descriptors to compute their actual size before copying, ensuring the
-// full descriptor (including bounds) is transferred during attachment.
-static int64_t getFortranDescriptorSize(void **HstPtrAddr,
- int64_t ReportedSize) {
- constexpr int64_t VoidPtrSize = sizeof(void *);
-
- if (!HstPtrAddr || ReportedSize > VoidPtrSize)
- return ReportedSize;
-
- const CFI_cdesc_t *Desc = reinterpret_cast<const CFI_cdesc_t *>(HstPtrAddr);
-
- if (Desc->version != CFI_VERSION)
- return ReportedSize;
-
- if (Desc->rank > CFI_MAX_RANK)
- return ReportedSize;
-
- const char *RawDesc = reinterpret_cast<const char *>(Desc);
- const char *DimsAddr = reinterpret_cast<const char *>(&Desc->dim);
- size_t HeaderBytes = static_cast<size_t>(DimsAddr - RawDesc);
- size_t DimsBytes = static_cast<size_t>(Desc->rank) * sizeof(CFI_dim_t);
- size_t TotalBytes = HeaderBytes + DimsBytes;
-
- return std::max<int64_t>(ReportedSize, static_cast<int64_t>(TotalBytes));
-}
-
/// Utility function to perform a pointer attachment operation.
///
/// For something like:
@@ -479,7 +448,6 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
"Need a valid pointer entry to perform pointer-attachment");
constexpr int64_t VoidPtrSize = sizeof(void *);
- HstPtrSize = getFortranDescriptorSize(HstPtrAddr, HstPtrSize);
assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
void *TgtPteeBase =
>From 18af0fac40306e1fe1f3c2dca64090e7b73390bd Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Wed, 6 May 2026 15:30:12 +0100
Subject: [PATCH 4/6] Reworked PR to avoid kernel changes.
---
.../Optimizer/OpenMP/AutomapToTargetData.cpp | 197 +++++++-----------
.../Optimizer/OpenMP/MapInfoFinalization.cpp | 31 ++-
.../Lower/OpenMP/declare-target-automap.f90 | 27 +++
.../Transforms/omp-automap-to-target-data.fir | 35 ++--
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 51 ++++-
.../omptarget-declare-target-llvm-host.mlir | 10 +
mlir/test/Target/LLVMIR/omptarget-llvm.mlir | 31 +++
7 files changed, 239 insertions(+), 143 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/declare-target-automap.f90
diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index f0e0eefa7e220..dd006e046fa74 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -15,14 +15,11 @@
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
-#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"
-#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
-#include <algorithm>
namespace flangomp {
#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
@@ -90,11 +87,7 @@ class AutomapToTargetDataPass
}
void runOnOperation() override {
- ModuleOp module = getOperation()->getParentOfType<ModuleOp>();
- if (!module)
- module = dyn_cast<ModuleOp>(getOperation());
- if (!module)
- return;
+ ModuleOp module = getOperation();
// Build FIR builder for helper utilities.
fir::KindMapping kindMap = fir::getKindMapping(module);
@@ -117,16 +110,61 @@ class AutomapToTargetDataPass
if (needsBoundsOps(memOp.getMemref()))
genBoundsOps(builder, memOp.getMemref(), bounds);
+ mlir::Value boxValue;
+ if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(memOp.getOperation()))
+ boxValue = storeOp.getValue();
+ else
+ boxValue = mlir::cast<fir::LoadOp>(memOp.getOperation()).getResult();
+
+ mlir::Value baseAddr =
+ fir::BoxAddrOp::create(builder, memOp.getLoc(), boxValue);
+ mlir::Value dataAddr = builder.createConvert(
+ memOp.getLoc(),
+ builder.getRefType(fir::unwrapRefType(baseAddr.getType())), baseAddr);
+ mlir::Type baseTy = fir::unwrapRefType(dataAddr.getType());
+ if (mlir::Type eleTy = fir::dyn_cast_ptrOrBoxEleTy(baseTy))
+ baseTy = eleTy;
+ if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(baseTy))
+ if (seqTy.hasDynamicExtents())
+ baseTy = seqTy.getEleTy();
+
omp::TargetEnterExitUpdateDataOperands clauses;
+ bool isAlloc = isa<fir::StoreOp>(memOp);
+
+ auto createDescriptorMap =
+ [&](mlir::omp::ClauseMapFlags mapType) -> mlir::omp::MapInfoOp {
+ mlir::Type descriptorTy =
+ fir::unwrapRefType(memOp.getMemref().getType());
+ // The attach entry expects the descriptor object to already have a
+ // device mapping, but this raw object map must not be expanded as a
+ // Fortran descriptor member map.
+ return mlir::omp::MapInfoOp::create(
+ builder, memOp.getLoc(), memOp.getMemref().getType(),
+ memOp.getMemref(), TypeAttr::get(descriptorTy),
+ builder.getAttr<omp::ClauseMapFlagsAttr>(mapType),
+ builder.getAttr<omp::VariableCaptureKindAttr>(
+ omp::VariableCaptureKind::ByRef),
+ /*var_ptr_ptr=*/mlir::Value{},
+ /*var_ptr_ptr_type=*/mlir::TypeAttr{},
+ /*members=*/SmallVector<Value>{},
+ /*members_index=*/ArrayAttr{},
+ /*bounds=*/SmallVector<Value>{},
+ /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
+ builder.getBoolAttr(true));
+ };
+
+ if (isAlloc)
+ clauses.mapVars.push_back(createDescriptorMap(
+ omp::ClauseMapFlags::to | omp::ClauseMapFlags::always));
+
+ mlir::omp::ClauseMapFlags mapType =
+ isAlloc ? omp::ClauseMapFlags::storage : omp::ClauseMapFlags::del;
mlir::omp::MapInfoOp mapInfo = mlir::omp::MapInfoOp::create(
- builder, memOp.getLoc(), memOp.getMemref().getType(),
- memOp.getMemref(),
- TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
- builder.getAttr<omp::ClauseMapFlagsAttr>(
- isa<fir::StoreOp>(memOp) ? omp::ClauseMapFlags::to
- : omp::ClauseMapFlags::del),
+ builder, memOp.getLoc(), dataAddr.getType(), dataAddr,
+ TypeAttr::get(baseTy),
+ builder.getAttr<omp::ClauseMapFlagsAttr>(mapType),
builder.getAttr<omp::VariableCaptureKindAttr>(
- omp::VariableCaptureKind::ByCopy),
+ omp::VariableCaptureKind::ByRef),
/*var_ptr_ptr=*/mlir::Value{},
/*var_ptr_ptr_type=*/mlir::TypeAttr{},
/*members=*/SmallVector<Value>{},
@@ -134,9 +172,31 @@ class AutomapToTargetDataPass
/*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
builder.getBoolAttr(false));
clauses.mapVars.push_back(mapInfo);
- isa<fir::StoreOp>(memOp)
- ? omp::TargetEnterDataOp::create(builder, memOp.getLoc(), clauses)
- : omp::TargetExitDataOp::create(builder, memOp.getLoc(), clauses);
+
+ if (isAlloc) {
+ mlir::omp::MapInfoOp attachInfo = mlir::omp::MapInfoOp::create(
+ builder, memOp.getLoc(), dataAddr.getType(), dataAddr,
+ TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
+ builder.getAttr<omp::ClauseMapFlagsAttr>(
+ omp::ClauseMapFlags::attach),
+ builder.getAttr<omp::VariableCaptureKindAttr>(
+ omp::VariableCaptureKind::ByRef),
+ /*var_ptr_ptr=*/memOp.getMemref(),
+ /*var_ptr_ptr_type=*/TypeAttr::get(
+ fir::unwrapRefType(memOp.getMemref().getType())),
+ /*members=*/SmallVector<Value>{},
+ /*members_index=*/ArrayAttr{},
+ /*bounds=*/SmallVector<Value>{},
+ /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
+ builder.getBoolAttr(false));
+ clauses.mapVars.push_back(attachInfo);
+ } else {
+ clauses.mapVars.push_back(
+ createDescriptorMap(omp::ClauseMapFlags::del));
+ }
+
+ isAlloc ? omp::TargetEnterDataOp::create(builder, memOp.getLoc(), clauses)
+ : omp::TargetExitDataOp::create(builder, memOp.getLoc(), clauses);
};
for (fir::GlobalOp globalOp : automapGlobals) {
@@ -154,107 +214,6 @@ class AutomapToTargetDataPass
addMapInfo(globalOp, loadOp);
}
}
-
- // Move automapped descriptors from map() to has_device_addr in target ops.
- auto originatesFromAutomapGlobal = [&](mlir::Value varPtr) -> bool {
- if (auto decl = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
- varPtr.getDefiningOp())) {
- if (auto addrOp = mlir::dyn_cast_or_null<fir::AddrOfOp>(
- decl.getMemref().getDefiningOp())) {
- if (auto g =
- mlir::SymbolTable::lookupNearestSymbolFrom<fir::GlobalOp>(
- decl, addrOp.getSymbol()))
- return automapGlobals.contains(g);
- }
- }
- return false;
- };
-
- module.walk([&](mlir::omp::TargetOp target) {
- // Collect candidates to move: descriptor maps of automapped globals.
- llvm::SmallVector<mlir::Value> newMapOps;
- llvm::SmallVector<unsigned> removedIndices;
- llvm::SmallVector<mlir::Value> movedToHDA;
- llvm::SmallVector<mlir::BlockArgument> oldMapArgsForMoved;
-
- auto mapRange = target.getMapVars();
- newMapOps.reserve(mapRange.size());
-
- auto argIface = llvm::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(
- target.getOperation());
- llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs =
- argIface.getMapBlockArgs();
-
- for (auto [idx, mapVal] : llvm::enumerate(mapRange)) {
- auto mapOp =
- mlir::dyn_cast<mlir::omp::MapInfoOp>(mapVal.getDefiningOp());
- if (!mapOp) {
- newMapOps.push_back(mapVal);
- continue;
- }
-
- mlir::Type varTy = fir::unwrapRefType(mapOp.getVarType());
- bool isDescriptor = mlir::isa<fir::BaseBoxType>(varTy);
- if (isDescriptor && originatesFromAutomapGlobal(mapOp.getVarPtr())) {
- movedToHDA.push_back(mapVal);
- removedIndices.push_back(idx);
- oldMapArgsForMoved.push_back(mapBlockArgs[idx]);
- } else {
- newMapOps.push_back(mapVal);
- }
- }
-
- if (movedToHDA.empty())
- return;
-
- // Update map vars to exclude moved entries.
- mlir::MutableOperandRange mapMutable = target.getMapVarsMutable();
- mapMutable.assign(newMapOps);
-
- // Append moved entries to has_device_addr and insert corresponding block
- // arguments.
- mlir::MutableOperandRange hdaMutable =
- target.getHasDeviceAddrVarsMutable();
- llvm::SmallVector<mlir::Value> newHDA;
- newHDA.reserve(hdaMutable.size() + movedToHDA.size());
- llvm::for_each(hdaMutable.getAsOperandRange(),
- [&](mlir::Value v) { newHDA.push_back(v); });
-
- unsigned hdaStart = argIface.getHasDeviceAddrBlockArgsStart();
- unsigned oldHdaCount = argIface.numHasDeviceAddrBlockArgs();
- llvm::SmallVector<mlir::BlockArgument> newHDAArgsForMoved;
- unsigned insertIndex = hdaStart + oldHdaCount;
- for (mlir::Value v : movedToHDA) {
- newHDA.push_back(v);
- target->getRegion(0).insertArgument(insertIndex, v.getType(),
- v.getLoc());
- // Capture the newly inserted block argument.
- newHDAArgsForMoved.push_back(
- target->getRegion(0).getArgument(insertIndex));
- insertIndex++;
- }
- hdaMutable.assign(newHDA);
-
- // Redirect uses in the region: replace old map block args with the
- // corresponding new has_device_addr block args.
- for (auto [oldArg, newArg] :
- llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved))
- oldArg.replaceAllUsesWith(newArg);
-
- // Finally, erase corresponding map block arguments in descending order.
- // Descending order is necessary to avoid index invalidation: erasing
- // arguments from highest to lowest index ensures that earlier erases do
- // not shift the indices of arguments yet to be erased.
- unsigned mapStart = argIface.getMapBlockArgsStart();
- // Convert indices to absolute argument numbers before erasing.
- llvm::SmallVector<unsigned> absArgNos;
- absArgNos.reserve(removedIndices.size());
- for (unsigned idx : removedIndices)
- absArgNos.push_back(mapStart + idx);
- std::sort(absArgNos.begin(), absArgNos.end(), std::greater<>());
- for (unsigned absNo : absArgNos)
- target->getRegion(0).eraseArgument(absNo);
- });
}
};
} // namespace
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index d4b343de988f2..66afbed93df7a 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -1351,6 +1351,34 @@ class MapInfoFinalizationPass
return false;
}
+ static bool isAttachMap(mlir::omp::MapInfoOp op) {
+ return (op.getMapType() & mlir::omp::ClauseMapFlags::attach) ==
+ mlir::omp::ClauseMapFlags::attach;
+ }
+
+ static bool isDescriptorOnlyMap(mlir::omp::MapInfoOp op) {
+ // A descriptor-only map keeps the descriptor object present so that a
+ // subsequent attach map can update its base address. It intentionally does
+ // not map the descriptor's pointee data, so descriptor member expansion
+ // must leave it alone.
+ if (!op.getPartialMap() || !op.getMembers().empty() ||
+ !op.getBounds().empty() || op.getVarPtrPtr() ||
+ op.getMapCaptureType() != mlir::omp::VariableCaptureKind::ByRef)
+ return false;
+
+ mlir::omp::ClauseMapFlags mapType = op.getMapType();
+ if ((mapType & mlir::omp::ClauseMapFlags::from) ==
+ mlir::omp::ClauseMapFlags::from ||
+ (mapType & mlir::omp::ClauseMapFlags::storage) ==
+ mlir::omp::ClauseMapFlags::storage)
+ return false;
+
+ return (mapType & mlir::omp::ClauseMapFlags::del) ==
+ mlir::omp::ClauseMapFlags::del ||
+ (mapType & mlir::omp::ClauseMapFlags::to) ==
+ mlir::omp::ClauseMapFlags::to;
+ }
+
// This pass executes on omp::MapInfoOp's containing descriptor based types
// (allocatables, pointers, assumed shape etc.) and expanding them into
// multiple omp::MapInfoOp's for each pointer member contained within the
@@ -1612,7 +1640,8 @@ class MapInfoFinalizationPass
"single users or up to two users when those users"
"are a MapInfoOp and Target mapping directive");
- if (hasADescriptor(op.getVarPtr().getDefiningOp(),
+ if (!isAttachMap(op) && !isDescriptorOnlyMap(op) &&
+ hasADescriptor(op.getVarPtr().getDefiningOp(),
fir::unwrapRefType(op.getVarPtrType()))) {
builder.setInsertionPoint(op);
mlir::Operation *targetUser = getFirstTargetUser(op);
diff --git a/flang/test/Lower/OpenMP/declare-target-automap.f90 b/flang/test/Lower/OpenMP/declare-target-automap.f90
new file mode 100644
index 0000000000000..fe46a3ab75c59
--- /dev/null
+++ b/flang/test/Lower/OpenMP/declare-target-automap.f90
@@ -0,0 +1,27 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 %s -o - | FileCheck %s
+
+program automap_program
+ integer, allocatable, target :: automap_array(:)
+ !$omp declare target enter(automap:automap_array)
+
+ allocate (automap_array(10))
+
+ !$omp target
+ automap_array(1) = 1
+ !$omp end target
+
+ deallocate (automap_array)
+end program
+
+! CHECK-LABEL: func.func @_QQmain()
+! CHECK-NOT: has_device_addr
+! CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array", partial_map = true}
+! CHECK: %[[STORAGE_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(storage) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
+! CHECK: %[[ATTACH_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(attach) capture(ByRef) var_ptr_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
+! CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]], %[[ATTACH_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
+! CHECK-NOT: has_device_addr
+! CHECK: omp.target {{.*}}map_entries(
+! CHECK-NOT: has_device_addr
+! CHECK: %[[DELETE_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(delete) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
+! CHECK: %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array", partial_map = true}
+! CHECK: omp.target_exit_data map_entries(%[[DELETE_MAP]], %[[DESC_DELETE]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
diff --git a/flang/test/Transforms/omp-automap-to-target-data.fir b/flang/test/Transforms/omp-automap-to-target-data.fir
index 7a19705a248b4..7ed3cfa129e51 100644
--- a/flang/test/Transforms/omp-automap-to-target-data.fir
+++ b/flang/test/Transforms/omp-automap-to-target-data.fir
@@ -38,21 +38,26 @@ module {
// CHECK-NEXT: fir.shape
// CHECK-NEXT: %[[ARR_BOXED:.*]] = fir.embox %[[ALLOC_MEM]]
// CHECK-NEXT: fir.store %[[ARR_BOXED]]
-// CHECK-NEXT: %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
-// CHECK-NEXT: %[[ARR_HEAP_PTR:.*]] = fir.box_addr %[[ARR_BOXED_LOADED]]
-// CHECK-NEXT: %[[DIM0:.*]] = arith.constant 0 : index
-// CHECK-NEXT: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
-// CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : index
-// CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : index
-// CHECK-NEXT: %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
-// CHECK-NEXT: %[[LOWER_BOUND:.*]] = arith.constant 0 : index
-// CHECK-NEXT: %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
-// CHECK-NEXT: omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
-// CHECK-NEXT: arith.muli %[[BOX_DIMS2]]#2, %[[BOX_DIMS2]]#1 : index
-// CHECK-NEXT: %[[MAP_INFO:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(to) capture(ByCopy)
-// CHECK-NEXT: omp.target_enter_data map_entries(%[[MAP_INFO]]
+// CHECK: %[[ARR_BOXED_LOADED:.*]] = fir.load %[[AUTOMAP_DECL]]#0
+// CHECK: %[[DIM0:.*]] = arith.constant 0 : index
+// CHECK: %[[BOX_DIMS:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[DIM0]]
+// CHECK: %[[ONE:.*]] = arith.constant 1 : index
+// CHECK: %[[ZERO:.*]] = arith.constant 0 : index
+// CHECK: %[[BOX_DIMS2:.*]]:3 = fir.box_dims %[[ARR_BOXED_LOADED]], %[[ZERO]]
+// CHECK: %[[LOWER_BOUND:.*]] = arith.constant 0 : index
+// CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[BOX_DIMS2]]#1, %[[ONE]] : index
+// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
+// CHECK: %[[DATA_ADDR:.*]] = fir.box_addr %[[ARR_BOXED]]
+// CHECK-NEXT: %[[DATA_REF:.*]] = fir.convert %[[DATA_ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+// CHECK-NEXT: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr", partial_map = true}
+// CHECK-NEXT: %[[STORAGE_MAP:.*]] = omp.map.info var_ptr(%[[DATA_REF]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(storage) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "_QMtestEarr"}
+// CHECK-NEXT: %[[ATTACH_MAP:.*]] = omp.map.info var_ptr(%[[DATA_REF]] : !fir.ref<!fir.array<?xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(attach) capture(ByRef) var_ptr_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "_QMtestEarr"}
+// CHECK-NEXT: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]], %[[ATTACH_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
// CHECK: %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0
-// CHECK: %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 {{.*}} map_clauses(delete) capture(ByCopy)
-// CHECK-NEXT: omp.target_exit_data map_entries(%[[EXIT_MAP]]
+// CHECK: %[[EXIT_ADDR:.*]] = fir.box_addr %[[LOAD]]
+// CHECK-NEXT: %[[EXIT_REF:.*]] = fir.convert %[[EXIT_ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+// CHECK-NEXT: %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[EXIT_REF]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(delete) capture(ByRef)
+// CHECK-NEXT: %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr", partial_map = true}
+// CHECK-NEXT: omp.target_exit_data map_entries(%[[EXIT_MAP]], %[[DESC_DELETE]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
// CHECK-NEXT: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]]
// CHECK-NEXT: fir.freemem %[[BOXADDR]]
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d35e8612e158b..2dde7ab1614c0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -5719,16 +5719,18 @@ static void collectMapDataFromMapOperands(
// Process MapOperands
for (Value mapValue : mapVars) {
auto mapOp = cast<omp::MapInfoOp>(mapValue.getDefiningOp());
+ bool isAttachMap =
+ bitEnumContainsAll(mapOp.getMapType(), omp::ClauseMapFlags::attach);
bool isRefPtrOrPteeMapWithAttach =
checkRefPtrOrPteeMapWithAttach(mapOp.getMapType());
Value offloadPtr = (mapOp.getVarPtrPtr() && !isRefPtrOrPteeMapWithAttach)
? mapOp.getVarPtrPtr()
: mapOp.getVarPtr();
+ Value pointeePtr = isRefPtrOrPteeMapWithAttach
+ ? mapOp.getVarPtrPtr()
+ : (isAttachMap ? mapOp.getVarPtr() : offloadPtr);
mapData.OriginalValue.push_back(moduleTranslation.lookupValue(offloadPtr));
- mapData.Pointers.push_back(
- isRefPtrOrPteeMapWithAttach
- ? moduleTranslation.lookupValue(mapOp.getVarPtrPtr())
- : mapData.OriginalValue.back());
+ mapData.Pointers.push_back(moduleTranslation.lookupValue(pointeePtr));
if (llvm::Value *refPtr =
getRefPtrIfDeclareTarget(offloadPtr, moduleTranslation)) {
@@ -6543,6 +6545,10 @@ createAlteredByCaptureMap(MapInfoData &mapData,
((convertClauseMapFlags(mapOp.getMapType()) &
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH) ==
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
+ bool isRefPtrOrPteeMapWithAttach =
+ isAttachMap &&
+ (bitEnumContainsAll(mapOp.getMapType(), omp::ClauseMapFlags::ref_ptr) ||
+ bitEnumContainsAll(mapOp.getMapType(), omp::ClauseMapFlags::ref_ptee));
// If it's declare target, skip it, it's handled separately. However, if
// it's declare target, and an attach map, we want to calculate the exact
@@ -6565,7 +6571,7 @@ createAlteredByCaptureMap(MapInfoData &mapData,
std::vector<llvm::Value *> offsetIdx = calculateBoundsOffset(
moduleTranslation, builder, mapData.BaseType[i]->isArrayTy(),
mapOp.getBounds());
- if (isPtrTy)
+ if (isPtrTy && (!isAttachMap || isRefPtrOrPteeMapWithAttach))
newV = builder.CreateLoad(builder.getPtrTy(), newV);
if (!offsetIdx.empty())
@@ -8182,6 +8188,35 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
convertToCaptureClauseKind(attribute.getCaptureClause().getValue());
auto deviceClause =
convertToDeviceClauseKind(attribute.getDeviceType().getValue());
+ llvm::StringRef entryMangledName = mangledName;
+ llvm::Constant *entryAddr = llvm::cast<llvm::Constant>(gVal);
+ std::function<llvm::GlobalValue::LinkageTypes()> variableLinkage;
+ llvm::SmallString<128> entryNameStorage;
+ if ((attribute.getCaptureClause().getValue() ==
+ mlir::omp::DeclareTargetCaptureClause::to ||
+ attribute.getCaptureClause().getValue() ==
+ mlir::omp::DeclareTargetCaptureClause::enter) &&
+ !isDeclaration &&
+ (gVal->hasLocalLinkage() || gVal->hasHiddenVisibility())) {
+ // Keep the original symbol local so target code can address it using
+ // target-specific local relocations, but create a visible alias for
+ // the offload entry so libomptarget can associate the host global with
+ // the actual device global.
+ entryNameStorage = (mangledName + llvm::Twine("_decl_tgt_entry")).str();
+ entryMangledName = entryNameStorage;
+ if (llvm::GlobalValue *existing =
+ llvmModule->getNamedValue(entryMangledName)) {
+ entryAddr = llvm::cast<llvm::Constant>(existing);
+ } else {
+ entryAddr = llvm::GlobalAlias::create(
+ gVal->getValueType(), gVal->getAddressSpace(),
+ llvm::GlobalValue::WeakAnyLinkage, entryMangledName, entryAddr,
+ llvmModule);
+ llvm::cast<llvm::GlobalAlias>(entryAddr)->setVisibility(
+ llvm::GlobalValue::DefaultVisibility);
+ }
+ variableLinkage = [] { return llvm::GlobalValue::WeakAnyLinkage; };
+ }
// unused for MLIR at the moment, required in Clang for book
// keeping
std::vector<llvm::GlobalVariable *> generatedRefs;
@@ -8211,9 +8246,9 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute,
ompBuilder->registerTargetGlobalVariable(
captureClause, deviceClause, isDeclaration, isExternallyVisible,
ompBuilder->getTargetEntryUniqueInfo(fileInfoCallBack, vfs),
- mangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
- /*GlobalInitializer*/ nullptr, /*VariableLinkage*/ nullptr,
- gVal->getType(), gVal);
+ entryMangledName, generatedRefs, /*OpenMPSimd*/ false, targetTriple,
+ /*GlobalInitializer*/ nullptr, variableLinkage, gVal->getType(),
+ entryAddr);
bool requiresUSM = ompBuilder->Config.hasRequiresUnifiedSharedMemory();
if (ompBuilder->Config.isTargetDevice() &&
diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
index 92c85738dbc72..3343c96962f6c 100644
--- a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir
@@ -133,6 +133,16 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe
llvm.return %0 : i32
}
+ // CHECK-DAG: @_QFEinternal_enter = internal global i32 7
+ // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [34 x i8] c"_QFEinternal_enter_decl_tgt_entry\00"
+ // CHECK-DAG: @.offloading.entry._QFEinternal_enter_decl_tgt_entry = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @_QFEinternal_enter_decl_tgt_entry, ptr @.offloading.entry_name{{.*}}, i64 4, i64 0, ptr null }, section "llvm_offload_entries"
+ // CHECK-DAG: @_QFEinternal_enter_decl_tgt_entry = weak alias i32, ptr @_QFEinternal_enter
+ // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QFEinternal_enter_decl_tgt_entry", i32 {{.*}}, i32 {{.*}}}
+ llvm.mlir.global internal @_QFEinternal_enter() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter)>} : i32 {
+ %0 = llvm.mlir.constant(7 : i32) : i32
+ llvm.return %0 : i32
+ }
+
// CHECK-DAG: @_QMtest_0Ept1 = global { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 4, i32 20180515, i8 0, i8 9, i8 1, i8 0 }
// CHECK-DAG: @_QMtest_0Ept1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept1
// CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [31 x i8] c"_QMtest_0Ept1_decl_tgt_ref_ptr\00"
diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
index b80e6220e6646..bc08fb9414e27 100644
--- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir
@@ -634,3 +634,34 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
// CHECK: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 8, i64 0]
// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 288, i64 288]
// CHECK-LABEL: define void @_QPomp_target_is_device_ptr
+
+// -----
+
+module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} {
+ llvm.func @_QPomp_target_enter_attach(%desc : !llvm.ptr,
+ %pointee : !llvm.ptr) {
+ %map = omp.map.info
+ var_ptr(%pointee : !llvm.ptr, !llvm.struct<(ptr, i64)>)
+ map_clauses(attach) capture(ByRef)
+ var_ptr_ptr(%desc : !llvm.ptr, !llvm.struct<(ptr, i64)>)
+ -> !llvm.ptr {name = ""}
+ omp.target_enter_data map_entries(%map : !llvm.ptr)
+ llvm.return
+ }
+}
+
+// CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 16384]
+// CHECK-LABEL: define void @_QPomp_target_enter_attach
+// CHECK-SAME: (ptr %[[DESC:.*]], ptr %[[POINTEE:.*]]) {
+// CHECK: %[[BASEPTRS:.*]] = alloca [1 x ptr], align 8
+// CHECK: %[[PTRS:.*]] = alloca [1 x ptr], align 8
+// CHECK: %[[SIZES:.*]] = alloca [1 x i64], align 8
+// CHECK: %[[IS_NULL:.*]] = icmp eq ptr %[[POINTEE]], null
+// CHECK-NEXT: %[[SIZE:.*]] = select i1 %[[IS_NULL]], i64 0, i64 16
+// CHECK: %[[BASEPTR:.*]] = getelementptr inbounds [1 x ptr], ptr %[[BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr %[[DESC]], ptr %[[BASEPTR]], align 8
+// CHECK-NEXT: %[[PTR:.*]] = getelementptr inbounds [1 x ptr], ptr %[[PTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr %[[POINTEE]], ptr %[[PTR]], align 8
+// CHECK-NEXT: %[[SIZEPTR:.*]] = getelementptr inbounds [1 x i64], ptr %[[SIZES]], i32 0, i32 0
+// CHECK-NEXT: store i64 %[[SIZE]], ptr %[[SIZEPTR]], align 8
+// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}}, ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null)
>From 7e6cb127cd07f8c00fc24b18ded98fada77f4e58 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Tue, 16 Jun 2026 15:58:03 +0100
Subject: [PATCH 5/6] Fix clang-format.
---
flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index dd006e046fa74..251bf3aacd39c 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -182,8 +182,8 @@ class AutomapToTargetDataPass
builder.getAttr<omp::VariableCaptureKindAttr>(
omp::VariableCaptureKind::ByRef),
/*var_ptr_ptr=*/memOp.getMemref(),
- /*var_ptr_ptr_type=*/TypeAttr::get(
- fir::unwrapRefType(memOp.getMemref().getType())),
+ /*var_ptr_ptr_type=*/
+ TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
/*members=*/SmallVector<Value>{},
/*members_index=*/ArrayAttr{},
/*bounds=*/SmallVector<Value>{},
>From d0b4e30e6788d4b96217c42325fc1082fa450985 Mon Sep 17 00:00:00 2001
From: Akash Banerjee <Akash.Banerjee at amd.com>
Date: Thu, 18 Jun 2026 15:45:08 +0100
Subject: [PATCH 6/6] Addressed reviewer comments.
---
.../Optimizer/OpenMP/AutomapToTargetData.cpp | 30 +++++-------------
.../Optimizer/OpenMP/MapInfoFinalization.cpp | 31 ++-----------------
.../Lower/OpenMP/declare-target-automap.f90 | 9 +++---
.../Transforms/omp-automap-to-target-data.fir | 7 ++---
4 files changed, 17 insertions(+), 60 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
index 251bf3aacd39c..bf269b5dd3ff2 100644
--- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
+++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
@@ -135,9 +135,11 @@ class AutomapToTargetDataPass
[&](mlir::omp::ClauseMapFlags mapType) -> mlir::omp::MapInfoOp {
mlir::Type descriptorTy =
fir::unwrapRefType(memOp.getMemref().getType());
- // The attach entry expects the descriptor object to already have a
- // device mapping, but this raw object map must not be expanded as a
- // Fortran descriptor member map.
+ // Keep the descriptor itself present. MapInfoFinalization expands this
+ // ref_ptr map and emits the attach map when the descriptor is created.
+ mapType |= omp::ClauseMapFlags::ref_ptr;
+ if (!isAlloc)
+ mapType |= omp::ClauseMapFlags::attach_never;
return mlir::omp::MapInfoOp::create(
builder, memOp.getLoc(), memOp.getMemref().getType(),
memOp.getMemref(), TypeAttr::get(descriptorTy),
@@ -150,7 +152,7 @@ class AutomapToTargetDataPass
/*members_index=*/ArrayAttr{},
/*bounds=*/SmallVector<Value>{},
/*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
- builder.getBoolAttr(true));
+ builder.getBoolAttr(false));
};
if (isAlloc)
@@ -173,27 +175,9 @@ class AutomapToTargetDataPass
builder.getBoolAttr(false));
clauses.mapVars.push_back(mapInfo);
- if (isAlloc) {
- mlir::omp::MapInfoOp attachInfo = mlir::omp::MapInfoOp::create(
- builder, memOp.getLoc(), dataAddr.getType(), dataAddr,
- TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
- builder.getAttr<omp::ClauseMapFlagsAttr>(
- omp::ClauseMapFlags::attach),
- builder.getAttr<omp::VariableCaptureKindAttr>(
- omp::VariableCaptureKind::ByRef),
- /*var_ptr_ptr=*/memOp.getMemref(),
- /*var_ptr_ptr_type=*/
- TypeAttr::get(fir::unwrapRefType(memOp.getMemref().getType())),
- /*members=*/SmallVector<Value>{},
- /*members_index=*/ArrayAttr{},
- /*bounds=*/SmallVector<Value>{},
- /*mapperId=*/mlir::FlatSymbolRefAttr(), globalOp.getSymNameAttr(),
- builder.getBoolAttr(false));
- clauses.mapVars.push_back(attachInfo);
- } else {
+ if (!isAlloc)
clauses.mapVars.push_back(
createDescriptorMap(omp::ClauseMapFlags::del));
- }
isAlloc ? omp::TargetEnterDataOp::create(builder, memOp.getLoc(), clauses)
: omp::TargetExitDataOp::create(builder, memOp.getLoc(), clauses);
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 66afbed93df7a..b744dcaf3482b 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -1351,34 +1351,6 @@ class MapInfoFinalizationPass
return false;
}
- static bool isAttachMap(mlir::omp::MapInfoOp op) {
- return (op.getMapType() & mlir::omp::ClauseMapFlags::attach) ==
- mlir::omp::ClauseMapFlags::attach;
- }
-
- static bool isDescriptorOnlyMap(mlir::omp::MapInfoOp op) {
- // A descriptor-only map keeps the descriptor object present so that a
- // subsequent attach map can update its base address. It intentionally does
- // not map the descriptor's pointee data, so descriptor member expansion
- // must leave it alone.
- if (!op.getPartialMap() || !op.getMembers().empty() ||
- !op.getBounds().empty() || op.getVarPtrPtr() ||
- op.getMapCaptureType() != mlir::omp::VariableCaptureKind::ByRef)
- return false;
-
- mlir::omp::ClauseMapFlags mapType = op.getMapType();
- if ((mapType & mlir::omp::ClauseMapFlags::from) ==
- mlir::omp::ClauseMapFlags::from ||
- (mapType & mlir::omp::ClauseMapFlags::storage) ==
- mlir::omp::ClauseMapFlags::storage)
- return false;
-
- return (mapType & mlir::omp::ClauseMapFlags::del) ==
- mlir::omp::ClauseMapFlags::del ||
- (mapType & mlir::omp::ClauseMapFlags::to) ==
- mlir::omp::ClauseMapFlags::to;
- }
-
// This pass executes on omp::MapInfoOp's containing descriptor based types
// (allocatables, pointers, assumed shape etc.) and expanding them into
// multiple omp::MapInfoOp's for each pointer member contained within the
@@ -1640,7 +1612,8 @@ class MapInfoFinalizationPass
"single users or up to two users when those users"
"are a MapInfoOp and Target mapping directive");
- if (!isAttachMap(op) && !isDescriptorOnlyMap(op) &&
+ if (!bitEnumContainsAll(op.getMapType(),
+ mlir::omp::ClauseMapFlags::attach) &&
hasADescriptor(op.getVarPtr().getDefiningOp(),
fir::unwrapRefType(op.getVarPtrType()))) {
builder.setInsertionPoint(op);
diff --git a/flang/test/Lower/OpenMP/declare-target-automap.f90 b/flang/test/Lower/OpenMP/declare-target-automap.f90
index fe46a3ab75c59..63014cbf36725 100644
--- a/flang/test/Lower/OpenMP/declare-target-automap.f90
+++ b/flang/test/Lower/OpenMP/declare-target-automap.f90
@@ -15,13 +15,14 @@ program automap_program
! CHECK-LABEL: func.func @_QQmain()
! CHECK-NOT: has_device_addr
-! CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array", partial_map = true}
+! CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%[[DESC:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to, ref_ptr) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array"}
+! CHECK: %[[DESC_BASE:.*]] = fir.box_offset %[[DESC]] base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+! CHECK: %[[ATTACH_MAP:.*]] = omp.map.info var_ptr(%[[DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(attach, ref_ptr) capture(ByRef) var_ptr_ptr(%[[DESC_BASE]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>, i32) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array"}
! CHECK: %[[STORAGE_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(storage) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
-! CHECK: %[[ATTACH_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(attach) capture(ByRef) var_ptr_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
-! CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]], %[[ATTACH_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
+! CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]], %[[ATTACH_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK-NOT: has_device_addr
! CHECK: omp.target {{.*}}map_entries(
! CHECK-NOT: has_device_addr
! CHECK: %[[DELETE_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(delete) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "_QFEautomap_array"}
-! CHECK: %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array", partial_map = true}
+! CHECK: %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete, attach_never, ref_ptr) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QFEautomap_array"}
! CHECK: omp.target_exit_data map_entries(%[[DELETE_MAP]], %[[DESC_DELETE]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
diff --git a/flang/test/Transforms/omp-automap-to-target-data.fir b/flang/test/Transforms/omp-automap-to-target-data.fir
index 7ed3cfa129e51..37187203369a4 100644
--- a/flang/test/Transforms/omp-automap-to-target-data.fir
+++ b/flang/test/Transforms/omp-automap-to-target-data.fir
@@ -49,15 +49,14 @@ module {
// CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[LOWER_BOUND]] : index) upper_bound(%[[UPPER_BOUND]] : index) extent(%[[BOX_DIMS2]]#1 : index) stride(%[[BOX_DIMS2]]#2 : index) start_idx(%[[BOX_DIMS]]#0 : index) {stride_in_bytes = true}
// CHECK: %[[DATA_ADDR:.*]] = fir.box_addr %[[ARR_BOXED]]
// CHECK-NEXT: %[[DATA_REF:.*]] = fir.convert %[[DATA_ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
-// CHECK-NEXT: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr", partial_map = true}
+// CHECK-NEXT: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(always, to, ref_ptr) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr"}
// CHECK-NEXT: %[[STORAGE_MAP:.*]] = omp.map.info var_ptr(%[[DATA_REF]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(storage) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref<!fir.array<?xi32>> {name = "_QMtestEarr"}
-// CHECK-NEXT: %[[ATTACH_MAP:.*]] = omp.map.info var_ptr(%[[DATA_REF]] : !fir.ref<!fir.array<?xi32>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(attach) capture(ByRef) var_ptr_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "_QMtestEarr"}
-// CHECK-NEXT: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]], %[[ATTACH_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
+// CHECK-NEXT: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[STORAGE_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.array<?xi32>>)
// CHECK: %[[LOAD:.*]] = fir.load %[[AUTOMAP_DECL]]#0
// CHECK: %[[EXIT_ADDR:.*]] = fir.box_addr %[[LOAD]]
// CHECK-NEXT: %[[EXIT_REF:.*]] = fir.convert %[[EXIT_ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
// CHECK-NEXT: %[[EXIT_MAP:.*]] = omp.map.info var_ptr(%[[EXIT_REF]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(delete) capture(ByRef)
-// CHECK-NEXT: %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr", partial_map = true}
+// CHECK-NEXT: %[[DESC_DELETE:.*]] = omp.map.info var_ptr(%[[AUTOMAP_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(delete, attach_never, ref_ptr) capture(ByRef) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "_QMtestEarr"}
// CHECK-NEXT: omp.target_exit_data map_entries(%[[EXIT_MAP]], %[[DESC_DELETE]] : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
// CHECK-NEXT: %[[BOXADDR:.*]] = fir.box_addr %[[LOAD]]
// CHECK-NEXT: fir.freemem %[[BOXADDR]]
More information about the flang-commits mailing list