[llvm] [WIP][Offload] Introduce ATTACH map-type support for pointer attachment. (PR #149036)
Abhinav Gaba via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 01:40:17 PDT 2025
https://github.com/abhinavgaba created https://github.com/llvm/llvm-project/pull/149036
This patch introduces libomptarget support for the ATTACH map-type, which can be used to implement OpenMP conditional compliant pointer attachment, based on whether the pointer/pointee is newly mapped on a given construct.
For example, for the following:
```c
int *p;
#pragma omp target enter data map(p[1:10])
```
The following maps can be emitted by clang:
```
(A)
&p[0], &p[1], 10 * sizeof(p[1]), TO | FROM
&p, &p[1], sizeof(p), ATTACH
```
Without this map-type, the two possible maps emitted by clang:
```
(B)
&p[0], &p[1], 10 * sizeof(p[1]), TO | FROM
(C)
&p, &p[1], 10 * sizeof(p[1]), TO | FROM | PTR_AND_OBJ
````
(B) does not perform any pointer attachment, while (C) also maps the pointer p, which are both incorrect.
In terms of implementation, maps with the ATTACH map-type are handled after all other maps have been processed, as it requires knowledge of which new allocations happened as part of the construct. As per OpenMP 5.0, an attachment should happen only when either the pointer or the pointee was newly mapped while handling the construct.
Maps with ATTACH map-type-bit do not increase/decrease the ref-count.
With OpenMP 6.1, `attach(always/never)` can be used to force/prevent attachment. For `attach(always)`, the compiler will insert the ALWAYS map-type, which would let libomptarget bypass the check about one of the pointer/pointee being new. With `attach(never)`, the ATTACH map will not be emitted at all.
The size argument of the ATTACH map-type can specify values greater than `sizeof(void*)` which can be used to support pointer attachment on Fortran descriptors. Note that this also requires shadow-pointer tracking to also support them. That has not been implemented in this patch.
This was worked upon in coordination with Ravi Narayanaswamy, who has since retired. Happy retirement, Ravi!
>From 5d72174220263f549c0d301ad9c4445a91bbde4a Mon Sep 17 00:00:00 2001
From: Abhinav Gaba <abhinav.gaba at intel.com>
Date: Wed, 16 Jul 2025 00:53:09 -0700
Subject: [PATCH] [WIP][Offload] Introduce ATTACH map-type support for pointer
attachment.
This patch introduces libomptarget support for the ATTACH map-type,
which can be used to implement OpenMP conditional compliant pointer attachment,
based on whether the pointer/pointee is newly mapped on a given construct.
For example, for the following:
```c
int *p;
#pragma omp target enter data map(p[1:10])
```
The following maps can be emitted by clang:
```
(A)
&p[0], &p[1], 10 * sizeof(p[1]), TO | FROM
&p, &p[1], sizeof(p), ATTACH
```
Without this map-type, the two possible maps emitted by clang:
```
(B)
&p[0], &p[1], 10 * sizeof(p[1]), TO | FROM
(C)
&p, &p[1], 10 * sizeof(p[1]), TO | FROM | PTR_AND_OBJ
````
(B) does not perform any pointer attachment, while (C) also maps the
pointer p, which are both incorrect.
In terms of implementation, maps with the ATTACH map-type are handled after
all other maps have been processed, as it requires knowledge of which new
allocations happened as part of the construct. As per OpenMP 5.0, an
attachment should happen only when either the pointer or the pointee was
newly mapped while handling the construct.
Maps with ATTACH map-type-bit do not increase/decrease the ref-count.
With OpenMP 6.1, `attach(always/never)` can be used to force/prevent
attachment. For `attach(always)`, the compiler will insert the ALWAYS
map-type, which would let libomptarget bypass the check about one of the
pointer/pointee being new. With `attach(never)`, the ATTACH map will not
be emitted at all.
The size argument of the ATTACH map-type can specify values greater than
`sizeof(void*)` which can be used to support pointer attachment on Fortran
descriptors. Note that this also requires shadow-pointer tracking to also
support them. That has not been implemented in this patch.
This was worked upon in coordination with Ravi Narayanaswamy, who has
since retired. Happy retirement, Ravi!
---
offload/include/OpenMP/Mapping.h | 40 ++-
offload/include/omptarget.h | 3 +
offload/libomptarget/interface.cpp | 22 +-
offload/libomptarget/omptarget.cpp | 409 ++++++++++++++++++++++++++---
4 files changed, 438 insertions(+), 36 deletions(-)
diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h
index b9f5c16582931..93c1e56905ae4 100644
--- a/offload/include/OpenMP/Mapping.h
+++ b/offload/include/OpenMP/Mapping.h
@@ -417,12 +417,42 @@ struct MapperComponentsTy {
typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t,
void *);
+/// Structure to store information about a single ATTACH map entry.
+struct AttachMapInfo {
+ void *PointerBase;
+ void *PointeeBegin;
+ int64_t PointerSize;
+ int64_t MapType;
+ map_var_info_t Pointername;
+
+ AttachMapInfo(void *PointerBase, void *PointeeBegin, int64_t Size,
+ int64_t Type, map_var_info_t Name)
+ : PointerBase(PointerBase), PointeeBegin(PointeeBegin), PointerSize(Size),
+ MapType(Type), Pointername(Name) {}
+};
+
+/// Structure to track ATTACH entries and new allocations across recursive calls
+/// (for handling mappers) to targetDataBegin for a given construct.
+struct AttachInfoTy {
+ /// ATTACH map entries for deferred processing.
+ llvm::SmallVector<AttachMapInfo> AttachEntries;
+
+ /// Key: host pointer, Value: allocation size.
+ llvm::DenseMap<void *, int64_t> NewAllocations;
+
+ AttachInfoTy() = default;
+
+ // Delete copy constructor and copy assignment operator to prevent copying
+ AttachInfoTy(const AttachInfoTy &) = delete;
+ AttachInfoTy &operator=(const AttachInfoTy &) = delete;
+};
+
// Function pointer type for targetData* functions (targetDataBegin,
// targetDataEnd and targetDataUpdate).
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
void **, int64_t *, int64_t *,
map_var_info_t *, void **, AsyncInfoTy &,
- bool);
+ AttachInfoTy *, bool);
void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device,
bool toStdOut = false);
@@ -431,20 +461,26 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
+ AttachInfoTy *AttachInfo = nullptr,
bool FromMapper = false);
int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
- bool FromMapper = false);
+ AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false);
int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
+ AttachInfoTy *AttachInfo = nullptr,
bool FromMapper = false);
+// Process deferred ATTACH map entries collected during targetDataBegin.
+int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
+ AsyncInfoTy &AsyncInfo);
+
struct MappingInfoTy {
MappingInfoTy(DeviceTy &Device) : Device(Device) {}
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 6971780c7bdb5..9e4bfd2f9cfbe 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -80,6 +80,9 @@ enum tgt_map_type {
// the structured region
// This is an OpenMP extension for the sake of OpenACC support.
OMP_TGT_MAPTYPE_OMPX_HOLD = 0x2000,
+ // Attach pointer and pointee, after processing all other maps.
+ // Applicable to map-entering directives. Does not change ref-count.
+ OMP_TGT_MAPTYPE_ATTACH = 0x4000,
// descriptor for non-contiguous target-update
OMP_TGT_MAPTYPE_NON_CONTIG = 0x100000000000,
// member of struct, member given by [16 MSBs] - 1
diff --git a/offload/libomptarget/interface.cpp b/offload/libomptarget/interface.cpp
index ea354400f2e99..16e46e6d23872 100644
--- a/offload/libomptarget/interface.cpp
+++ b/offload/libomptarget/interface.cpp
@@ -165,12 +165,28 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
OMPT_GET_RETURN_ADDRESS);)
int Rc = OFFLOAD_SUCCESS;
+
+ // Only allocate AttachInfo for targetDataBegin
+ AttachInfoTy *AttachInfo = nullptr;
+ if (TargetDataFunction == targetDataBegin) {
+ AttachInfo = new AttachInfoTy();
+ }
+
Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
- false /*FromMapper=*/);
+ AttachInfo, false /*FromMapper=*/);
- if (Rc == OFFLOAD_SUCCESS)
- Rc = AsyncInfo.synchronize();
+ if (Rc == OFFLOAD_SUCCESS) {
+ // Process deferred ATTACH entries BEFORE synchronization
+ if (AttachInfo && !AttachInfo->AttachEntries.empty())
+ Rc = processAttachEntries(*DeviceOrErr, *AttachInfo, AsyncInfo);
+
+ if (Rc == OFFLOAD_SUCCESS)
+ Rc = AsyncInfo.synchronize();
+ }
+
+ if (AttachInfo)
+ delete AttachInfo;
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
}
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 5b25d955dd320..aa142814e8384 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -293,7 +293,8 @@ void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name) {
int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames,
void *ArgMapper, AsyncInfoTy &AsyncInfo,
- TargetDataFuncPtrTy TargetDataFunction) {
+ TargetDataFuncPtrTy TargetDataFunction,
+ AttachInfoTy *AttachInfo = nullptr) {
DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper));
// The mapper function fills up Components.
@@ -324,17 +325,196 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
MapperArgsBase.data(), MapperArgs.data(),
MapperArgSizes.data(), MapperArgTypes.data(),
MapperArgNames.data(), /*arg_mappers*/ nullptr,
- AsyncInfo, /*FromMapper=*/true);
+ AsyncInfo, AttachInfo, /*FromMapper=*/true);
return Rc;
}
+/// Utility function to perform a pointer attachment operation.
+///
+/// For something like:
+/// \code
+/// int *p;
+/// ...
+/// #pragma omp target enter data map(to:p[10:10])
+/// \endcode
+///
+/// for which the attachment operation gets represented using:
+/// \code
+/// &p, &p[10], sizeof(p), ATTACH
+/// \endcode
+///
+/// (Hst|Tgt)PtrAddr represents &p
+/// (Hst|Tgt)PteeBase represents &p[0]
+/// (Hst|Tgt)PteeBegin represents &p[10]
+///
+/// This function first computes the expected TgtPteeBase using:
+/// TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
+///
+/// and then attaches TgtPteeBase to TgtPtrAddr.
+///
+/// \p HstPtrSize represents the size of the pointer p. For C/C++, this
+/// should be same as "sizeof(void*)" (say 8).
+///
+/// However, for Fortran, pointers/allocatables, which are also eligible for
+/// "pointer-attachment", may be implemented using descriptors that contain the
+/// address of the pointee in the first 8 bytes, but also contain other
+/// information such as lower-bound/upper-bound etc in their subsequent fields.
+///
+/// For example, for the following:
+/// \code
+/// integer, allocatable :: x(:)
+/// integer, pointer :: p(:)
+/// ...
+/// p => x(10: 19)
+/// ...
+/// !$omp target enter data map(to:p(:))
+/// \endcode
+///
+/// The map should trigger a pointer-attachment (assuming the pointer-attachment
+/// conditions as noted on processAttachEntries are met) between the descriptor
+/// for p, and its pointee data.
+///
+/// Since only the first 8 bytes of the descriptor contain the address of the
+/// pointee, an attachment operation on device descriptors involves:
+/// * Setting the first 8 bytes of the device descriptor to point the device
+/// address of the pointee.
+/// * Copying the remaining information about bounds/offset etc. from the host
+/// descriptor to the device descriptor.
+///
+/// The function also handles pointer-attachment portion of PTR_AND_OBJ maps,
+/// like:
+/// \code
+/// &p, &p[10], 10 * sizeof(p[10]), PTR_AND_OBJ
+/// \endcoe
+/// by using "sizeof(void*)" as \p HstPtrSize.
+static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
+ void **HstPtrAddr, void *HstPteeBase,
+ void *HstPteeBegin, void **TgtPtrAddr,
+ void *TgtPteeBegin, int64_t HstPtrSize,
+ TargetPointerResultTy &PtrTPR) {
+ assert(PtrTPR.getEntry() &&
+ "Need a valid pointer entry to perform pointer-attachment");
+
+ int64_t VoidPtrSize = sizeof(void *);
+ assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
+
+ uint64_t Delta = (uint64_t)HstPteeBegin - (uint64_t)HstPteeBase;
+ void *TgtPteeBase = (void *)((uint64_t)TgtPteeBegin - Delta);
+
+ // Add shadow pointer tracking
+ // TODO: Support shadow-tracking of larger than VoidPtrSize pointers,
+ // to support restoration of Fortran descriptors. Currently, this check
+ // would return false, even if the host Fortran descriptor was, and we
+ // should have done an update of the device descriptor. e.g.
+ //
+ // !$omp target enter data map(x(1:100)) ! (1)
+ // p => x(10: 19)
+ // !$omp target enter data map(p, p(:)) ! (2)
+ // p => x(5: 9)
+ // !$omp target enter data map(attach(always): p(:)) ! (3)
+ //
+ // While PtrAddr(&desc_p) and PteeBase(&p(1)) are same for (2) and (3), the
+ // pointer attachment for (3) needs to update the bounds information
+ // in the descriptor of p on device.
+ if (!PtrTPR.getEntry()->addShadowPointer(
+ ShadowPtrInfoTy{HstPtrAddr, HstPteeBase, TgtPtrAddr, TgtPteeBase}))
+ return OFFLOAD_SUCCESS;
+
+ DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(TgtPtrAddr),
+ DPxPTR(TgtPteeBase));
+
+ // Lambda to handle submitData result and perform final steps.
+ auto HandleSubmitResult = [&](int SubmitResult) -> int {
+ if (SubmitResult != OFFLOAD_SUCCESS) {
+ REPORT("Failed to update pointer on device.\n");
+ return OFFLOAD_FAIL;
+ }
+
+ if (PtrTPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
+ OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+
+ return OFFLOAD_SUCCESS;
+ };
+
+ bool IsPtrAFortranDescriptor = HstPtrSize > VoidPtrSize;
+ if (!IsPtrAFortranDescriptor) {
+ // For "regular" pointers, we can use the VoidPtrLocation from AsyncInfo as
+ // the buffer space for the submission.
+ void *&BufferElement = AsyncInfo.getVoidPtrLocation();
+ BufferElement = TgtPteeBase;
+
+ // Submit the updated pointer value to device
+ return HandleSubmitResult(Device.submitData(
+ TgtPtrAddr, &BufferElement, VoidPtrSize, AsyncInfo, PtrTPR.getEntry()));
+ }
+
+ // For larger "pointers" (like Fortran's descriptors), we create a dynamic
+ // buffer, which will be eventually destroyed by AsyncInfo's post-processing
+ // callback.
+ char *DataBuffer = new char[HstPtrSize];
+
+ // For such descriptors, to the first VoidPtrSize bytes, we store the
+ // pointee's device address.
+ std::memcpy(DataBuffer, &TgtPteeBase, sizeof(void *));
+
+ // And to the remaining bytes, we copy the remaining contents of the host
+ // descriptor after the initial VoidPtrSize bytes.
+ uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
+ void *HstDescriptorFieldsAddr = (char *)HstPtrAddr + VoidPtrSize;
+ std::memcpy(DataBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
+ HstDescriptorFieldsSize);
+
+ DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD ") (pointer + %" PRId64
+ " additional bytes from host descriptor " DPxMOD ")\n",
+ HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
+ DPxPTR(HstDescriptorFieldsAddr));
+
+ // Submit the entire buffer to device
+ // FIXME: When handling ATTACH map-type, pointer attachment needs to happen
+ // after the other mapping operations are done, to avoid possibility of
+ // pending transfers clobbering the attachment, for example:
+ //
+ // int *p = ...;
+ // int **pp = &p;
+ // map(to: pp[0], p[0])
+ //
+ // Which would be represented by:
+ // &pp[0], &pp[0], sizeof(pp[0]), TO (1)
+ // &p[0], &p[0], sizeof(p[0]), TO (2)
+ //
+ // &pp, &pp[0], sizeof(pp), ATTACH (3)
+ // &p, &p[0], sizeof(p), ATTACH (4)
+ //
+ // (4) and (1) are both trying to modify the device memory corresponding to
+ // &p. We need to ensure that (4) happens last.
+ //
+ // One possible solution to this could be to insert a "device barrier" before
+ // the first ATTACH submitData call, so that every subsequent submitData waits
+ // for any prior operations to finish. Like:
+ // Device.submitData(..., /*InOrder=*/IsFirstAttachEntry)
+ // Where the boolean InOrder being true means that this submission should
+ // wait for prior memory submissions to finish.
+ int SubmitResult =
+ Device.submitData(TgtPtrAddr, DataBuffer, HstPtrSize, AsyncInfo,
+ PtrTPR.getEntry());
+
+ AsyncInfo.addPostProcessingFunction([DataBuffer]() -> int {
+ delete[] DataBuffer;
+ return OFFLOAD_SUCCESS;
+ });
+ return HandleSubmitResult(SubmitResult);
+}
+
/// Internal function to do the mapping and transfer the data to the device
int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
- bool FromMapper) {
+ AttachInfoTy *AttachInfo, bool FromMapper) {
+ assert(AttachInfo && "AttachInfo must be available for targetDataBegin for "
+ "handling ATTACH map-types.");
// process each input.
for (int32_t I = 0; I < ArgNum; ++I) {
// Ignore private variables and arrays - there is no mapping for them.
@@ -352,7 +532,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I];
int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I],
ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo,
- targetDataBegin);
+ targetDataBegin, AttachInfo);
if (Rc != OFFLOAD_SUCCESS) {
REPORT("Call to targetDataBegin via targetDataMapper for custom mapper"
@@ -369,6 +549,18 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
int64_t DataSize = ArgSizes[I];
map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I];
+ // ATTACH map-types are supposed to be handled after all mapping for the
+ // construct is done. Defer their processing.
+ if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH) {
+ AttachInfo->AttachEntries.emplace_back(
+ /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin,
+ /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
+ /*PointeeName=*/HstPtrName);
+
+ DP("Deferring ATTACH map-type processing for argument %d\n", I);
+ continue;
+ }
+
// Adjust for proper alignment if this is a combined entry (for structs).
// Look at the next argument - if that is MEMBER_OF this one, then this one
// is a combined entry.
@@ -434,6 +626,11 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
: "device failure or illegal mapping");
return OFFLOAD_FAIL;
}
+
+ // Track new allocation, for eventual use in attachment decision-making.
+ if (PointerTpr.Flags.IsNewEntry && !IsHostPtr)
+ AttachInfo->NewAllocations[HstPtrBase] = sizeof(void *);
+
DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
"\n",
sizeof(void *), DPxPTR(PointerTgtPtrBegin),
@@ -464,6 +661,11 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
: "device failure or illegal mapping");
return OFFLOAD_FAIL;
}
+
+ // Track new allocation, for eventual use in attachment decision-making.
+ if (TPR.Flags.IsNewEntry && !IsHostPtr && TgtPtrBegin)
+ AttachInfo->NewAllocations[HstPtrBegin] = DataSize;
+
DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
" - is%s new\n",
DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not"));
@@ -476,30 +678,12 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
}
if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
-
- uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
- void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
-
- if (PointerTpr.getEntry()->addShadowPointer(ShadowPtrInfoTy{
- (void **)PointerHstPtrBegin, HstPtrBase,
- (void **)PointerTgtPtrBegin, ExpectedTgtPtrBase})) {
- DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
- DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin));
-
- void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation();
- TgtPtrBase = ExpectedTgtPtrBase;
-
- int Ret =
- Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, sizeof(void *),
- AsyncInfo, PointerTpr.getEntry());
- if (Ret != OFFLOAD_SUCCESS) {
- REPORT("Copying data to device failed.\n");
- return OFFLOAD_FAIL;
- }
- if (PointerTpr.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
- OFFLOAD_SUCCESS)
- return OFFLOAD_FAIL;
- }
+ int Ret = performPointerAttachment(Device, AsyncInfo,
+ (void **)PointerHstPtrBegin, HstPtrBase, HstPtrBegin,
+ (void **)PointerTgtPtrBegin, TgtPtrBegin,
+ sizeof(void *), PointerTpr);
+ if (Ret != OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
}
// Check if variable can be used on the device:
@@ -515,6 +699,145 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
return OFFLOAD_SUCCESS;
}
+/// Process deferred ATTACH map entries collected during targetDataBegin.
+///
+/// From OpenMP's perspective, when mapping something that has a base pointer,
+/// such as:
+/// \code
+/// int *p;
+/// #pragma omp enter target data map(to: p[10:20])
+/// \endcode
+///
+/// a pointer-attachment between p and &p[10] should occur if both p and
+/// p[10] are present on the device after doing all allocations for all maps
+/// on the construct, and one of the following is true:
+///
+/// * The pointer p was newly allocated while handling the construct
+/// * The pointee p[10:20] was newly allocated while handling the construct
+/// * attach(always) map-type modifier was specified (OpenMP 6.1)
+///
+/// That's why we collect all attach entries and new memory allocations during
+/// targetDataBegin, and use that information to make the decision of whether
+/// to perform a pointer-attachment or not here, after maps have been handled.
+int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
+ AsyncInfoTy &AsyncInfo) {
+ // Report all tracked allocations from both main loop and ATTACH processing
+ if (!AttachInfo.NewAllocations.empty()) {
+ DP("Tracked %u total new allocations:\n",
+ (unsigned)AttachInfo.NewAllocations.size());
+ for (const auto &Alloc : AttachInfo.NewAllocations) {
+ DP(" Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n",
+ DPxPTR(Alloc.first), Alloc.second);
+ }
+ }
+
+ if (AttachInfo.AttachEntries.empty())
+ return OFFLOAD_SUCCESS;
+
+ DP("Processing %zu deferred ATTACH map entries\n",
+ AttachInfo.AttachEntries.size());
+
+ for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
+ ++EntryIdx) {
+ const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
+
+ void **HstPtr = (void **)AttachEntry.PointerBase;
+
+ void *HstPteeBase = *HstPtr;
+ void *HstPteeBegin = AttachEntry.PointeeBegin;
+
+ int64_t PtrSize = AttachEntry.PointerSize;
+ int64_t MapType = AttachEntry.MapType;
+
+ DP("Processing ATTACH entry %zu: HstPtr=" DPxMOD ", HstPteeBegin=" DPxMOD
+ ", Size=%" PRId64 ", Type=0x%" PRIx64 "\n",
+ EntryIdx, DPxPTR(HstPtr), DPxPTR(HstPteeBegin), PtrSize, MapType);
+
+ const bool IsAttachAlways = MapType & OMP_TGT_MAPTYPE_ALWAYS;
+
+ // Lambda to check if a pointer was newly allocated
+ auto WasNewlyAllocated = [&](void *Ptr, const char *PtrName) {
+ bool IsNewlyAllocated =
+ llvm::any_of(AttachInfo.NewAllocations, [&](const auto &Alloc) {
+ void *AllocPtr = Alloc.first;
+ int64_t AllocSize = Alloc.second;
+ return Ptr >= AllocPtr &&
+ Ptr < (void *)((char *)AllocPtr + AllocSize);
+ });
+ DP("ATTACH entry %zu: %s pointer " DPxMOD " was newly allocated: %s\n",
+ EntryIdx, PtrName, DPxPTR(Ptr), IsNewlyAllocated ? "yes" : "no");
+ return IsNewlyAllocated;
+ };
+
+ // Only process ATTACH if base/begin was newly allocated OR ALWAYS flag is
+ // set
+ if (!IsAttachAlways && !WasNewlyAllocated(HstPtr, "pointer") &&
+ !WasNewlyAllocated(HstPteeBegin, "pointee")) {
+ DP("Skipping ATTACH entry %zu: neither pointer nor pointee was newly "
+ "allocated and no ALWAYS flag\n",
+ EntryIdx);
+ continue;
+ }
+
+ DP("Processing ATTACH entry %zu: Always=%s\n", EntryIdx,
+ IsAttachAlways ? "yes" : "no");
+
+ // Lambda to perform target pointer lookup and validation
+ auto LookupTargetPointer = [&](void *Ptr, int64_t Size, const char *PtrType)
+ -> std::optional<TargetPointerResultTy> {
+ // ATTACH map-type does not change ref-count, or do any allocation
+ // We just need to do a lookup for the pointer/pointee.
+ TargetPointerResultTy TPR = Device.getMappingInfo().getTgtPtrBegin(
+ Ptr, Size, /*UpdateRefCount=*/false,
+ /*UseHoldRefCount=*/false, /*MustContain=*/true);
+
+ DP("ATTACH entry %zu: %s lookup - HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
+ ", IsPresent=%s, IsHostPtr=%s\n",
+ EntryIdx, PtrType, DPxPTR(Ptr), DPxPTR(TPR.TargetPointer),
+ TPR.isPresent() ? "yes" : "no",
+ TPR.Flags.IsHostPointer ? "yes" : "no");
+
+ if (!TPR.isPresent()) {
+ DP("Skipping ATTACH entry %zu: %s not present on device\n", EntryIdx,
+ PtrType);
+ return std::nullopt;
+ }
+ if (TPR.Flags.IsHostPointer) {
+ DP("Skipping ATTACH entry %zu: device version of the %s is a host "
+ "pointer.\n",
+ EntryIdx, PtrType);
+ return std::nullopt;
+ }
+
+ return TPR;
+ };
+
+ // Get device version of the pointer (e.g., &p)
+ auto PtrTPROpt = LookupTargetPointer(HstPtr, PtrSize, "pointer");
+ if (!PtrTPROpt)
+ continue;
+ TargetPointerResultTy &PtrTPR = *PtrTPROpt;
+ void **TgtPtrBase = (void **)PtrTPR.TargetPointer;
+
+ // Get device version of the pointee (e.g., &p[10])
+ auto PteeTPROpt = LookupTargetPointer(HstPteeBegin, 0, "pointee");
+ if (!PteeTPROpt)
+ continue;
+ void *TgtPteeBegin = PteeTPROpt->TargetPointer;
+
+ // Update the device pointer to point to device pointee.
+ int Ret = performPointerAttachment(Device, AsyncInfo, HstPtr, HstPteeBase,
+ HstPteeBegin, TgtPtrBase, TgtPteeBegin,
+ PtrSize, PtrTPR);
+ if (Ret != OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+
+ DP("ATTACH entry %zu processed successfully\n", EntryIdx);
+ }
+
+ return OFFLOAD_SUCCESS;
+}
+
namespace {
/// This structure contains information to deallocate a target pointer, aka.
/// used to fix up the shadow map and potentially delete the entry from the
@@ -624,7 +947,8 @@ postProcessingTargetDataEnd(DeviceTy *Device,
int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
- void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) {
+ void **ArgMappers, AsyncInfoTy &AsyncInfo,
+ AttachInfoTy *AttachInfo, bool FromMapper) {
int Ret = OFFLOAD_SUCCESS;
auto *PostProcessingPtrs = new SmallVector<PostProcessingInfo>();
// process each input.
@@ -635,6 +959,14 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
(ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
continue;
+ // Ignore ATTACH entries - they should only be honored on map-entering
+ // directives. They may be encountered here while handling the "end" part of
+ // "#pragma omp target".
+ if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH) {
+ DP("Ignoring ATTACH entry %d in targetDataEnd\n", I);
+ continue;
+ }
+
if (ArgMappers && ArgMappers[I]) {
// Instead of executing the regular path of targetDataEnd, call the
// targetDataMapper variant which will call targetDataEnd again
@@ -900,7 +1232,8 @@ static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
- void **ArgMappers, AsyncInfoTy &AsyncInfo, bool) {
+ void **ArgMappers, AsyncInfoTy &AsyncInfo,
+ AttachInfoTy *AttachInfo, bool FromMapper) {
// process each input.
for (int32_t I = 0; I < ArgNum; ++I) {
if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
@@ -1213,13 +1546,27 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
if (!DeviceOrErr)
FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
+ // Create AttachInfo for tracking any ATTACH entries, or new-allocations
+ // when handling the "begin" mapping for a target constructs.
+ AttachInfoTy AttachInfo;
+
int Ret = targetDataBegin(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes,
- ArgTypes, ArgNames, ArgMappers, AsyncInfo);
+ ArgTypes, ArgNames, ArgMappers, AsyncInfo,
+ &AttachInfo, false /*FromMapper=*/);
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Call to targetDataBegin failed, abort target.\n");
return OFFLOAD_FAIL;
}
+ // Process collected ATTACH entries
+ if (!AttachInfo.AttachEntries.empty()) {
+ Ret = processAttachEntries(*DeviceOrErr, AttachInfo, AsyncInfo);
+ if (Ret != OFFLOAD_SUCCESS) {
+ REPORT("Failed to process ATTACH entries.\n");
+ return OFFLOAD_FAIL;
+ }
+ }
+
// List of (first-)private arrays allocated for this target region
SmallVector<int> TgtArgsPositions(ArgNum, -1);
More information about the llvm-commits
mailing list