[llvm] [WIP][Offload] Introduce ATTACH map-type support for pointer attachment. (PR #149036)
Abhinav Gaba via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 27 04:50:31 PDT 2025
================
@@ -515,6 +675,187 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
return OFFLOAD_SUCCESS;
}
+/// Process deferred ATTACH map entries collected during targetDataBegin.
+///
+/// From OpenMP's perspective, when mapping something that has a base pointer,
+/// such as:
+/// ```cpp
+/// int *p;
+/// #pragma omp target enter data map(to: p[10:20])
+/// ```
+///
+/// a pointer-attachment between p and &p[10] should occur if both p and
+/// p[10] are present on the device after doing all allocations for all maps
+/// on the construct, and one of the following is true:
+///
+/// * The pointer p was newly allocated while handling the construct
+/// * The pointee p[10:20] was newly allocated while handling the construct
+/// * attach(always) map-type modifier was specified (OpenMP 6.1)
+///
+/// That's why we collect all attach entries and new memory allocations during
+/// targetDataBegin, and use that information to make the decision of whether
+/// to perform a pointer-attachment or not here, after maps have been handled.
+///
+/// Additionally, once we decide that a pointer-attachment should be performed,
+/// we need to make sure that it happens after any previously submitted data
+/// transfers have completed, to avoid the possibility of the pending transfers
+/// clobbering the attachment. For example:
+///
+/// ```cpp
+/// int *p = ...;
+/// int **pp = &p;
+/// map(to: pp[0], p[0])
+/// ```
+///
+/// Which would be represented by:
+/// ```
+/// &pp[0], &pp[0], sizeof(pp[0]), TO (1)
+/// &p[0], &p[0], sizeof(p[0]), TO (2)
+///
+/// &pp, &pp[0], sizeof(pp), ATTACH (3)
+/// &p, &p[0], sizeof(p), ATTACH (4)
+/// ```
+///
+/// (4) and (1) are both trying to modify the device memory corresponding to
+/// `&p`. So, if we decide that (4) should do an attachment, we also need to
+/// ensure that (4) happens after (1) is complete.
+///
+/// For this purpose, we insert a data_fence before the first
+/// pointer-attachment, (3), to ensure that all pending transfers finish first.
+int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
+ AsyncInfoTy &AsyncInfo) {
+ // Report all tracked allocations from both main loop and ATTACH processing
+ if (!AttachInfo.NewAllocations.empty()) {
+ DP("Tracked %u total new allocations:\n",
+ (unsigned)AttachInfo.NewAllocations.size());
+ for (const auto &Alloc : AttachInfo.NewAllocations) {
+ DP(" Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n",
+ DPxPTR(Alloc.first), Alloc.second);
+ }
+ }
+
+ if (AttachInfo.AttachEntries.empty())
+ return OFFLOAD_SUCCESS;
+
+ DP("Processing %zu deferred ATTACH map entries\n",
+ AttachInfo.AttachEntries.size());
+
+ int Ret = OFFLOAD_SUCCESS;
+ bool IsFirstPointerAttachment = true;
+ for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
+ ++EntryIdx) {
+ const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
+
+ void **HstPtr = (void **)AttachEntry.PointerBase;
+
+ void *HstPteeBase = *HstPtr;
+ void *HstPteeBegin = AttachEntry.PointeeBegin;
+
+ int64_t PtrSize = AttachEntry.PointerSize;
+ int64_t MapType = AttachEntry.MapType;
+
+ DP("Processing ATTACH entry %zu: HstPtr=" DPxMOD ", HstPteeBegin=" DPxMOD
+ ", Size=%" PRId64 ", Type=0x%" PRIx64 "\n",
+ EntryIdx, DPxPTR(HstPtr), DPxPTR(HstPteeBegin), PtrSize, MapType);
+
+ const bool IsAttachAlways = MapType & OMP_TGT_MAPTYPE_ALWAYS;
+
+ // Lambda to check if a pointer was newly allocated
+ auto WasNewlyAllocated = [&](void *Ptr, const char *PtrName) {
+ bool IsNewlyAllocated =
+ llvm::any_of(AttachInfo.NewAllocations, [&](const auto &Alloc) {
+ void *AllocPtr = Alloc.first;
+ int64_t AllocSize = Alloc.second;
+ return Ptr >= AllocPtr &&
+ Ptr < (void *)((char *)AllocPtr + AllocSize);
+ });
+ DP("ATTACH entry %zu: %s pointer " DPxMOD " was newly allocated: %s\n",
+ EntryIdx, PtrName, DPxPTR(Ptr), IsNewlyAllocated ? "yes" : "no");
+ return IsNewlyAllocated;
+ };
+
+ // Only process ATTACH if base/begin was newly allocated OR ALWAYS flag is
+ // set
+ if (!IsAttachAlways && !WasNewlyAllocated(HstPtr, "pointer") &&
+ !WasNewlyAllocated(HstPteeBegin, "pointee")) {
+ DP("Skipping ATTACH entry %zu: neither pointer nor pointee was newly "
+ "allocated and no ALWAYS flag\n",
+ EntryIdx);
+ continue;
+ }
+
+ DP("Processing ATTACH entry %zu: Always=%s\n", EntryIdx,
+ IsAttachAlways ? "yes" : "no");
+
+ // Lambda to perform target pointer lookup and validation
+ auto LookupTargetPointer =
+ [&](void *Ptr, int64_t Size,
+ const char *PtrType) -> std::optional<TargetPointerResultTy> {
+ // ATTACH map-type does not change ref-count, or do any allocation
+ // We just need to do a lookup for the pointer/pointee.
+ TargetPointerResultTy TPR = Device.getMappingInfo().getTgtPtrBegin(
+ Ptr, Size, /*UpdateRefCount=*/false,
+ /*UseHoldRefCount=*/false, /*MustContain=*/true);
+
+ DP("ATTACH entry %zu: %s lookup - HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
+ ", IsPresent=%s, IsHostPtr=%s\n",
+ EntryIdx, PtrType, DPxPTR(Ptr), DPxPTR(TPR.TargetPointer),
+ TPR.isPresent() ? "yes" : "no",
+ TPR.Flags.IsHostPointer ? "yes" : "no");
+
+ if (!TPR.isPresent()) {
+ DP("Skipping ATTACH entry %zu: %s not present on device\n", EntryIdx,
+ PtrType);
+ return std::nullopt;
+ }
+ if (TPR.Flags.IsHostPointer) {
+ DP("Skipping ATTACH entry %zu: device version of the %s is a host "
+ "pointer.\n",
+ EntryIdx, PtrType);
+ return std::nullopt;
+ }
+
+ return TPR;
+ };
+
+ // Get device version of the pointer (e.g., &p)
+ auto PtrTPROpt = LookupTargetPointer(HstPtr, PtrSize, "pointer");
+ if (!PtrTPROpt)
+ continue;
+ TargetPointerResultTy &PtrTPR = *PtrTPROpt;
+ void **TgtPtrBase = (void **)PtrTPR.TargetPointer;
+
+ // Get device version of the pointee (e.g., &p[10])
+ auto PteeTPROpt = LookupTargetPointer(HstPteeBegin, 0, "pointee");
+ if (!PteeTPROpt)
+ continue;
+ void *TgtPteeBegin = PteeTPROpt->TargetPointer;
+
+ // Insert a data-fence before the first pointer-attachment.
+ if (IsFirstPointerAttachment) {
+ IsFirstPointerAttachment = false;
+ DP("Inserting a data fence before the first pointer attachment.\n");
+ Ret = Device.dataFence(AsyncInfo);
----------------
abhinavgaba wrote:
This data fence could be delayed until just before the `submitData` call in `performPointerAttachment`, but the control flow would become uglier and harder to follow: the `IsFirstPointerAttachment` flag would have to be passed by reference into `performPointerAttachment` and set to `false` there.
https://github.com/llvm/llvm-project/pull/149036
More information about the llvm-commits
mailing list