[llvm] [WIP][Offload] Introduce ATTACH map-type support for pointer attachment. (PR #149036)

Abhinav Gaba via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 27 04:50:31 PDT 2025


================
@@ -515,6 +675,187 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
   return OFFLOAD_SUCCESS;
 }
 
+/// Process deferred ATTACH map entries collected during targetDataBegin.
+///
+/// From OpenMP's perspective, when mapping something that has a base pointer,
+/// such as:
+/// ```cpp
+///   int *p;
+///   #pragma omp target enter data map(to: p[10:20])
+/// ```
+///
+/// a pointer-attachment between p and &p[10] should occur if both p and
+/// p[10] are present on the device after doing all allocations for all maps
+/// on the construct, and one of the following is true:
+///
+/// * The pointer p was newly allocated while handling the construct
+/// * The pointee p[10:20] was newly allocated while handling the construct
+/// * attach(always) map-type modifier was specified (OpenMP 6.1)
+///
+/// That's why we collect all attach entries and new memory allocations during
+/// targetDataBegin, and use that information to make the decision of whether
+/// to perform a pointer-attachment or not here, after maps have been handled.
+///
+/// Additionally, once we decide that a pointer-attachment should be performed,
+/// we need to make sure that it happens after any previously submitted data
+/// transfers have completed, to avoid the possibility of the pending transfers
+/// clobbering the attachment. For example:
+///
+/// ```cpp
+///   int *p = ...;
+///   int **pp = &p;
+///   map(to: pp[0], p[0])
+/// ```
+///
+/// Which would be represented by:
+/// ```
+/// &pp[0], &pp[0], sizeof(pp[0]), TO (1)
+/// &p[0], &p[0], sizeof(p[0]), TO    (2)
+///
+/// &pp, &pp[0], sizeof(pp), ATTACH   (3)
+/// &p, &p[0], sizeof(p), ATTACH      (4)
+/// ```
+///
+/// (4) and (1) are both trying to modify the device memory corresponding to
+/// `&p`. So, if we decide that (4) should do an attachment, we also need to
+/// ensure that (4) happens after (1) is complete.
+///
+/// For this purpose, we insert a data_fence before the first
+/// pointer-attachment, (3), to ensure that all pending transfers finish first.
+int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
+                         AsyncInfoTy &AsyncInfo) {
+  // Report all tracked allocations from both main loop and ATTACH processing
+  if (!AttachInfo.NewAllocations.empty()) {
+    DP("Tracked %u total new allocations:\n",
+       (unsigned)AttachInfo.NewAllocations.size());
+    for (const auto &Alloc : AttachInfo.NewAllocations) {
+      DP("  Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n",
+         DPxPTR(Alloc.first), Alloc.second);
+    }
+  }
+
+  if (AttachInfo.AttachEntries.empty())
+    return OFFLOAD_SUCCESS;
+
+  DP("Processing %zu deferred ATTACH map entries\n",
+     AttachInfo.AttachEntries.size());
+
+  int Ret = OFFLOAD_SUCCESS;
+  bool IsFirstPointerAttachment = true;
+  for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
+       ++EntryIdx) {
+    const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
+
+    void **HstPtr = (void **)AttachEntry.PointerBase;
+
+    void *HstPteeBase = *HstPtr;
+    void *HstPteeBegin = AttachEntry.PointeeBegin;
+
+    int64_t PtrSize = AttachEntry.PointerSize;
+    int64_t MapType = AttachEntry.MapType;
+
+    DP("Processing ATTACH entry %zu: HstPtr=" DPxMOD ", HstPteeBegin=" DPxMOD
+       ", Size=%" PRId64 ", Type=0x%" PRIx64 "\n",
+       EntryIdx, DPxPTR(HstPtr), DPxPTR(HstPteeBegin), PtrSize, MapType);
+
+    const bool IsAttachAlways = MapType & OMP_TGT_MAPTYPE_ALWAYS;
+
+    // Lambda to check if a pointer was newly allocated
+    auto WasNewlyAllocated = [&](void *Ptr, const char *PtrName) {
+      bool IsNewlyAllocated =
+          llvm::any_of(AttachInfo.NewAllocations, [&](const auto &Alloc) {
+            void *AllocPtr = Alloc.first;
+            int64_t AllocSize = Alloc.second;
+            return Ptr >= AllocPtr &&
+                   Ptr < (void *)((char *)AllocPtr + AllocSize);
+          });
+      DP("ATTACH entry %zu: %s pointer " DPxMOD " was newly allocated: %s\n",
+         EntryIdx, PtrName, DPxPTR(Ptr), IsNewlyAllocated ? "yes" : "no");
+      return IsNewlyAllocated;
+    };
+
+    // Only process ATTACH if base/begin was newly allocated OR ALWAYS flag is
+    // set
+    if (!IsAttachAlways && !WasNewlyAllocated(HstPtr, "pointer") &&
+        !WasNewlyAllocated(HstPteeBegin, "pointee")) {
+      DP("Skipping ATTACH entry %zu: neither pointer nor pointee was newly "
+         "allocated and no ALWAYS flag\n",
+         EntryIdx);
+      continue;
+    }
+
+    DP("Processing ATTACH entry %zu: Always=%s\n", EntryIdx,
+       IsAttachAlways ? "yes" : "no");
+
+    // Lambda to perform target pointer lookup and validation
+    auto LookupTargetPointer =
+        [&](void *Ptr, int64_t Size,
+            const char *PtrType) -> std::optional<TargetPointerResultTy> {
+      // ATTACH map-type does not change ref-count, or do any allocation
+      // We just need to do a lookup for the pointer/pointee.
+      TargetPointerResultTy TPR = Device.getMappingInfo().getTgtPtrBegin(
+          Ptr, Size, /*UpdateRefCount=*/false,
+          /*UseHoldRefCount=*/false, /*MustContain=*/true);
+
+      DP("ATTACH entry %zu: %s lookup - HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
+         ", IsPresent=%s, IsHostPtr=%s\n",
+         EntryIdx, PtrType, DPxPTR(Ptr), DPxPTR(TPR.TargetPointer),
+         TPR.isPresent() ? "yes" : "no",
+         TPR.Flags.IsHostPointer ? "yes" : "no");
+
+      if (!TPR.isPresent()) {
+        DP("Skipping ATTACH entry %zu: %s not present on device\n", EntryIdx,
+           PtrType);
+        return std::nullopt;
+      }
+      if (TPR.Flags.IsHostPointer) {
+        DP("Skipping ATTACH entry %zu: device version of the %s is a host "
+           "pointer.\n",
+           EntryIdx, PtrType);
+        return std::nullopt;
+      }
+
+      return TPR;
+    };
+
+    // Get device version of the pointer (e.g., &p)
+    auto PtrTPROpt = LookupTargetPointer(HstPtr, PtrSize, "pointer");
+    if (!PtrTPROpt)
+      continue;
+    TargetPointerResultTy &PtrTPR = *PtrTPROpt;
+    void **TgtPtrBase = (void **)PtrTPR.TargetPointer;
+
+    // Get device version of the pointee (e.g., &p[10])
+    auto PteeTPROpt = LookupTargetPointer(HstPteeBegin, 0, "pointee");
+    if (!PteeTPROpt)
+      continue;
+    void *TgtPteeBegin = PteeTPROpt->TargetPointer;
+
+    // Insert a data-fence before the first pointer-attachment.
+    if (IsFirstPointerAttachment) {
+      IsFirstPointerAttachment = false;
+      DP("Inserting a data fence before the first pointer attachment.\n");
+      Ret = Device.dataFence(AsyncInfo);
----------------
abhinavgaba wrote:

The data fence could instead be deferred until just before the `submitData` call in `performPointerAttachment`, but the code flow would become a bit uglier and harder to follow: the `IsFirstPointerAttachment` flag would have to be passed by reference into `performPointerAttachment` and set to `false` in there.

https://github.com/llvm/llvm-project/pull/149036


More information about the llvm-commits mailing list