[llvm] [WIP][Offload] Introduce ATTACH map-type support for pointer attachment. (PR #149036)

Abhinav Gaba via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 16 01:56:56 PDT 2025


================
@@ -324,17 +325,195 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
                               MapperArgsBase.data(), MapperArgs.data(),
                               MapperArgSizes.data(), MapperArgTypes.data(),
                               MapperArgNames.data(), /*arg_mappers*/ nullptr,
-                              AsyncInfo, /*FromMapper=*/true);
+                              AsyncInfo, AttachInfo, /*FromMapper=*/true);
 
   return Rc;
 }
 
+/// Utility function to perform a pointer attachment operation.
+///
+/// For something like:
+/// \code
+///  int *p;
+///  ...
+///  #pragma omp target enter data map(to:p[10:10])
+/// \endcode
+///
+/// for which the attachment operation gets represented using:
+/// \code
+///   &p, &p[10], sizeof(p), ATTACH
+/// \endcode
+///
+/// (Hst|Tgt)PtrAddr   represents &p
+/// (Hst|Tgt)PteeBase  represents &p[0]
+/// (Hst|Tgt)PteeBegin represents &p[10]
+///
+/// This function first computes the expected TgtPteeBase using:
+///   TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
+///
+/// and then attaches TgtPteeBase to TgtPtrAddr.
+///
+/// \p HstPtrSize represents the size of the pointer p. For C/C++, this
+/// should be same as "sizeof(void*)" (say 8).
+///
+/// However, for Fortran, pointers/allocatables, which are also eligible for
+/// "pointer-attachment", may be implemented using descriptors that contain the
+/// address of the pointee in the first 8 bytes, but also contain other
+/// information such as lower-bound/upper-bound etc in their subsequent fields.
+///
+/// For example, for the following:
+/// \code
+///   integer, allocatable :: x(:)
+///   integer, pointer :: p(:)
+///   ...
+///   p => x(10: 19)
+///   ...
+///   !$omp target enter data map(to:p(:))
+/// \endcode
+///
+/// The map should trigger a pointer-attachment (assuming the pointer-attachment
+/// conditions as noted on processAttachEntries are met) between the descriptor
+/// for p, and its pointee data.
+///
+/// Since only the first 8 bytes of the descriptor contain the address of the
+/// pointee, an attachment operation on device descriptors involves:
+/// * Setting the first 8 bytes of the device descriptor to point the device
+/// address of the pointee.
+/// * Copying the remaining information about bounds/offset etc. from the host
+/// descriptor to the device descriptor.
+///
+/// The function also handles pointer-attachment portion of PTR_AND_OBJ maps,
+/// like:
+/// \code
+///   &p, &p[10], 10 * sizeof(p[10]), PTR_AND_OBJ
+/// \endcoe
+/// by using "sizeof(void*)" as \p HstPtrSize.
+static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
+                                    void **HstPtrAddr, void *HstPteeBase,
+                                    void *HstPteeBegin, void **TgtPtrAddr,
+                                    void *TgtPteeBegin, int64_t HstPtrSize,
+                                    TargetPointerResultTy &PtrTPR) {
+  assert(PtrTPR.getEntry() &&
+         "Need a valid pointer entry to perform pointer-attachment");
+
+  int64_t VoidPtrSize = sizeof(void *);
+  assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
+
+  uint64_t Delta = (uint64_t)HstPteeBegin - (uint64_t)HstPteeBase;
+  void *TgtPteeBase = (void *)((uint64_t)TgtPteeBegin - Delta);
+
+  // Add shadow pointer tracking
+  // TODO: Support shadow-tracking of larger than VoidPtrSize pointers,
+  // to support restoration of Fortran descriptors. Currently, this check
+  // would return false, even if the host Fortran descriptor was, and we
+  // should have done an update of the device descriptor. e.g.
+  //
+  //   !$omp target enter data map(x(1:100)) !             (1)
+  //   p => x(10: 19)
+  //   !$omp target enter data map(p, p(:)) !              (2)
+  //   p => x(5: 9)
+  //   !$omp target enter data map(attach(always): p(:)) ! (3)
+  //
+  // While PtrAddr(&desc_p) and PteeBase(&p(1)) are same for (2) and (3), the
+  // pointer attachment for (3) needs to update the bounds information
+  // in the descriptor of p on device.
+  if (!PtrTPR.getEntry()->addShadowPointer(
+          ShadowPtrInfoTy{HstPtrAddr, HstPteeBase, TgtPtrAddr, TgtPteeBase}))
+    return OFFLOAD_SUCCESS;
+
+  DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(TgtPtrAddr),
+     DPxPTR(TgtPteeBase));
+
+  // Lambda to handle submitData result and perform final steps.
+  auto HandleSubmitResult = [&](int SubmitResult) -> int {
+    if (SubmitResult != OFFLOAD_SUCCESS) {
+      REPORT("Failed to update pointer on device.\n");
+      return OFFLOAD_FAIL;
+    }
+
+    if (PtrTPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) !=
+        OFFLOAD_SUCCESS)
+      return OFFLOAD_FAIL;
+
+    return OFFLOAD_SUCCESS;
+  };
+
+  bool IsPtrAFortranDescriptor = HstPtrSize > VoidPtrSize;
+  if (!IsPtrAFortranDescriptor) {
+    // For "regular" pointers, we can use the VoidPtrLocation from AsyncInfo as
+    // the buffer space for the submission.
+    void *&BufferElement = AsyncInfo.getVoidPtrLocation();
+    BufferElement = TgtPteeBase;
+
+    // Submit the updated pointer value to device
+    return HandleSubmitResult(Device.submitData(
+        TgtPtrAddr, &BufferElement, VoidPtrSize, AsyncInfo, PtrTPR.getEntry()));
+  }
+
+  // For larger "pointers" (like Fortran's descriptors), we create a dynamic
+  // buffer, which will be eventually destroyed by AsyncInfo's post-processing
+  // callback.
+  char *DataBuffer = new char[HstPtrSize];
+
+  // For such descriptors, to the first VoidPtrSize bytes, we store the
+  // pointee's device address.
+  std::memcpy(DataBuffer, &TgtPteeBase, sizeof(void *));
+
+  // And to the remaining bytes, we copy the remaining contents of the host
+  // descriptor after the initial VoidPtrSize bytes.
+  uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
+  void *HstDescriptorFieldsAddr = (char *)HstPtrAddr + VoidPtrSize;
+  std::memcpy(DataBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
+              HstDescriptorFieldsSize);
+
+  DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD ") (pointer + %" PRId64
+     " additional bytes from host descriptor " DPxMOD ")\n",
+     HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
+     DPxPTR(HstDescriptorFieldsAddr));
+
+  // Submit the entire buffer to device
+  // FIXME: When handling ATTACH map-type, pointer attachment needs to happen
+  // after the other mapping operations are done, to avoid possibility of
+  // pending transfers clobbering the attachment, for example:
+  //
+  //   int *p = ...;
+  //   int **pp = &p;
+  //   map(to: pp[0], p[0])
+  //
+  // Which would be represented by:
+  // &pp[0], &pp[0], sizeof(pp[0]), TO (1)
+  // &p[0], &p[0], sizeof(p[0]), TO    (2)
+  //
+  // &pp, &pp[0], sizeof(pp), ATTACH   (3)
+  // &p, &p[0], sizeof(p), ATTACH      (4)
+  //
+  // (4) and (1) are both trying to modify the device memory corresponding to
+  // &p. We need to ensure that (4) happens last.
+  //
+  // One possible solution to this could be to insert a "device barrier" before
----------------
abhinavgaba wrote:

@adurang, please check if this is a proper description of your suggestion.

https://github.com/llvm/llvm-project/pull/149036


More information about the llvm-commits mailing list