[llvm] [OpenMP][Offload] Support `PRIVATE | ATTACH` maps for corresponding-pointer-initialization. (PR #160760)

Abhinav Gaba via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 12:10:39 PDT 2025


https://github.com/abhinavgaba updated https://github.com/llvm/llvm-project/pull/160760

>From f266f6be2acae415511035d9c98f4999d7c20934 Mon Sep 17 00:00:00 2001
From: Abhinav Gaba <abhinav.gaba at intel.com>
Date: Wed, 17 Sep 2025 11:19:02 -0700
Subject: [PATCH 1/5] [wip][Offload][OpenMP] Handle PRIVATE+ATTACH entries for
 pointer-initialization on target.

---
 offload/include/OpenMP/Mapping.h   |   4 +
 offload/libomptarget/omptarget.cpp | 279 ++++++++++++++++++++++++-----
 2 files changed, 235 insertions(+), 48 deletions(-)

diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h
index 45bd9c6e7da8b..9e342aef0c205 100644
--- a/offload/include/OpenMP/Mapping.h
+++ b/offload/include/OpenMP/Mapping.h
@@ -493,6 +493,10 @@ struct AttachInfoTy {
   /// Key: host pointer, Value: allocation size.
   llvm::DenseMap<void *, int64_t> NewAllocations;
 
+  /// Key: host pointer, Value: target pointer for PRIVATE | ATTACH map entries, 
+  /// which need special initialization during ATTACH processing.
+  llvm::DenseMap<void *, void *> PrivateAttachAllocations;
+
   AttachInfoTy() = default;
 
   // Delete copy constructor and copy assignment operator to prevent copying
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 39286d41ec865..15aded9972faa 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -330,6 +330,32 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
   return Rc;
 }
 
+/// Utility function to calculate the target pointee base by applying the host
+/// pointer delta to the target pointer begin.
+///
+/// This computes: TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
+///
+/// \param HstPteeBase The base address of the pointee on the host
+/// \param HstPteeBegin The begin address of the pointee on the host
+/// \param TgtPteeBegin The begin address of the pointee on the target
+/// \return The adjusted target pointer base address
+static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
+                                        void *TgtPteeBegin) {
+  uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) -
+                   reinterpret_cast<uint64_t>(HstPteeBase);
+  void *TgtPteeBase = reinterpret_cast<void *>(
+      reinterpret_cast<uint64_t>(TgtPteeBegin) - Delta);
+
+  DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD
+     ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n",
+     DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta);
+  DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD
+     "\n",
+     DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin));
+
+  return TgtPteeBase;
+}
+
 /// Utility function to perform a pointer attachment operation.
 ///
 /// For something like:
@@ -399,16 +425,8 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
   constexpr int64_t VoidPtrSize = sizeof(void *);
   assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
 
-  uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) -
-                   reinterpret_cast<uint64_t>(HstPteeBase);
-  void *TgtPteeBase = reinterpret_cast<void *>(
-      reinterpret_cast<uint64_t>(TgtPteeBegin) - Delta);
-  DP("HstPteeBase: " DPxMOD ", HstPteeBegin: " DPxMOD
-     ", Delta (HstPteeBegin - HstPteeBase): %" PRIu64 ".\n",
-     DPxPTR(HstPteeBase), DPxPTR(HstPteeBegin), Delta);
-  DP("TgtPteeBase (TgtPteeBegin - Delta): " DPxMOD ", TgtPteeBegin : " DPxMOD
-     "\n",
-     DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin));
+  void *TgtPteeBase = calculateTargetPointeeBase(HstPteeBase, HstPteeBegin,
+                                                 TgtPteeBegin);
 
   // Add shadow pointer tracking
   if (!PtrTPR.getEntry()->addShadowPointer(
@@ -480,6 +498,112 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
   return HandleSubmitResult(SubmitResult);
 }
 
+/// Initialize the privatized pointer \p TgtPtrAddr, using an offset-ed \p
+/// TgtPteeBegin, if present, \p HstPteeBase otherwise.
+///
+/// The private pointer initialization can be used for initializing the privatized
+/// version of the base pointer/referring-pointer on a target construct. e.g.
+///
+/// For example, for the following, a possible way to map `px[1]` is:
+/// ```cpp
+///   int x[10];
+///   int *px = &x[0];
+///   ...
+///   #pragma omp target data map(tofrom:px)
+///   {
+///     int **ppx = omp_get_mapped_ptr(&px, omp_get_default_device());
+///   
+///     // px is pre-determined firstprivate, and should get initialized using
+///     // the private-pointer-initialization here.
+///     //
+///     // Possible maps from FE for px:
+///     //
+///     // &px[0], &px[1], sizeof(px[1]), TO | FROM                // (1)
+///     // &px,    &px[1], sizeof(px),    ATTACH                   // (2)
+///     // &px,    &px[1], sizeof(px),    PRIVATE | ATTACH | PARAM // (3)
+///     #pragma omp target map(tofrom:px[1]) is_device_ptr(ppx)
+///     {
+///        ppx[0][0] = px[1] + 1;
+///     }
+///   }
+/// ```
+/// `(1)` maps the pointee `px[1].
+/// `(2)` attaches it to the mapped version of `px`. It can be controlled by the
+/// user based on the `attach(auto/always/never)` map-type modifier.
+/// `(3)` privatizes and initializes the private pointer `px`, and it becomes
+/// a kernel argument for the target construct. Can be skipped if `px` is not
+/// referenced in the target construct.
+///
+/// If \p TgtPteeBegin is non-null, the value used to initialize \p TgtPtrAddr is
+/// computed similar to performPointerAttachment. Otherwise, \p HstPteeBase is
+/// used.
+///
+/// Similar to performPointerAttachment, this function also handles
+/// initialization of the remaining fields of Fortran descriptors ( i.e.
+/// `HstPtrSize > sizeof(void*)`), by copying the remaining contents from
+/// \p HstPtrAddr, after the first `sizeof(void*)` bytes.
+///
+/// Unlike performPointerAttachment, this function doesn't need to work with
+/// shadow pointers or event tracking, because it works with newly allocated
+/// memory for the PRIVATE map-type.
+static int performPrivatePointerInitialization(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
+                                               void *HstPtrAddr, void *HstPteeBase,
+                                          void *HstPteeBegin, void **TgtPtrAddr,
+                                          void *TgtPteeBegin, int64_t HstPtrSize) {
+  constexpr int64_t VoidPtrSize = sizeof(void *);
+  assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
+
+
+  // If there is no pointee, a privatized pointer should retain its incoming
+  // host value.
+  void *TgtPteeBase = TgtPteeBegin ? calculateTargetPointeeBase(HstPteeBase,
+                                     HstPteeBegin, TgtPteeBegin) : HstPteeBase;
+
+  DP("Initializing private pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(TgtPtrAddr),
+     DPxPTR(TgtPteeBase));
+
+  // Create a buffer to hold the pointer data to be submitted to device
+  char *DataBuffer = new char[HstPtrSize];
+
+  // Store the pointee's device address in the first VoidPtrSize bytes
+  std::memcpy(DataBuffer, &TgtPteeBase, sizeof(void *));
+
+  bool IsPtrAFortranDescriptor = HstPtrSize > VoidPtrSize;
+  if (IsPtrAFortranDescriptor) {
+    // Copy the remaining descriptor fields from host for Fortran descriptors
+    uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
+    void *HstDescriptorFieldsAddr =
+        reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize;
+    std::memcpy(DataBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
+                HstDescriptorFieldsSize);
+
+    DP("Updating private %" PRId64 " bytes of descriptor (" DPxMOD ") (pointer + %" PRId64
+       " additional bytes from host descriptor " DPxMOD ")\n",
+       HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
+       DPxPTR(HstDescriptorFieldsAddr));
+  } else {
+    DP("Updating private pointer (" DPxMOD ") with value " DPxMOD "\n",
+       DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase));
+  }
+
+  // Submit the entire buffer to device in a single operation
+  int SubmitResult = Device.submitData(TgtPtrAddr, DataBuffer, HstPtrSize,
+                                       AsyncInfo, nullptr);
+
+  // Clean up the buffer
+  AsyncInfo.addPostProcessingFunction([DataBuffer]() -> int {
+    delete[] DataBuffer;
+    return OFFLOAD_SUCCESS;
+  });
+
+  if (SubmitResult != OFFLOAD_SUCCESS) {
+    REPORT("Failed to update private %s on device.\n",
+           IsPtrAFortranDescriptor ? "descriptor" : "pointer");
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
 /// Internal function to do the mapping and transfer the data to the device
 int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                     void **ArgsBase, void **Args, int64_t *ArgSizes,
@@ -739,7 +863,7 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
      AttachInfo.AttachEntries.size());
 
   int Ret = OFFLOAD_SUCCESS;
-  bool IsFirstPointerAttachment = true;
+  bool IsFirstNonPrivateAttachment = true;
   for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
        ++EntryIdx) {
     const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
@@ -773,15 +897,6 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
       return IsNewlyAllocated;
     };
 
-    // Only process ATTACH if either the pointee or the pointer was newly
-    // allocated, or the ALWAYS flag is set.
-    if (!IsAttachAlways && !WasNewlyAllocated(HstPteeBegin, "pointee") &&
-        !WasNewlyAllocated(HstPtr, "pointer")) {
-      DP("Skipping ATTACH entry %zu: neither pointer nor pointee was newly "
-         "allocated and no ALWAYS flag\n",
-         EntryIdx);
-      continue;
-    }
 
     // Lambda to perform target pointer lookup and validation
     auto LookupTargetPointer =
@@ -819,22 +934,47 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
         return PteeTPROpt->TargetPointer;
       return nullptr;
     }();
+    bool PointeeLookupSucceeded = TgtPteeBegin != nullptr;
+
+    // For non-private attachments, we need the pointee to be mapped
+    // For private attachments, we can proceed even if pointee lookup fails
+    auto PrivateAttachIt = AttachInfo.PrivateAttachAllocations.find(HstPtr);
+    bool IsPrivateAttach = (PrivateAttachIt != AttachInfo.PrivateAttachAllocations.end());
 
-    if (!TgtPteeBegin)
+    if (!PointeeLookupSucceeded && !IsPrivateAttach)
       continue;
 
     // Get device version of the pointer (e.g., &p) next. We need to keep its
     // TPR for use in shadow-pointer handling during pointer-attachment.
-    auto PtrTPROpt = LookupTargetPointer(HstPtr, PtrSize, "pointer");
-    if (!PtrTPROpt)
-      continue;
-    TargetPointerResultTy &PtrTPR = *PtrTPROpt;
-    void **TgtPtrBase = reinterpret_cast<void **>(PtrTPR.TargetPointer);
 
-    // Insert a data-fence before the first pointer-attachment.
-    if (IsFirstPointerAttachment) {
-      IsFirstPointerAttachment = false;
-      DP("Inserting a data fence before the first pointer attachment.\n");
+    // Check if this is a PRIVATE | ATTACH allocation
+    void **TgtPtrBasePrivate = nullptr;
+    void **TgtPtrBaseMapped = nullptr;
+    std::optional<TargetPointerResultTy> PtrTPROpt;
+
+    if (IsPrivateAttach) {
+      // This is a PRIVATE | ATTACH allocation - use the allocated target pointer
+      TgtPtrBasePrivate = reinterpret_cast<void **>(PrivateAttachIt->second);
+      DP("Using PRIVATE | ATTACH allocation for pointer " DPxMOD " at target " DPxMOD "\n",
+         DPxPTR(HstPtr), DPxPTR(TgtPtrBasePrivate));
+    }
+
+    // Look up the target pointer mapping
+    if (!IsPrivateAttach) {
+      // For regular ATTACH entries, we need to find the mapping
+      PtrTPROpt = LookupTargetPointer(HstPtr, PtrSize, "pointer");
+      if (!PtrTPROpt.has_value())
+        continue;  // Regular ATTACH entries need successful lookup
+      TgtPtrBaseMapped = reinterpret_cast<void **>(PtrTPROpt->TargetPointer);
+    }
+    // For PRIVATE | ATTACH entries, we don't need the mapping lookup
+    // since we already have TgtPtrBasePrivate from the private allocation
+
+    // Insert a data-fence before the first regular pointer-attachment.
+    // Private attachments don't need data fences since they don't transfer data.
+    if (!IsPrivateAttach && PtrTPROpt.has_value() && IsFirstNonPrivateAttachment) {
+      IsFirstNonPrivateAttachment = false;
+      DP("Inserting a data fence before the first regular pointer attachment.\n");
       Ret = Device.dataFence(AsyncInfo);
       if (Ret != OFFLOAD_SUCCESS) {
         REPORT("Failed to insert data fence.\n");
@@ -844,11 +984,35 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
 
     // Do the pointer-attachment, i.e. update the device pointer to point to
     // device pointee.
-    Ret = performPointerAttachment(Device, AsyncInfo, HstPtr, HstPteeBase,
-                                   HstPteeBegin, TgtPtrBase, TgtPteeBegin,
-                                   PtrSize, PtrTPR);
-    if (Ret != OFFLOAD_SUCCESS)
-      return OFFLOAD_FAIL;
+
+    if (IsPrivateAttach) {
+      // For PRIVATE | ATTACH entries: only do private initialization
+      DP("Performing PRIVATE | ATTACH initialization\n");
+      Ret = performPrivatePointerInitialization(Device, AsyncInfo, HstPtr, HstPteeBase,
+                                               HstPteeBegin, TgtPtrBasePrivate,
+                                               TgtPteeBegin, PtrSize);
+      if (Ret != OFFLOAD_SUCCESS) {
+        REPORT("Failed to update PRIVATE | ATTACH pointer on device.\n");
+        return OFFLOAD_FAIL;
+      }
+    } else if (PtrTPROpt.has_value()) {
+      // For regular ATTACH entries: only do regular attachment
+      // Only process mapped ATTACH if either the pointee or the pointer was newly
+      // allocated, or the ALWAYS flag is set.
+      if (!IsAttachAlways && !WasNewlyAllocated(HstPteeBegin, "pointee") &&
+          !WasNewlyAllocated(HstPtr, "pointer")) {
+        DP("Skipping mapped ATTACH entry %zu: neither pointer nor pointee was newly "
+           "allocated and no ALWAYS flag\n",
+           EntryIdx);
+      } else {
+        DP("Performing regular ATTACH for pointer " DPxMOD "\n", DPxPTR(HstPtr));
+        Ret = performPointerAttachment(Device, AsyncInfo, HstPtr, HstPteeBase,
+                                       HstPteeBegin, TgtPtrBaseMapped, TgtPteeBegin,
+                                       PtrSize, *PtrTPROpt);
+        if (Ret != OFFLOAD_SUCCESS)
+          return OFFLOAD_FAIL;
+      }
+    }
 
     DP("ATTACH entry %zu processed successfully\n", EntryIdx);
   }
@@ -1684,19 +1848,38 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
     } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
       TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
       const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);
-      // If there is a next argument and it depends on the current one, we need
-      // to allocate the private memory immediately. If this is not the case,
-      // then the argument can be marked for optimization and packed with the
-      // other privates.
-      const bool AllocImmediately =
-          (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
-      Ret = PrivateArgumentManager.addArg(
-          HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
-          TgtArgs.size(), HstPtrName, AllocImmediately);
-      if (Ret != OFFLOAD_SUCCESS) {
-        REPORT("Failed to process %sprivate argument " DPxMOD "\n",
-               (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
-        return OFFLOAD_FAIL;
+      const bool IsAttach = (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH);
+
+      // Handle PRIVATE | ATTACH specially - allocate but defer initialization
+      if (IsAttach) {
+        DP("Processing PRIVATE | ATTACH map for argument %d\n", I);
+        // Allocate memory for the private variable
+        TgtPtrBegin = DeviceOrErr->allocData(ArgSizes[I], HstPtrBegin);
+        if (!TgtPtrBegin) {
+          DP("Data allocation for private attach array " DPxMOD " failed.\n",
+             DPxPTR(HstPtrBegin));
+          return OFFLOAD_FAIL;
+        }
+        // Track this allocation for later initialization during ATTACH processing
+        AttachInfo.PrivateAttachAllocations[HstPtrBegin] = TgtPtrBegin;
+        DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD
+           " for private attach array " DPxMOD "\n",
+           ArgSizes[I], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
+      } else {
+        // If there is a next argument and it depends on the current one, we need
+        // to allocate the private memory immediately. If this is not the case,
+        // then the argument can be marked for optimization and packed with the
+        // other privates.
+        const bool AllocImmediately =
+            (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
+        Ret = PrivateArgumentManager.addArg(
+            HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
+            TgtArgs.size(), HstPtrName, AllocImmediately);
+        if (Ret != OFFLOAD_SUCCESS) {
+          REPORT("Failed to process %sprivate argument " DPxMOD "\n",
+                 (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
+          return OFFLOAD_FAIL;
+        }
       }
     } else {
       if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)

>From 10e0069463f61e9d5438fd3fab1ef312b2340006 Mon Sep 17 00:00:00 2001
From: Abhinav Gaba <abhinav.gaba at intel.com>
Date: Tue, 23 Sep 2025 11:08:12 -0700
Subject: [PATCH 2/5] [OpenMP][Offload] Support `PRIVATE | ATTACH` maps for
 corresponding-pointer-initialization.

`PRIVATE | ATTACH` maps can be used to represent firstprivate pointers
that should be initialized by doing doing the pointee's device address,
if its lookup succeeds, or retain the original host pointee's address
otherwise.

With this, for a test like the following:

```f90
integer, pointer :: p(:)
!$omp target map(p(1))
... print*, p(1)
!$omp end target
```

The codegen can look like:
```llvm
 ; maps for p:
 ; &p(1),       &p(1), sizeof(p(1)),       TO|FROM              //(1)
 ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), ATTACH               //(2)
 ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), PRIVATE|ATTACH|PARAM //(3)
 call... @__omp_outlined...(ptr %ref_ptr_of_p)
```

 * `(1)` maps the pointee `p(1).
 * `(2)` attaches it to the (previously) mapped `ref_ptr(p)`, if present.
   It can be controlled via OpenMP 6.1's `attach(auto/always/never)`
   map-type modifiers.
 * `(3)` privatizes and initializes the local `ref_ptr(p)`, which gets passed
   in as the kernel argument `%ref_ptr_of_p`. Can be skipped if p is not
   referenced directly within the region.

While similar mapping can be used for C/C++, it's more important/useful
for Fortran as we can avoid creating another argument for passing the
descriptor, and use that to initialize the private copy in the body of the
kernel.
---
 offload/include/OpenMP/Mapping.h   |   4 -
 offload/libomptarget/omptarget.cpp | 575 ++++++++++++++++-------------
 2 files changed, 318 insertions(+), 261 deletions(-)

diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h
index 9e342aef0c205..45bd9c6e7da8b 100644
--- a/offload/include/OpenMP/Mapping.h
+++ b/offload/include/OpenMP/Mapping.h
@@ -493,10 +493,6 @@ struct AttachInfoTy {
   /// Key: host pointer, Value: allocation size.
   llvm::DenseMap<void *, int64_t> NewAllocations;
 
-  /// Key: host pointer, Value: target pointer for PRIVATE | ATTACH map entries, 
-  /// which need special initialization during ATTACH processing.
-  llvm::DenseMap<void *, void *> PrivateAttachAllocations;
-
   AttachInfoTy() = default;
 
   // Delete copy constructor and copy assignment operator to prevent copying
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 15aded9972faa..737d3497ca169 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -330,15 +330,37 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
   return Rc;
 }
 
-/// Utility function to calculate the target pointee base by applying the host
-/// pointer delta to the target pointer begin.
+/// Returns a buffer of the requested \p Size, to be used as the source for
+/// `submitData`.
 ///
-/// This computes: TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
+/// For small buffers (`Size <= sizeof(void*)`), uses \p AsyncInfo's
+/// getVoidPtrLocation().
+/// For larger buffers, creates a dynamic buffer which will be eventually
+/// deleted by \p AsyncInfo's post-processing callback.
+static char *getOrCreateSourceBufferForSubmitData(AsyncInfoTy &AsyncInfo,
+                                                  int64_t Size) {
+  constexpr int64_t VoidPtrSize = sizeof(void *);
+
+  if (Size <= VoidPtrSize) {
+    void *&BufferElement = AsyncInfo.getVoidPtrLocation();
+    return reinterpret_cast<char *>(&BufferElement);
+  }
+
+  // Create a dynamic buffer for larger data and schedule its deletion.
+  char *DataBuffer = new char[Size];
+  AsyncInfo.addPostProcessingFunction([DataBuffer]() {
+    delete[] DataBuffer;
+    return OFFLOAD_SUCCESS;
+  });
+  return DataBuffer;
+}
+
+/// Calculates the target pointee base by applying the host
+/// pointee begin/base delta to the target pointee begin.
 ///
-/// \param HstPteeBase The base address of the pointee on the host
-/// \param HstPteeBegin The begin address of the pointee on the host
-/// \param TgtPteeBegin The begin address of the pointee on the target
-/// \return The adjusted target pointer base address
+/// ```
+/// TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
+/// ```
 static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
                                         void *TgtPteeBegin) {
   uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) -
@@ -425,8 +447,8 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
   constexpr int64_t VoidPtrSize = sizeof(void *);
   assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
 
-  void *TgtPteeBase = calculateTargetPointeeBase(HstPteeBase, HstPteeBegin,
-                                                 TgtPteeBegin);
+  void *TgtPteeBase =
+      calculateTargetPointeeBase(HstPteeBase, HstPteeBegin, TgtPteeBegin);
 
   // Add shadow pointer tracking
   if (!PtrTPR.getEntry()->addShadowPointer(
@@ -453,157 +475,35 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
     return OFFLOAD_SUCCESS;
   };
 
-  bool IsPtrAFortranDescriptor = HstPtrSize > VoidPtrSize;
-  if (!IsPtrAFortranDescriptor) {
-    // For "regular" pointers, we can use the VoidPtrLocation from AsyncInfo as
-    // the buffer space for the submission.
-    void *&BufferElement = AsyncInfo.getVoidPtrLocation();
-    BufferElement = TgtPteeBase;
+  // Get a buffer to be used as the source for data submission.
+  char *SrcBuffer = getOrCreateSourceBufferForSubmitData(AsyncInfo, HstPtrSize);
 
-    // Submit the updated pointer value to device
-    return HandleSubmitResult(Device.submitData(
-        TgtPtrAddr, &BufferElement, VoidPtrSize, AsyncInfo, PtrTPR.getEntry()));
-  }
-
-  // For larger "pointers" (like Fortran's descriptors), we create a dynamic
-  // buffer, which will be eventually destroyed by AsyncInfo's post-processing
-  // callback.
-  char *DataBuffer = new char[HstPtrSize];
-
-  // For such descriptors, to the first VoidPtrSize bytes, we store the
-  // pointee's device address.
-  std::memcpy(DataBuffer, &TgtPteeBase, sizeof(void *));
-
-  // And to the remaining bytes, we copy the remaining contents of the host
-  // descriptor after the initial VoidPtrSize bytes.
-  uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
-  void *HstDescriptorFieldsAddr =
-      reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize;
-  std::memcpy(DataBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
-              HstDescriptorFieldsSize);
-
-  DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD ") (pointer + %" PRId64
-     " additional bytes from host descriptor " DPxMOD ")\n",
-     HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
-     DPxPTR(HstDescriptorFieldsAddr));
-
-  // Submit the entire buffer to device
-  int SubmitResult = Device.submitData(TgtPtrAddr, DataBuffer, HstPtrSize,
-                                       AsyncInfo, PtrTPR.getEntry());
-
-  AsyncInfo.addPostProcessingFunction([DataBuffer]() -> int {
-    delete[] DataBuffer;
-    return OFFLOAD_SUCCESS;
-  });
-  return HandleSubmitResult(SubmitResult);
-}
+  // The pointee's address should occupy the first VoidPtrSize bytes
+  // irrespective of HstPtrSize.
+  std::memcpy(SrcBuffer, &TgtPteeBase, VoidPtrSize);
 
-/// Initialize the privatized pointer \p TgtPtrAddr, using an offset-ed \p
-/// TgtPteeBegin, if present, \p HstPteeBase otherwise.
-///
-/// The private pointer initialization can be used for initializing the privatized
-/// version of the base pointer/referring-pointer on a target construct. e.g.
-///
-/// For example, for the following, a possible way to map `px[1]` is:
-/// ```cpp
-///   int x[10];
-///   int *px = &x[0];
-///   ...
-///   #pragma omp target data map(tofrom:px)
-///   {
-///     int **ppx = omp_get_mapped_ptr(&px, omp_get_default_device());
-///   
-///     // px is pre-determined firstprivate, and should get initialized using
-///     // the private-pointer-initialization here.
-///     //
-///     // Possible maps from FE for px:
-///     //
-///     // &px[0], &px[1], sizeof(px[1]), TO | FROM                // (1)
-///     // &px,    &px[1], sizeof(px),    ATTACH                   // (2)
-///     // &px,    &px[1], sizeof(px),    PRIVATE | ATTACH | PARAM // (3)
-///     #pragma omp target map(tofrom:px[1]) is_device_ptr(ppx)
-///     {
-///        ppx[0][0] = px[1] + 1;
-///     }
-///   }
-/// ```
-/// `(1)` maps the pointee `px[1].
-/// `(2)` attaches it to the mapped version of `px`. It can be controlled by the
-/// user based on the `attach(auto/always/never)` map-type modifier.
-/// `(3)` privatizes and initializes the private pointer `px`, and it becomes
-/// a kernel argument for the target construct. Can be skipped if `px` is not
-/// referenced in the target construct.
-///
-/// If \p TgtPteeBegin is non-null, the value used to initialize \p TgtPtrAddr is
-/// computed similar to performPointerAttachment. Otherwise, \p HstPteeBase is
-/// used.
-///
-/// Similar to performPointerAttachment, this function also handles
-/// initialization of the remaining fields of Fortran descriptors ( i.e.
-/// `HstPtrSize > sizeof(void*)`), by copying the remaining contents from
-/// \p HstPtrAddr, after the first `sizeof(void*)` bytes.
-///
-/// Unlike performPointerAttachment, this function doesn't need to work with
-/// shadow pointers or event tracking, because it works with newly allocated
-/// memory for the PRIVATE map-type.
-static int performPrivatePointerInitialization(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
-                                               void *HstPtrAddr, void *HstPteeBase,
-                                          void *HstPteeBegin, void **TgtPtrAddr,
-                                          void *TgtPteeBegin, int64_t HstPtrSize) {
-  constexpr int64_t VoidPtrSize = sizeof(void *);
-  assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
-
-
-  // If there is no pointee, a privatized pointer should retain its incoming
-  // host value.
-  void *TgtPteeBase = TgtPteeBegin ? calculateTargetPointeeBase(HstPteeBase,
-                                     HstPteeBegin, TgtPteeBegin) : HstPteeBase;
-
-  DP("Initializing private pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(TgtPtrAddr),
-     DPxPTR(TgtPteeBase));
-
-  // Create a buffer to hold the pointer data to be submitted to device
-  char *DataBuffer = new char[HstPtrSize];
-
-  // Store the pointee's device address in the first VoidPtrSize bytes
-  std::memcpy(DataBuffer, &TgtPteeBase, sizeof(void *));
-
-  bool IsPtrAFortranDescriptor = HstPtrSize > VoidPtrSize;
-  if (IsPtrAFortranDescriptor) {
-    // Copy the remaining descriptor fields from host for Fortran descriptors
+  // For larger "pointers" (e.g., Fortran descriptors), copy remaining
+  // descriptor fields from the host descriptor into the buffer.
+  if (HstPtrSize > VoidPtrSize) {
     uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
     void *HstDescriptorFieldsAddr =
         reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize;
-    std::memcpy(DataBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
+    std::memcpy(SrcBuffer + VoidPtrSize, HstDescriptorFieldsAddr,
                 HstDescriptorFieldsSize);
 
-    DP("Updating private %" PRId64 " bytes of descriptor (" DPxMOD ") (pointer + %" PRId64
-       " additional bytes from host descriptor " DPxMOD ")\n",
+    DP("Updating %" PRId64 " bytes of descriptor (" DPxMOD
+       ") (pointer + %" PRId64 " additional bytes from host descriptor " DPxMOD
+       ")\n",
        HstPtrSize, DPxPTR(TgtPtrAddr), HstDescriptorFieldsSize,
        DPxPTR(HstDescriptorFieldsAddr));
-  } else {
-    DP("Updating private pointer (" DPxMOD ") with value " DPxMOD "\n",
-       DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase));
-  }
-
-  // Submit the entire buffer to device in a single operation
-  int SubmitResult = Device.submitData(TgtPtrAddr, DataBuffer, HstPtrSize,
-                                       AsyncInfo, nullptr);
-
-  // Clean up the buffer
-  AsyncInfo.addPostProcessingFunction([DataBuffer]() -> int {
-    delete[] DataBuffer;
-    return OFFLOAD_SUCCESS;
-  });
-
-  if (SubmitResult != OFFLOAD_SUCCESS) {
-    REPORT("Failed to update private %s on device.\n",
-           IsPtrAFortranDescriptor ? "descriptor" : "pointer");
-    return OFFLOAD_FAIL;
   }
 
-  return OFFLOAD_SUCCESS;
+  // Submit the populated source buffer to device.
+  int SubmitResult = Device.submitData(TgtPtrAddr, SrcBuffer, HstPtrSize,
+                                       AsyncInfo, PtrTPR.getEntry());
+  return HandleSubmitResult(SubmitResult);
 }
+
 /// Internal function to do the mapping and transfer the data to the device
 int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                     void **ArgsBase, void **Args, int64_t *ArgSizes,
@@ -649,10 +549,17 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     // ATTACH map-types are supposed to be handled after all mapping for the
     // construct is done. Defer their processing.
     if (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH) {
-      AttachInfo->AttachEntries.emplace_back(
-          /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin,
-          /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
-          /*PointeeName=*/HstPtrName);
+      const bool IsCorrespondingPointerInit =
+          (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE);
+      // We don't need to keep track of PRIVATE | ATTACH entries. They
+      // represent corresponding-pointer-initialization, and are handled
+      // similar to firstprivate (PRIVATE | TO) entries by
+      // PrivateArgumentManager.
+      if (!IsCorrespondingPointerInit)
+        AttachInfo->AttachEntries.emplace_back(
+            /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin,
+            /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I],
+            /*PointeeName=*/HstPtrName);
 
       DP("Deferring ATTACH map-type processing for argument %d\n", I);
       continue;
@@ -863,7 +770,7 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
      AttachInfo.AttachEntries.size());
 
   int Ret = OFFLOAD_SUCCESS;
-  bool IsFirstNonPrivateAttachment = true;
+  bool IsFirstPointerAttachment = true;
   for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
        ++EntryIdx) {
     const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
@@ -897,6 +804,15 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
       return IsNewlyAllocated;
     };
 
+    // Only process ATTACH if either the pointee or the pointer was newly
+    // allocated, or the ALWAYS flag is set.
+    if (!IsAttachAlways && !WasNewlyAllocated(HstPteeBegin, "pointee") &&
+        !WasNewlyAllocated(HstPtr, "pointer")) {
+      DP("Skipping ATTACH entry %zu: neither pointer nor pointee was newly "
+         "allocated and no ALWAYS flag\n",
+         EntryIdx);
+      continue;
+    }
 
     // Lambda to perform target pointer lookup and validation
     auto LookupTargetPointer =
@@ -934,47 +850,22 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
         return PteeTPROpt->TargetPointer;
       return nullptr;
     }();
-    bool PointeeLookupSucceeded = TgtPteeBegin != nullptr;
-
-    // For non-private attachments, we need the pointee to be mapped
-    // For private attachments, we can proceed even if pointee lookup fails
-    auto PrivateAttachIt = AttachInfo.PrivateAttachAllocations.find(HstPtr);
-    bool IsPrivateAttach = (PrivateAttachIt != AttachInfo.PrivateAttachAllocations.end());
 
-    if (!PointeeLookupSucceeded && !IsPrivateAttach)
+    if (!TgtPteeBegin)
       continue;
 
     // Get device version of the pointer (e.g., &p) next. We need to keep its
     // TPR for use in shadow-pointer handling during pointer-attachment.
+    auto PtrTPROpt = LookupTargetPointer(HstPtr, PtrSize, "pointer");
+    if (!PtrTPROpt)
+      continue;
+    TargetPointerResultTy &PtrTPR = *PtrTPROpt;
+    void **TgtPtrBase = reinterpret_cast<void **>(PtrTPR.TargetPointer);
 
-    // Check if this is a PRIVATE | ATTACH allocation
-    void **TgtPtrBasePrivate = nullptr;
-    void **TgtPtrBaseMapped = nullptr;
-    std::optional<TargetPointerResultTy> PtrTPROpt;
-
-    if (IsPrivateAttach) {
-      // This is a PRIVATE | ATTACH allocation - use the allocated target pointer
-      TgtPtrBasePrivate = reinterpret_cast<void **>(PrivateAttachIt->second);
-      DP("Using PRIVATE | ATTACH allocation for pointer " DPxMOD " at target " DPxMOD "\n",
-         DPxPTR(HstPtr), DPxPTR(TgtPtrBasePrivate));
-    }
-
-    // Look up the target pointer mapping
-    if (!IsPrivateAttach) {
-      // For regular ATTACH entries, we need to find the mapping
-      PtrTPROpt = LookupTargetPointer(HstPtr, PtrSize, "pointer");
-      if (!PtrTPROpt.has_value())
-        continue;  // Regular ATTACH entries need successful lookup
-      TgtPtrBaseMapped = reinterpret_cast<void **>(PtrTPROpt->TargetPointer);
-    }
-    // For PRIVATE | ATTACH entries, we don't need the mapping lookup
-    // since we already have TgtPtrBasePrivate from the private allocation
-
-    // Insert a data-fence before the first regular pointer-attachment.
-    // Private attachments don't need data fences since they don't transfer data.
-    if (!IsPrivateAttach && PtrTPROpt.has_value() && IsFirstNonPrivateAttachment) {
-      IsFirstNonPrivateAttachment = false;
-      DP("Inserting a data fence before the first regular pointer attachment.\n");
+    // Insert a data-fence before the first pointer-attachment.
+    if (IsFirstPointerAttachment) {
+      IsFirstPointerAttachment = false;
+      DP("Inserting a data fence before the first pointer attachment.\n");
       Ret = Device.dataFence(AsyncInfo);
       if (Ret != OFFLOAD_SUCCESS) {
         REPORT("Failed to insert data fence.\n");
@@ -984,35 +875,11 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
 
     // Do the pointer-attachment, i.e. update the device pointer to point to
     // device pointee.
-
-    if (IsPrivateAttach) {
-      // For PRIVATE | ATTACH entries: only do private initialization
-      DP("Performing PRIVATE | ATTACH initialization\n");
-      Ret = performPrivatePointerInitialization(Device, AsyncInfo, HstPtr, HstPteeBase,
-                                               HstPteeBegin, TgtPtrBasePrivate,
-                                               TgtPteeBegin, PtrSize);
-      if (Ret != OFFLOAD_SUCCESS) {
-        REPORT("Failed to update PRIVATE | ATTACH pointer on device.\n");
-        return OFFLOAD_FAIL;
-      }
-    } else if (PtrTPROpt.has_value()) {
-      // For regular ATTACH entries: only do regular attachment
-      // Only process mapped ATTACH if either the pointee or the pointer was newly
-      // allocated, or the ALWAYS flag is set.
-      if (!IsAttachAlways && !WasNewlyAllocated(HstPteeBegin, "pointee") &&
-          !WasNewlyAllocated(HstPtr, "pointer")) {
-        DP("Skipping mapped ATTACH entry %zu: neither pointer nor pointee was newly "
-           "allocated and no ALWAYS flag\n",
-           EntryIdx);
-      } else {
-        DP("Performing regular ATTACH for pointer " DPxMOD "\n", DPxPTR(HstPtr));
-        Ret = performPointerAttachment(Device, AsyncInfo, HstPtr, HstPteeBase,
-                                       HstPteeBegin, TgtPtrBaseMapped, TgtPteeBegin,
-                                       PtrSize, *PtrTPROpt);
-        if (Ret != OFFLOAD_SUCCESS)
-          return OFFLOAD_FAIL;
-      }
-    }
+    Ret = performPointerAttachment(Device, AsyncInfo, HstPtr, HstPteeBase,
+                                   HstPteeBegin, TgtPtrBase, TgtPteeBegin,
+                                   PtrSize, PtrTPR);
+    if (Ret != OFFLOAD_SUCCESS)
+      return OFFLOAD_FAIL;
 
     DP("ATTACH entry %zu processed successfully\n", EntryIdx);
   }
@@ -1561,13 +1428,24 @@ class PrivateArgumentManagerTy {
     uint32_t Padding;
     /// Host pointer name
     map_var_info_t HstPtrName = nullptr;
+    /// For corresponding-pointer-initialization: host pointee base address.
+    void *HstPteeBase = nullptr;
+    /// For corresponding-pointer-initialization: host pointee begin address.
+    void *HstPteeBegin = nullptr;
+    /// Whether this argument needs corresponding-pointer-initialization.
+    bool IsCorrespondingPointerInit = false;
 
     FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size,
                           uint32_t Alignment, uint32_t Padding,
-                          map_var_info_t HstPtrName = nullptr)
+                          map_var_info_t HstPtrName = nullptr,
+                          void *HstPteeBase = nullptr,
+                          void *HstPteeBegin = nullptr,
+                          bool IsCorrespondingPointerInit = false)
         : HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
           HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment),
-          Size(Size), Padding(Padding), HstPtrName(HstPtrName) {}
+          Size(Size), Padding(Padding), HstPtrName(HstPtrName),
+          HstPteeBase(HstPteeBase), HstPteeBegin(HstPteeBegin),
+          IsCorrespondingPointerInit(IsCorrespondingPointerInit) {}
   };
 
   /// A vector of target pointers for all private arguments
@@ -1585,6 +1463,153 @@ class PrivateArgumentManagerTy {
   /// A pointer to a \p AsyncInfoTy object
   AsyncInfoTy &AsyncInfo;
 
+  /// \returns the value of the target pointee's base to be used for
+  /// corresponding-pointer-initialization.
+  void *getTargetPointeeBaseForCorrespondingPointerInitialization(
+      void *HstPteeBase, void *HstPteeBegin) {
+    // See if the pointee's begin address has corresponding storage on device.
+    void *TgtPteeBegin = [&]() -> void * {
+      if (!HstPteeBegin) {
+        DP("Corresponding-pointer-initialization: pointee begin address is "
+           "null\n");
+        return nullptr;
+      }
+
+      return Device.getMappingInfo()
+          .getTgtPtrBegin(HstPteeBegin, /*Size=*/0, /*UpdateRefCount=*/false,
+                          /*UseHoldRefCount=*/false)
+          .TargetPointer;
+    }();
+
+    // If it does, we calculate target pointee base using it, and return it.
+    // Otherwise, we retain the host pointee's base as the target pointee base
+    // of the initialized pointer. It's the user's responsibility to ensure
+    // that if a lookup fails, the host pointee is accessible on the device.
+    return TgtPteeBegin ? calculateTargetPointeeBase(HstPteeBase, HstPteeBegin,
+                                                     TgtPteeBegin)
+                        : HstPteeBase;
+  }
+
+  /// initialized the source buffer for corresponding-pointer-initialization.
+  ///
+  /// It computes and stores the target pointee base address (or the host
+  /// pointee's base address, if lookup of target pointee fails) to the first
+  /// `sizeof(void*)` bytes of \p Buffer, and for larger pointers
+  /// (Fortran descriptors), the remaining fields of the host descriptor
+  /// \p HstPtr after those `sizeof(void*)` bytes.
+  ///
+  /// Corresponding-pointer-initialization represents the initialization of the
+  /// private version of a base-pointer/referring-pointer on a target construct.
+  ///
+  /// For example, for the following test:
+  /// ```cpp
+  ///   int x[10];
+  ///   int *px = &x[0];
+  ///   ...
+  ///   #pragma omp target data map(tofrom:px)
+  ///   {
+  ///     int **ppx = omp_get_mapped_ptr(&px, omp_get_default_device());
+  ///     #pragma omp target map(tofrom:px[1]) is_device_ptr(ppx)
+  ///     {
+  ///        foo(px, ppx);
+  ///     }
+  ///   }
+  /// ```
+  /// The following shows a possible way to implement the mapping of `px`,
+  /// which is pre-determined firstprivate and should get initialized
+  /// via corresponding-pointer-initialization:
+  ///
+  /// (A) Possible way to implement the above with PRIVATE | ATTACH:
+  /// ```llvm
+  ///  ; maps for px:
+  ///  ; &px[0], &px[1], sizeof(px[1]), TO | FROM                // (1)
+  ///  ; &px,    &px[1], sizeof(px),    ATTACH                   // (2)
+  ///  ; &px,    &px[1], sizeof(px),    PRIVATE | ATTACH | PARAM // (3)
+  ///  call... @__omp_outlined...(ptr %px, ptr %ppx)
+  ///  define ... @__omp_outlined(ptr %px, ptr %ppx) {...
+  ///    foo(%px, %ppx)
+  ///  ...}
+  /// ```
+  /// `(1)` maps the pointee `px[1].
+  /// `(2)` attaches it to the mapped version of `px`. It can be controlled by the
+  /// user based on the `attach(auto/always/never)` map-type modifier.
+  /// `(3)` privatizes and initializes the private pointer `px`, and passes it
+  /// into the kernel as the argument `%px`. Can be skipped if `px` is not
+  /// referenced in the target construct.
+  ///
+  /// While this method is not too beneficial compared to just doing the
+  /// initialization in the body of the kernel, like:
+  /// (B) Possible way to implement the above without PRIVATE | ATTACH:
+  /// ```llvm
+  ///  ; maps for px:
+  ///  ; &px[0], &px[1], sizeof(px[1]), TO | FROM | PARAM        // (4)
+  ///  ; &px,    &px[1], sizeof(px),    ATTACH                   // (5)
+  ///  call... @__omp_outlined...(ptr %px0, ptr %ppx)
+  ///  define ... __omp_outlined...(ptr %px0, ptr %ppx) {
+  ///    %px = alloca ptr;
+  ///    store ptr %px0, ptr %px
+  ///    foo(%px, %ppx)
+  ///  }
+  /// ```
+  ///
+  /// (B) is not so convenient for Fortran descriptors, because in
+  /// addition to the lookup, the remaining fields of the descriptor have
+  /// to be passed into the kernel to initialize the private copy, which
+  /// makes (A) a cleaner option for them. e.g.
+  /// ```f90
+  /// integer, pointer :: p(:)
+  /// !$omp target map(p(1))
+  /// ```
+  ///
+  /// (C) Possible mapping for the above Fortran test using PRIVATE | ATTACH:
+  /// ```llvm
+  ///  ; maps for p:
+  ///  ; &p(1),       &p(1), sizeof(p(1)),       TO | FROM
+  ///  ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), ATTACH
+  ///  ; &ref_ptr(p), &p(1), sizeof(ref_ptr(p)), PRIVATE | ATTACH | PARAM
+  ///  call... @__omp_outlined...(ptr %ref_ptr_of_p)
+  void initBufferForCorrespondingPointerInitialization(char *Buffer,
+                                                       void *HstPtr,
+                                                       int64_t HstPtrSize,
+                                                       void *HstPteeBase,
+                                                       void *HstPteeBegin) {
+    constexpr int64_t VoidPtrSize = sizeof(void *);
+    assert(HstPtrSize >= VoidPtrSize &&
+           "corresponding-pointer-initialization: pointer size is too small");
+
+    void *TgtPteeBase =
+        getTargetPointeeBaseForCorrespondingPointerInitialization(HstPteeBase,
+                                                                  HstPteeBegin);
+
+    // Store the target pointee base address to the first VoidPtrSize bytes
+    DP("Initializing corresponding-pointer-initialization source buffer "
+       "for " DPxMOD ", with pointee base " DPxMOD "\n",
+       DPxPTR(HstPtr), DPxPTR(TgtPteeBase));
+    std::memcpy(Buffer, &TgtPteeBase, VoidPtrSize);
+    if (HstPtrSize <= VoidPtrSize)
+      return;
+
+    // For Fortran descriptors, copy the remaining descriptor fields from host
+    uint64_t HstDescriptorFieldsSize = HstPtrSize - VoidPtrSize;
+    void *HstDescriptorFieldsAddr = static_cast<char *>(HstPtr) + VoidPtrSize;
+    DP("Copying %" PRId64
+       " bytes of descriptor fields into corresponding-pointer-initialization "
+       "buffer at offset %" PRId64 ", from " DPxMOD "\n",
+       HstDescriptorFieldsSize, VoidPtrSize, DPxPTR(HstDescriptorFieldsAddr));
+    std::memcpy(Buffer + VoidPtrSize, HstDescriptorFieldsAddr,
+                HstDescriptorFieldsSize);
+  }
+
+  /// Helper function to create and initialize a buffer to be used as the source
+  /// for corresponding-pointer-initialization.
+  void *createAndInitSourceBufferForCorrespondingPointerInitialization(
+      void *HstPtr, int64_t HstPtrSize, void *HstPteeBase, void *HstPteeBegin) {
+    char *Buffer = getOrCreateSourceBufferForSubmitData(AsyncInfo, HstPtrSize);
+    initBufferForCorrespondingPointerInitialization(Buffer, HstPtr, HstPtrSize,
+                                                    HstPteeBase, HstPteeBegin);
+    return Buffer;
+  }
+
   // TODO: What would be the best value here? Should we make it configurable?
   // If the size is larger than this threshold, we will allocate and transfer it
   // immediately instead of packing it.
@@ -1599,7 +1624,9 @@ class PrivateArgumentManagerTy {
   int addArg(void *HstPtr, int64_t ArgSize, int64_t ArgOffset,
              bool IsFirstPrivate, void *&TgtPtr, int TgtArgsIndex,
              map_var_info_t HstPtrName = nullptr,
-             const bool AllocImmediately = false) {
+             const bool AllocImmediately = false, void *HstPteeBase = nullptr,
+             void *HstPteeBegin = nullptr,
+             bool IsCorrespondingPointerInit = false) {
     // If the argument is not first-private, or its size is greater than a
     // predefined threshold, we will allocate memory and issue the transfer
     // immediately.
@@ -1622,9 +1649,19 @@ class PrivateArgumentManagerTy {
       // If first-private, copy data from host
       if (IsFirstPrivate) {
         DP("Submitting firstprivate data to the device.\n");
-        int Ret = Device.submitData(TgtPtr, HstPtr, ArgSize, AsyncInfo);
+
+        // The source value used for corresponding-pointer-initialization
+        // is different vs regular firstprivates.
+        void *DataSource =
+            IsCorrespondingPointerInit
+                ? createAndInitSourceBufferForCorrespondingPointerInitialization(
+                      HstPtr, ArgSize, HstPteeBase, HstPteeBegin)
+                : HstPtr;
+        int Ret = Device.submitData(TgtPtr, DataSource, ArgSize, AsyncInfo);
         if (Ret != OFFLOAD_SUCCESS) {
-          DP("Copying data to device failed, failed.\n");
+          DP("Copying %s data to device failed.\n",
+             IsCorrespondingPointerInit ? "corresponding-pointer-initialization"
+                                        : "firstprivate");
           return OFFLOAD_FAIL;
         }
       }
@@ -1670,8 +1707,10 @@ class PrivateArgumentManagerTy {
         }
       }
 
-      FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
-                                       StartAlignment, Padding, HstPtrName);
+      FirstPrivateArgInfo.emplace_back(
+          TgtArgsIndex, HstPtr, ArgSize, StartAlignment, Padding, HstPtrName,
+          HstPteeBase, HstPteeBegin, IsCorrespondingPointerInit);
+
       FirstPrivateArgSize += Padding + ArgSize;
     }
 
@@ -1690,7 +1729,13 @@ class PrivateArgumentManagerTy {
       for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
         // First pad the pointer as we (have to) pad it on the device too.
         Itr = std::next(Itr, Info.Padding);
-        std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
+
+        if (Info.IsCorrespondingPointerInit)
+          initBufferForCorrespondingPointerInitialization(
+              &*Itr, Info.HstPtrBegin, Info.Size, Info.HstPteeBase,
+              Info.HstPteeBegin);
+        else
+          std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
         Itr = std::next(Itr, Info.Size);
       }
       // Allocate target memory
@@ -1846,40 +1891,56 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
       TgtPtrBegin = HstPtrBase;
       TgtBaseOffset = 0;
     } else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) {
-      TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
-      const bool IsFirstPrivate = (ArgTypes[I] & OMP_TGT_MAPTYPE_TO);
+      // For cases like:
+      // ```
+      // int *p = ...;
+      // #pragma omp target map(p[0:10])
+      // ```
+      // `p` is predetermined firstprivate on the target construct, and the
+      // method to determine the initial value of the private copy on the
+      // device is called "corresponding-pointer-initialization".
+      //
+      // Such firstprivate pointers that need
+      // corresponding-pointer-initialization are represented using the
+      // `PRIVATE | ATTACH` map-types, in contrast to regular firstprivate
+      // entries, which use `PRIVATE | TO`. The structure of these
+      // `PRIVATE | ATTACH` entries is the same as the non-private
+      // `ATTACH` entries used to represent pointer-attachments, i.e.:
+      // ```
+      //  &hst_ptr_base/begin, &hst_ptee_begin, sizeof(hst_ptr)
+      // ```
       const bool IsAttach = (ArgTypes[I] & OMP_TGT_MAPTYPE_ATTACH);
-
-      // Handle PRIVATE | ATTACH specially - allocate but defer initialization
+      void *HstPteeBase = nullptr;
+      void *HstPteeBegin = nullptr;
       if (IsAttach) {
-        DP("Processing PRIVATE | ATTACH map for argument %d\n", I);
-        // Allocate memory for the private variable
-        TgtPtrBegin = DeviceOrErr->allocData(ArgSizes[I], HstPtrBegin);
-        if (!TgtPtrBegin) {
-          DP("Data allocation for private attach array " DPxMOD " failed.\n",
-             DPxPTR(HstPtrBegin));
-          return OFFLOAD_FAIL;
-        }
-        // Track this allocation for later initialization during ATTACH processing
-        AttachInfo.PrivateAttachAllocations[HstPtrBegin] = TgtPtrBegin;
-        DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD
-           " for private attach array " DPxMOD "\n",
-           ArgSizes[I], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
-      } else {
-        // If there is a next argument and it depends on the current one, we need
-        // to allocate the private memory immediately. If this is not the case,
-        // then the argument can be marked for optimization and packed with the
-        // other privates.
-        const bool AllocImmediately =
-            (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
-        Ret = PrivateArgumentManager.addArg(
-            HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
-            TgtArgs.size(), HstPtrName, AllocImmediately);
-        if (Ret != OFFLOAD_SUCCESS) {
-          REPORT("Failed to process %sprivate argument " DPxMOD "\n",
-                 (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
-          return OFFLOAD_FAIL;
-        }
+        // For corresponding-pointer-initialization, Args[I] is HstPteeBegin,
+        // ArgBases[I] is HstPtrBase
+        HstPteeBase = *reinterpret_cast<void **>(HstPtrBase);
+        HstPteeBegin = Args[I];
+        HstPtrBegin =
+            ArgBases[I]; // Allocate memory for the pointer variable itself
+      }
+      TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
+      // Corresponding-pointer-initialization is a special case of firstprivate,
+      // since it also involves initializing the private pointer.
+      const bool IsFirstPrivate =
+          (ArgTypes[I] & OMP_TGT_MAPTYPE_TO) || IsAttach;
+
+      // If there is a next argument and it depends on the current one, we need
+      // to allocate the private memory immediately. If this is not the case,
+      // then the argument can be marked for optimization and packed with the
+      // other privates.
+      const bool AllocImmediately =
+          (I < ArgNum - 1 && (ArgTypes[I + 1] & OMP_TGT_MAPTYPE_MEMBER_OF));
+      Ret = PrivateArgumentManager.addArg(
+          HstPtrBegin, ArgSizes[I], TgtBaseOffset, IsFirstPrivate, TgtPtrBegin,
+          TgtArgs.size(), HstPtrName, AllocImmediately, HstPteeBase,
+          HstPteeBegin, IsAttach);
+      if (Ret != OFFLOAD_SUCCESS) {
+        REPORT("Failed to process %s%sprivate argument " DPxMOD "\n",
+               IsAttach ? "corresponding-pointer-initialization " : "",
+               (IsFirstPrivate ? "first-" : ""), DPxPTR(HstPtrBegin));
+        return OFFLOAD_FAIL;
       }
     } else {
       if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)

>From 1ad9574b4ad30adc69cbd761af53462596564aad Mon Sep 17 00:00:00 2001
From: Abhinav Gaba <abhinav.gaba at intel.com>
Date: Thu, 25 Sep 2025 12:05:12 -0700
Subject: [PATCH 3/5] Minor comment change

---
 offload/libomptarget/omptarget.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 737d3497ca169..b3014af1da3b4 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -1917,8 +1917,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
         // ArgBases[I] is HstPtrBase
         HstPteeBase = *reinterpret_cast<void **>(HstPtrBase);
         HstPteeBegin = Args[I];
-        HstPtrBegin =
-            ArgBases[I]; // Allocate memory for the pointer variable itself
+        HstPtrBegin = ArgBases[I];
       }
       TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
       // Corresponding-pointer-initialization is a special case of firstprivate,

>From c589133004549738b1641ffd466575b6a3717f36 Mon Sep 17 00:00:00 2001
From: Abhinav Gaba <abhinav.gaba at intel.com>
Date: Thu, 25 Sep 2025 12:06:57 -0700
Subject: [PATCH 4/5] Another comment change.

---
 offload/libomptarget/omptarget.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index b3014af1da3b4..730c8af965a61 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -1914,7 +1914,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr,
       void *HstPteeBegin = nullptr;
       if (IsAttach) {
         // For corresponding-pointer-initialization, Args[I] is HstPteeBegin,
-        // ArgBases[I] is HstPtrBase
+        // and ArgBases[I] is both HstPtrBase/HstPtrBegin.
         HstPteeBase = *reinterpret_cast<void **>(HstPtrBase);
         HstPteeBegin = Args[I];
         HstPtrBegin = ArgBases[I];

>From d2dd8b84f67bbe2114f56de21b67980c447f8dea Mon Sep 17 00:00:00 2001
From: Abhinav Gaba <abhinav.gaba at intel.com>
Date: Thu, 25 Sep 2025 12:10:25 -0700
Subject: [PATCH 5/5] Yet another comment change, from clang-format this time.

---
 offload/libomptarget/omptarget.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 730c8af965a61..254a3101791d2 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -1531,8 +1531,8 @@ class PrivateArgumentManagerTy {
   ///  ...}
   /// ```
   /// `(1)` maps the pointee `px[1].
-  /// `(2)` attaches it to the mapped version of `px`. It can be controlled by the
-  /// user based on the `attach(auto/always/never)` map-type modifier.
+  /// `(2)` attaches it to the mapped version of `px`. It can be controlled by
+  /// the user based on the `attach(auto/always/never)` map-type modifier.
   /// `(3)` privatizes and initializes the private pointer `px`, and passes it
   /// into the kernel as the argument `%px`. Can be skipped if `px` is not
   /// referenced in the target construct.



More information about the llvm-commits mailing list