[clang] [llvm] [OpenMPIRBuilder][Clang][NFC] - Combine `emitOffloadingArrays` and `emitOffloadingArraysArgument` in OpenMPIRBuilder (PR #97088)

Pranav Bhandarkar via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 23 14:19:28 PDT 2024


https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/97088

>From ad6ef960b14c23bde1460a0977b6401dc21dfea4 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Sat, 15 Jun 2024 02:00:48 -0500
Subject: [PATCH 01/14] checkpoint commit. Use emitOffloadingArrays from
 OMPIRBuilder in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 241 +++++++++++++++++-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   1 +
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  22 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  10 +
 4 files changed, 265 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a6a87ec88ee8a..4c95aab3c33c1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -47,6 +48,8 @@
 #include <numeric>
 #include <optional>
 
+#define DEBUG_TYPE "clang-openmp-codegen"
+
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
@@ -8868,9 +8871,11 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
   }
 
   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+  auto *Str =  OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
+  LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
+  return Str;
 }
 
 /// Emit the arrays used to pass the captures and map information to the
@@ -9484,8 +9489,96 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
   }
   return DynCGroupMem;
 }
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+                       const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+                       llvm::OpenMPIRBuilder &OMPBuilder,
+                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+  // Get mappable expression information.
+  MappableExprsHandler MEHandler(D, CGF);
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+  CodeGenModule &CGM = CGF.CGM;
+  auto RI = CS.getCapturedRecordDecl()->field_begin();
+  auto *CV = CapturedVars.begin();
+  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
+                                            CE = CS.capture_end();
+       CI != CE; ++CI, ++RI, ++CV) {
+    MappableExprsHandler::MapCombinedInfoTy CurInfo;
+    MappableExprsHandler::StructRangeInfoTy PartialStruct;
 
-static void emitTargetCallKernelLaunch(
+    // VLA sizes are passed to the outlined region by copy and do not have map
+    // information associated.
+    if (CI->capturesVariableArrayType()) {
+      CurInfo.Exprs.push_back(nullptr);
+      CurInfo.BasePointers.push_back(*CV);
+      CurInfo.DevicePtrDecls.push_back(nullptr);
+      CurInfo.DevicePointers.push_back(
+          MappableExprsHandler::DeviceInfoTy::None);
+      CurInfo.Pointers.push_back(*CV);
+      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
+      // Copy to the device as an argument. No need to retrieve it.
+      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
+                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
+      CurInfo.Mappers.push_back(nullptr);
+    } else {
+      // If we have any information in the map clause, we use it, otherwise we
+      // just do a default mapping.
+      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
+      if (!CI->capturesThis())
+        MappedVarSet.insert(CI->getCapturedVar());
+      else
+        MappedVarSet.insert(nullptr);
+      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
+        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
+      // Generate correct mapping for variables captured by reference in
+      // lambdas.
+      if (CI->capturesVariable())
+        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+                                                CurInfo, LambdaPointers);
+    }
+    // We expect to have at least an element of information for this capture.
+    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
+           "Non-existing map pointer for capture!");
+    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
+           "Inconsistent map information sizes!");
+
+    // If there is an entry in PartialStruct it means we have a struct with
+    // individual members mapped. Emit an extra combined entry.
+    if (PartialStruct.Base.isValid()) {
+      CombinedInfo.append(PartialStruct.PreliminaryMapData);
+      MEHandler.emitCombinedEntry(
+          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
+          OMPBuilder, nullptr,
+          !PartialStruct.PreliminaryMapData.BasePointers.empty());
+    }
+
+    // We need to append the results of this capture to what we already have.
+    CombinedInfo.append(CurInfo);
+  }
+  // Adjust MEMBER_OF flags for the lambdas captures.
+  MEHandler.adjustMemberOfForLambdaCaptures(
+      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
+      CombinedInfo.Pointers, CombinedInfo.Types);
+  // Map any list items in a map clause that were not captures because they
+  // weren't referenced within the construct.
+  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+
+  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
+    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
+  };
+  if (CGM.getCodeGenOpts().getDebugInfo() !=
+      llvm::codegenoptions::NoDebugInfo) {
+    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
+    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
+                    FillInfoMap);
+  }
+}
+static void emitTargetCallKernelLaunchNew(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,
     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9501,8 +9594,139 @@ static void emitTargetCallKernelLaunch(
 
   // Fill up the arrays with all the captured variables.
   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+  CGOpenMPRuntime::TargetDataInfo Info;
 
-  // Get mappable expression information.
+  auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+                          -> llvm::OpenMPIRBuilder::MapInfosTy & {
+    CGF.Builder.restoreIP(CodeGenIP);
+    genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
+    return CombinedInfo;
+  };
+  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+    }
+  };
+
+  auto CustomMapperCB = [&](unsigned int I) {
+    llvm::Value *MFunc = nullptr;
+    if (CombinedInfo.Mappers[I]) {
+      Info.HasMapper = true;
+      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+    }
+    return MFunc;
+  };
+  // Fill up the arrays and create the arguments.
+  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+  OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
+                                                                       CGF.AllocaInsertPt->getIterator()),
+                                  CGF.Builder.saveIP(), Info,
+                                  GenMapInfoCB, /*IsNonContiguous=*/true,
+                                  DeviceAddrCB, CustomMapperCB);
+  bool EmitDebug = !CombinedInfo.Names.empty();
+  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
+                                          EmitDebug,
+                                          /*ForEndCall=*/false);
+
+  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
+                                        CGF.VoidPtrTy, CGM.getPointerAlign());
+  InputInfo.PointersArray =
+      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+  InputInfo.SizesArray =
+      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
+  InputInfo.MappersArray =
+      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+  MapTypesArray = Info.RTArgs.MapTypesArray;
+  MapNamesArray = Info.RTArgs.MapNamesArray;
+
+  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
+                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
+                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
+                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
+    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
+
+    if (IsReverseOffloading) {
+      // Reverse offloading is not supported, so just execute on the host.
+      // FIXME: This fallback solution is incorrect since it ignores the
+      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
+      // assert here and ensure SEMA emits an error.
+      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
+      return;
+    }
+
+    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
+    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
+
+    llvm::Value *BasePointersArray =
+        InputInfo.BasePointersArray.emitRawPointer(CGF);
+    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
+    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
+    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
+
+    auto &&EmitTargetCallFallbackCB =
+        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
+         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
+        -> llvm::OpenMPIRBuilder::InsertPointTy {
+      CGF.Builder.restoreIP(IP);
+      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
+      return CGF.Builder.saveIP();
+    };
+
+    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
+    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
+    llvm::Value *NumThreads =
+        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
+    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
+    llvm::Value *NumIterations =
+        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
+    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
+        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
+        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
+
+    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
+        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
+        DynCGGroupMem, HasNoWait);
+
+    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
+        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
+        DeviceID, RTLoc, AllocaIP));
+  };
+
+  if (RequiresOuterTask) {
+    if (NewClangTargetTaskCodeGen) {
+      llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
+    } else {
+      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+    }
+  } else
+    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
+}
+static void emitTargetCallKernelLaunch(
+    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
+    const OMPExecutableDirective &D,
+    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
+    const CapturedStmt &CS, bool OffloadingMandatory,
+    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
+    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
+    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                     const OMPLoopDirective &D)>
+        SizeEmitter,
+    CodeGenFunction &CGF, CodeGenModule &CGM) {
+  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
+
+  // Fill up the arrays with all the captured variables.
+  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+// Get mappable expression information.
   MappableExprsHandler MEHandler(D, CGF);
   llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
@@ -9579,6 +9803,7 @@ static void emitTargetCallKernelLaunch(
 
   CGOpenMPRuntime::TargetDataInfo Info;
   // Fill up the arrays and create the arguments.
+  LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
   bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                    llvm::codegenoptions::NoDebugInfo;
@@ -9586,6 +9811,7 @@ static void emitTargetCallKernelLaunch(
                                           EmitDebug,
                                           /*ForEndCall=*/false);
 
+  LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                         CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9725,7 +9951,13 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+    if (OpenMPClangTargetCodegen)
+      emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+                               RequiresOuterTask, CS, OffloadingMandatory,
+                               Device, OutlinedFnID, InputInfo, MapTypesArray,
+                               MapNamesArray, SizeEmitter, CGF, CGM);
+    else
+      emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
@@ -9748,6 +9980,7 @@ void CGOpenMPRuntime::emitTargetCall(
     } else {
       RegionCodeGenTy ThenRCG(TargetThenGen);
       ThenRCG(CGF);
+      LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
     }
   } else {
     RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 853046bf43495..379c7f16db9c5 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -40,6 +40,7 @@
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
+#define DEBUG_TYPE "clang-openmp-codegen"
 
 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a6995888de7d4..f4449f3c0a44f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2358,6 +2358,22 @@ class OpenMPIRBuilder {
                                    MapInfosTy &CombinedInfo,
                                    TargetDataInfo &Info);
 
+  /// Callback type for creating the map infos for the kernel parameters.
+  /// \param CodeGenIP is the insertion point where code should be generated,
+  ///        if any.
+  using GenMapInfoCallbackTy =
+      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
+
+  /// Emit the arrays used to pass the captures and map information to the
+  /// offloading runtime library. If there is no map or capture information,
+  /// return nullptr by reference.
+  void emitOffloadingArrays(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+      TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+      bool IsNonContiguous = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
   /// return nullptr by reference.
@@ -2367,6 +2383,7 @@ class OpenMPIRBuilder {
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
@@ -2770,11 +2787,6 @@ class OpenMPIRBuilder {
   /// duplicating the body code.
   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
 
-  /// Callback type for creating the map infos for the kernel parameters.
-  /// \param CodeGenIP is the insertion point where code should be generated,
-  ///        if any.
-  using GenMapInfoCallbackTy =
-      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
 
   /// Generator for '#omp target data'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 98da6e2efcb5c..7b9e585d58664 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7403,6 +7403,16 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
   }
 }
 
+void OpenMPIRBuilder::emitOffloadingArrays(
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+    GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    function_ref<Value *(unsigned int)> CustomMapperCB) {
+
+  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
+  emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
+                       Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+}
 void OpenMPIRBuilder::emitOffloadingArrays(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
     TargetDataInfo &Info, bool IsNonContiguous,

>From 449d1b46690659950f4d3c164ccc5fe5acd0128c Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 25 Jun 2024 16:07:37 -0500
Subject: [PATCH 02/14] emitOffloadingArraysArgument and some other prints

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 50 +++++++++----------
 clang/lib/CodeGen/CGOpenMPRuntime.h           | 39 +++++++++++++++
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  2 +
 clang/lib/CodeGen/CodeGenFunction.h           |  1 +
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 12 ++++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 39 +++++++++------
 .../Frontend/OpenMPIRBuilderTest.cpp          |  4 +-
 7 files changed, 102 insertions(+), 45 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 4c95aab3c33c1..9495a122cbe34 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3013,6 +3013,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
   llvm::FunctionType *TaskEntryTy =
       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+  LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
+                          << KmpTaskTWithPrivatesPtrQTy << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
   auto *TaskEntry = llvm::Function::Create(
       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3717,6 +3721,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
       TaskPrivatesMap);
 
+  LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   // kmp_routine_entry_t *task_entry);
@@ -9619,15 +9624,14 @@ static void emitTargetCallKernelLaunchNew(
   };
   // Fill up the arrays and create the arguments.
   LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
-                                                                       CGF.AllocaInsertPt->getIterator()),
-                                  CGF.Builder.saveIP(), Info,
-                                  GenMapInfoCB, /*IsNonContiguous=*/true,
-                                  DeviceAddrCB, CustomMapperCB);
-  bool EmitDebug = !CombinedInfo.Names.empty();
-  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          EmitDebug,
-                                          /*ForEndCall=*/false);
+
+  llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
+      CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+  OMPBuilder.emitOffloadingArraysAndArgs(
+      OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+      GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+      DeviceAddrCB, CustomMapperCB);
 
   LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
@@ -9701,13 +9705,9 @@ static void emitTargetCallKernelLaunchNew(
         DeviceID, RTLoc, AllocaIP));
   };
 
-  if (RequiresOuterTask) {
-    if (NewClangTargetTaskCodeGen) {
-      llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
-    } else {
-      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
-    }
-  } else
+  if (RequiresOuterTask)
+    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+  else
     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
 }
 static void emitTargetCallKernelLaunch(
@@ -9805,10 +9805,9 @@ static void emitTargetCallKernelLaunch(
   // Fill up the arrays and create the arguments.
   LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
-  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+  Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                    llvm::codegenoptions::NoDebugInfo;
   OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          EmitDebug,
                                           /*ForEndCall=*/false);
 
   LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
@@ -9951,16 +9950,16 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    if (OpenMPClangTargetCodegen)
+    // if (OpenMPClangTargetCodegen)
       emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
-    else
-      emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
-                               RequiresOuterTask, CS, OffloadingMandatory,
-                               Device, OutlinedFnID, InputInfo, MapTypesArray,
-                               MapNamesArray, SizeEmitter, CGF, CGM);
+    // else
+    //   emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+    //                            RequiresOuterTask, CS, OffloadingMandatory,
+    //                            Device, OutlinedFnID, InputInfo, MapTypesArray,
+    //                            MapNamesArray, SizeEmitter, CGF, CGM);
   };
 
   auto &&TargetElseGen =
@@ -10723,10 +10722,9 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
                          /*IsNonContiguous=*/true);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
-    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+    Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                      llvm::codegenoptions::NoDebugInfo;
     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                            EmitDebug,
                                             /*ForEndCall=*/false);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index f65314d014c08..f6e3677232f07 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,45 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
+    auto &&printSVHelper =
+        [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
+      for (auto &v : V) {
+        v->dump(os, Ctx);
+      }
+    };
+    auto &&printSV =
+        [&os, printSVHelper](std::string s,
+                             const SmallVector<const Expr *, 4> &V) -> void {
+      os << s << ":[\n";
+      printSVHelper(V);
+      os << "]\n";
+    };
+    // SmallVector<const Expr *, 4> PrivateVars;
+    // SmallVector<const Expr *, 4> PrivateCopies;
+    // SmallVector<const Expr *, 4> FirstprivateVars;
+    // SmallVector<const Expr *, 4> FirstprivateCopies;
+    // SmallVector<const Expr *, 4> FirstprivateInits;
+    // SmallVector<const Expr *, 4> LastprivateVars;
+    // SmallVector<const Expr *, 4> LastprivateCopies;
+    // SmallVector<const Expr *, 4> ReductionVars;
+    // SmallVector<const Expr *, 4> ReductionOrigs;
+    // SmallVector<const Expr *, 4> ReductionCopies;
+    // SmallVector<const Expr *, 4> ReductionOps;
+    // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
+
+    printSV("PrivateVars", PrivateVars);
+    printSV("PrivateCopies", PrivateCopies);
+    printSV("FirstprivateVars", FirstprivateVars);
+    printSV("FirstprivateCopies", FirstprivateCopies);
+    printSV("FirstprivateInits", FirstprivateInits);
+    printSV("LastprivateVars", LastprivateVars);
+    printSV("LastprivateCopies", LastprivateCopies);
+    printSV("ReductionVars", ReductionVars);
+    printSV("ReductionOrigs", ReductionOrigs);
+    printSV("ReductionCopies", ReductionCopies);
+    printSV("ReductionOps", ReductionOps);
+  }
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 379c7f16db9c5..0a060324c60a7 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5264,6 +5264,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     }
     BodyGen(CGF);
   };
+  LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
+  LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
       S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true,
       Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 67e3019565cd0..09ffe7a68a64f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -440,6 +440,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
 
 public:
+  void printLocalDeclMap();
   /// Return PostAllocaInsertPt. If it is not yet created, then insert it
   /// immediately after AllocaInsertPt.
   llvm::Instruction *getPostAllocaInsertPoint() {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index f4449f3c0a44f..7782ad5998917 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2231,6 +2231,8 @@ class OpenMPIRBuilder {
     /// The total number of pointers passed to the runtime library.
     unsigned NumberOfPtrs = 0u;
 
+    bool EmitDebug = false;
+
     explicit TargetDataInfo() {}
     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
                             bool SeparateBeginEndCalls)
@@ -2349,7 +2351,6 @@ class OpenMPIRBuilder {
   void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                     OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                     OpenMPIRBuilder::TargetDataInfo &Info,
-                                    bool EmitDebug = false,
                                     bool ForEndCall = false);
 
   /// Emit an array of struct descriptors to be assigned to the offload args.
@@ -2369,7 +2370,7 @@ class OpenMPIRBuilder {
   /// return nullptr by reference.
   void emitOffloadingArrays(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
       bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
@@ -2384,6 +2385,13 @@ class OpenMPIRBuilder {
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
 
+  void emitOffloadingArraysAndArgs(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+      TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+      bool IsNonContiguous = false, bool ForEndCall = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7b9e585d58664..88c04c3803e21 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6372,8 +6372,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
                          CustomMapperCB);
 
     TargetDataRTArgs RTArgs;
-    emitOffloadingArraysArgument(Builder, RTArgs, Info,
-                                 !MapInfo->Names.empty());
+    emitOffloadingArraysArgument(Builder, RTArgs, Info);
 
     // Emit the number of elements in the offloading arrays.
     Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -6426,8 +6425,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
   // Generate code for the closing of the data region.
   auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     TargetDataRTArgs RTArgs;
-    emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
-                                 /*ForEndCall=*/true);
+    Info.EmitDebug = !MapInfo->Names.empty();
+    emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true);
 
     // Emit the number of elements in the offloading arrays.
     Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -7057,6 +7056,16 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
                     << "\n");
   return Builder.saveIP();
 }
+void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+    TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+    bool IsNonContiguous, bool ForEndCall,
+    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    function_ref<Value *(unsigned int)> CustomMapperCB) {
+  emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+                       DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
 static void emitTargetCall(
     OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
     OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn,
@@ -7069,13 +7078,11 @@ static void emitTargetCall(
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);
 
-  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
-  OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info,
-                                  /*IsNonContiguous=*/true);
-
   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
-  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
-                                          !MapInfo.Names.empty());
+  OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
+                                         RTArgs, GenMapInfoCB,
+                                         /*IsNonContiguous=*/true,
+                                         /*ForEndCall=*/false);
 
   //  emitKernelLaunch
   auto &&EmitTargetCallFallbackCB =
@@ -7085,7 +7092,7 @@ static void emitTargetCall(
     return Builder.saveIP();
   };
 
-  unsigned NumTargetItems = MapInfo.BasePointers.size();
+  unsigned NumTargetItems = Info.NumberOfPtrs;
   // TODO: Use correct device ID
   Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
   Value *NumTeamsVal = Builder.getInt32(NumTeams);
@@ -7279,7 +7286,6 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
 void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                                    TargetDataRTArgs &RTArgs,
                                                    TargetDataInfo &Info,
-                                                   bool EmitDebug,
                                                    bool ForEndCall) {
   assert((!ForEndCall || Info.separateBeginEndCalls()) &&
          "expected region end call to runtime only when end call is separate");
@@ -7319,7 +7325,7 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
 
   // Only emit the mapper information arrays if debug information is
   // requested.
-  if (!EmitDebug)
+  if (!Info.EmitDebug)
     RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
   else
     RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
@@ -7404,8 +7410,9 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
 }
 
 void OpenMPIRBuilder::emitOffloadingArrays(
-    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-    GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+    GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+    bool IsNonContiguous,
     function_ref<void(unsigned int, Value *)> DeviceAddrCB,
     function_ref<Value *(unsigned int)> CustomMapperCB) {
 
@@ -7518,9 +7525,11 @@ void OpenMPIRBuilder::emitOffloadingArrays(
     auto *MapNamesArrayGbl =
         createOffloadMapnames(CombinedInfo.Names, MapnamesName);
     Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
+    Info.EmitDebug = true;
   } else {
     Info.RTArgs.MapNamesArray =
         Constant::getNullValue(PointerType::getUnqual(Builder.getContext()));
+    Info.EmitDebug = false;
   }
 
   // If there's a present map type modifier, it must not be applied to the end
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 8653bbd3d38fd..cb4c289f409a1 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6902,8 +6902,8 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
   Info.RTArgs.MappersArray =
       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
   Info.NumberOfPtrs = 4;
-
-  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false);
+  Info.EmitDebug = false;
+  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
 
   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
   EXPECT_NE(RTArgs.PointersArray, nullptr);

>From 5fac34338cda635a915fe2489cb30fc54c36fa4f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:33:53 -0500
Subject: [PATCH 03/14] clean up, clean up, everybody clean up

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 206 +-------------------------
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  39 -----
 clang/lib/CodeGen/CGStmtOpenMP.cpp    |   4 -
 clang/lib/CodeGen/CodeGenFunction.h   |   1 -
 4 files changed, 5 insertions(+), 245 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9495a122cbe34..e3b7cdbe0dfa8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -40,7 +40,6 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -48,8 +47,6 @@
 #include <numeric>
 #include <optional>
 
-#define DEBUG_TYPE "clang-openmp-codegen"
-
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
@@ -3013,10 +3010,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
   llvm::FunctionType *TaskEntryTy =
       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
-  LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
-  LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
-                          << KmpTaskTWithPrivatesPtrQTy << "\n");
-  LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
   auto *TaskEntry = llvm::Function::Create(
       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3721,7 +3714,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
       TaskPrivatesMap);
 
-  LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   // kmp_routine_entry_t *task_entry);
@@ -8876,11 +8868,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
   }
 
   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-  auto *Str =  OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
-  LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
-  return Str;
 }
 
 /// Emit the arrays used to pass the captures and map information to the
@@ -9583,7 +9573,7 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                     FillInfoMap);
   }
 }
-static void emitTargetCallKernelLaunchNew(
+static void emitTargetCallKernelLaunch(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,
     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9622,9 +9612,8 @@ static void emitTargetCallKernelLaunchNew(
     }
     return MFunc;
   };
-  // Fill up the arrays and create the arguments.
-  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-
+  // Fill up the basepointers, pointers and mapper arrays and create the
+  // arguments.
   llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
       CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
 
@@ -9633,184 +9622,6 @@ static void emitTargetCallKernelLaunchNew(
       GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
       DeviceAddrCB, CustomMapperCB);
 
-  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
-  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
-                                        CGF.VoidPtrTy, CGM.getPointerAlign());
-  InputInfo.PointersArray =
-      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
-  InputInfo.SizesArray =
-      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
-  InputInfo.MappersArray =
-      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
-  MapTypesArray = Info.RTArgs.MapTypesArray;
-  MapNamesArray = Info.RTArgs.MapNamesArray;
-
-  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
-                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
-                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
-                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
-    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
-
-    if (IsReverseOffloading) {
-      // Reverse offloading is not supported, so just execute on the host.
-      // FIXME: This fallback solution is incorrect since it ignores the
-      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
-      // assert here and ensure SEMA emits an error.
-      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
-                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
-      return;
-    }
-
-    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
-    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
-
-    llvm::Value *BasePointersArray =
-        InputInfo.BasePointersArray.emitRawPointer(CGF);
-    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
-    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
-    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
-
-    auto &&EmitTargetCallFallbackCB =
-        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
-         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
-        -> llvm::OpenMPIRBuilder::InsertPointTy {
-      CGF.Builder.restoreIP(IP);
-      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
-                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
-      return CGF.Builder.saveIP();
-    };
-
-    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
-    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
-    llvm::Value *NumThreads =
-        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
-    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
-    llvm::Value *NumIterations =
-        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
-    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
-    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
-        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
-    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
-        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
-        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
-
-    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
-        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
-        DynCGGroupMem, HasNoWait);
-
-    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
-        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
-        DeviceID, RTLoc, AllocaIP));
-  };
-
-  if (RequiresOuterTask)
-    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
-  else
-    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
-}
-static void emitTargetCallKernelLaunch(
-    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
-    const OMPExecutableDirective &D,
-    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
-    const CapturedStmt &CS, bool OffloadingMandatory,
-    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
-    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
-    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
-    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
-                                     const OMPLoopDirective &D)>
-        SizeEmitter,
-    CodeGenFunction &CGF, CodeGenModule &CGM) {
-  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
-
-  // Fill up the arrays with all the captured variables.
-  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
-// Get mappable expression information.
-  MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
-  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
-
-  auto RI = CS.getCapturedRecordDecl()->field_begin();
-  auto *CV = CapturedVars.begin();
-  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
-                                            CE = CS.capture_end();
-       CI != CE; ++CI, ++RI, ++CV) {
-    MappableExprsHandler::MapCombinedInfoTy CurInfo;
-    MappableExprsHandler::StructRangeInfoTy PartialStruct;
-
-    // VLA sizes are passed to the outlined region by copy and do not have map
-    // information associated.
-    if (CI->capturesVariableArrayType()) {
-      CurInfo.Exprs.push_back(nullptr);
-      CurInfo.BasePointers.push_back(*CV);
-      CurInfo.DevicePtrDecls.push_back(nullptr);
-      CurInfo.DevicePointers.push_back(
-          MappableExprsHandler::DeviceInfoTy::None);
-      CurInfo.Pointers.push_back(*CV);
-      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
-          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
-      // Copy to the device as an argument. No need to retrieve it.
-      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
-                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
-                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
-      CurInfo.Mappers.push_back(nullptr);
-    } else {
-      // If we have any information in the map clause, we use it, otherwise we
-      // just do a default mapping.
-      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
-      if (!CI->capturesThis())
-        MappedVarSet.insert(CI->getCapturedVar());
-      else
-        MappedVarSet.insert(nullptr);
-      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
-        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
-      // Generate correct mapping for variables captured by reference in
-      // lambdas.
-      if (CI->capturesVariable())
-        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
-                                                CurInfo, LambdaPointers);
-    }
-    // We expect to have at least an element of information for this capture.
-    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
-           "Non-existing map pointer for capture!");
-    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
-           "Inconsistent map information sizes!");
-
-    // If there is an entry in PartialStruct it means we have a struct with
-    // individual members mapped. Emit an extra combined entry.
-    if (PartialStruct.Base.isValid()) {
-      CombinedInfo.append(PartialStruct.PreliminaryMapData);
-      MEHandler.emitCombinedEntry(
-          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
-          OMPBuilder, nullptr,
-          !PartialStruct.PreliminaryMapData.BasePointers.empty());
-    }
-
-    // We need to append the results of this capture to what we already have.
-    CombinedInfo.append(CurInfo);
-  }
-  // Adjust MEMBER_OF flags for the lambdas captures.
-  MEHandler.adjustMemberOfForLambdaCaptures(
-      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
-      CombinedInfo.Pointers, CombinedInfo.Types);
-  // Map any list items in a map clause that were not captures because they
-  // weren't referenced within the construct.
-  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
-
-  CGOpenMPRuntime::TargetDataInfo Info;
-  // Fill up the arrays and create the arguments.
-  LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
-  Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
-                   llvm::codegenoptions::NoDebugInfo;
-  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          /*ForEndCall=*/false);
-
-  LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                         CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9950,16 +9761,10 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    // if (OpenMPClangTargetCodegen)
-      emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
-    // else
-    //   emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
-    //                            RequiresOuterTask, CS, OffloadingMandatory,
-    //                            Device, OutlinedFnID, InputInfo, MapTypesArray,
-    //                            MapNamesArray, SizeEmitter, CGF, CGM);
   };
 
   auto &&TargetElseGen =
@@ -9979,7 +9784,6 @@ void CGOpenMPRuntime::emitTargetCall(
     } else {
       RegionCodeGenTy ThenRCG(TargetThenGen);
       ThenRCG(CGF);
-      LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
     }
   } else {
     RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index f6e3677232f07..f65314d014c08 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,45 +122,6 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
-  void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
-    auto &&printSVHelper =
-        [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
-      for (auto &v : V) {
-        v->dump(os, Ctx);
-      }
-    };
-    auto &&printSV =
-        [&os, printSVHelper](std::string s,
-                             const SmallVector<const Expr *, 4> &V) -> void {
-      os << s << ":[\n";
-      printSVHelper(V);
-      os << "]\n";
-    };
-    // SmallVector<const Expr *, 4> PrivateVars;
-    // SmallVector<const Expr *, 4> PrivateCopies;
-    // SmallVector<const Expr *, 4> FirstprivateVars;
-    // SmallVector<const Expr *, 4> FirstprivateCopies;
-    // SmallVector<const Expr *, 4> FirstprivateInits;
-    // SmallVector<const Expr *, 4> LastprivateVars;
-    // SmallVector<const Expr *, 4> LastprivateCopies;
-    // SmallVector<const Expr *, 4> ReductionVars;
-    // SmallVector<const Expr *, 4> ReductionOrigs;
-    // SmallVector<const Expr *, 4> ReductionCopies;
-    // SmallVector<const Expr *, 4> ReductionOps;
-    // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
-
-    printSV("PrivateVars", PrivateVars);
-    printSV("PrivateCopies", PrivateCopies);
-    printSV("FirstprivateVars", FirstprivateVars);
-    printSV("FirstprivateCopies", FirstprivateCopies);
-    printSV("FirstprivateInits", FirstprivateInits);
-    printSV("LastprivateVars", LastprivateVars);
-    printSV("LastprivateCopies", LastprivateCopies);
-    printSV("ReductionVars", ReductionVars);
-    printSV("ReductionOrigs", ReductionOrigs);
-    printSV("ReductionCopies", ReductionCopies);
-    printSV("ReductionOps", ReductionOps);
-  }
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 0a060324c60a7..74d99d9812bda 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,12 +35,10 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
 #include <optional>
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
-#define DEBUG_TYPE "clang-openmp-codegen"
 
 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
 
@@ -5264,8 +5262,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     }
     BodyGen(CGF);
   };
-  LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
-  LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
       S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true,
       Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 09ffe7a68a64f..67e3019565cd0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -440,7 +440,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
 
 public:
-  void printLocalDeclMap();
   /// Return PostAllocaInsertPt. If it is not yet created, then insert it
   /// immediately after AllocaInsertPt.
   llvm::Instruction *getPostAllocaInsertPoint() {

>From f761f4c33afb873074782d51222ed87540627d49 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:39:07 -0500
Subject: [PATCH 04/14] Add Debug.h include in CGStmtOpenMP.cpp because removal
 is not related to my change

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 74d99d9812bda..853046bf43495 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include <optional>
 using namespace clang;
 using namespace CodeGen;

>From 5c34a68ada279b5ba9b5bc8116aa08c4af2d197c Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 14:13:36 -0500
Subject: [PATCH 05/14] Document emitOffloadingArrays and
 emitOffloadingArraysAndArgs in OMPIRBuilder.h

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 28 +++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 7782ad5998917..50786ac3d5261 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2367,24 +2367,36 @@ class OpenMPIRBuilder {
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference.
+  /// return nullptr by reference. This is the first of two overloads - this
+  /// one accepts a reference to a MapInfosTy object that contains combined
+  /// information generated for mappable clauses, including base pointers,
+  /// pointers, sizes, map types, user-defined mappers, and non-contiguous
+  /// information.
   void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-      bool IsNonContiguous = false,
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
+      TargetDataInfo &Info, bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference.
+  /// return nullptr by reference. This is the second of two overloads - instead
+  /// of accepting a reference to a MapInfosTy object, this overload accepts
+  /// a callback of type GenMapInfoCallbackTy to populate a MapInfosTy object
+  /// with mapping information.
   void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
-      TargetDataInfo &Info, bool IsNonContiguous = false,
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+      bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
-
+  /// Allocates memory for and populates the arrays required for offloading
+  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+  /// emits their base addresses as arguments to be passed to the runtime
+  /// library. In essence, this function is a combination of
+  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+  /// be preferred by clients of OpenMPIRBuilder.
   void emitOffloadingArraysAndArgs(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
       TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,

>From 09ee31148d9d0297e893a49cdb546427c88258cc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 15:29:44 -0500
Subject: [PATCH 06/14] refactor genMapInfo

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 42 +++++++++++++++++++++------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e3b7cdbe0dfa8..0122f33d201d7 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9484,14 +9484,14 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
   }
   return DynCGroupMem;
 }
-static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
-                       const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
-                       llvm::OpenMPIRBuilder &OMPBuilder,
-                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
-  // Get mappable expression information.
-  MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
-  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+static void genMapInfoForCaptures(
+    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+    llvm::OpenMPIRBuilder &OMPBuilder,
+    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
+    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+
   CodeGenModule &CGM = CGF.CGM;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
@@ -9559,9 +9559,18 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
   MEHandler.adjustMemberOfForLambdaCaptures(
       OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
       CombinedInfo.Pointers, CombinedInfo.Types);
+}
+static void
+genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+           MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+           llvm::OpenMPIRBuilder &OMPBuilder,
+           const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
+               llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
+
+  CodeGenModule &CGM = CGF.CGM;
   // Map any list items in a map clause that were not captures because they
   // weren't referenced within the construct.
-  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
 
   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
@@ -9573,6 +9582,21 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                     FillInfoMap);
   }
 }
+
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+                       const CapturedStmt &CS,
+                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+                       llvm::OpenMPIRBuilder &OMPBuilder,
+                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+  // Get mappable expression information.
+  MappableExprsHandler MEHandler(D, CGF);
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+
+  genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
+                        LambdaPointers, MappedVarSet, CombinedInfo);
+  genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
+}
 static void emitTargetCallKernelLaunch(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,

>From 4872692f6ac421e86924f2d524fe10b45fe69dad Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 27 Jun 2024 13:51:34 -0500
Subject: [PATCH 07/14] Use CGOpenMPRuntime::emitTargetDataStandAloneCall

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 44 +++++++++++++++++++++------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 0122f33d201d7..53f7d4d807f21 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10539,21 +10539,45 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
                                           PrePostActionTy &) {
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+    CGOpenMPRuntime::TargetDataInfo Info;
 
     // Get map clause information.
-    MappableExprsHandler MEHandler(D, CGF);
-    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
+    auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+        -> llvm::OpenMPIRBuilder::MapInfosTy & {
+      CGF.Builder.restoreIP(CodeGenIP);
+      MappableExprsHandler MEHandler(D, CGF);
+      genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+      return CombinedInfo;
+    };
 
-    CGOpenMPRuntime::TargetDataInfo Info;
-    // Fill up the arrays and create the arguments.
-    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
-                         /*IsNonContiguous=*/true);
+    auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+      if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+        Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+      }
+    };
+
+    auto CustomMapperCB = [&](unsigned int I) {
+      llvm::Value *MFunc = nullptr;
+      if (CombinedInfo.Mappers[I]) {
+        Info.HasMapper = true;
+        MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+      }
+      return MFunc;
+    };
+
+    // Fill up the basepointers, pointers and mapper arrays and create the
+    // arguments.
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+    InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
+                                           CGF.AllocaInsertPt->getIterator());
+
+    OMPBuilder.emitOffloadingArraysAndArgs(
+        OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+        GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+        DeviceAddrCB, CustomMapperCB);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
-    Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
-                     llvm::codegenoptions::NoDebugInfo;
-    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                            /*ForEndCall=*/false);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                           CGF.VoidPtrTy, CGM.getPointerAlign());

>From 0e1f43f7615bede8b2d9d124bd2cc2a2a36e061e Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 10:49:35 -0500
Subject: [PATCH 08/14] Use static function emitOffloadingArraysAndArgs in
 emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 67 ++++++++++---------
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 13 ++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 13 ++++
 3 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 53f7d4d807f21..0b41f80706f2e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8921,6 +8921,40 @@ static void emitOffloadingArrays(
                                   /*IsNonContiguous=*/true, DeviceAddrCB,
                                   CustomMapperCB);
 }
+/// Emit the arrays used to pass the captures and map information to the
+/// offloading runtime library. If there is no map or capture information,
+/// return nullptr by reference.
+static void emitOffloadingArraysAndArgs(
+    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
+    bool IsNonContiguous = false, bool ForEndCall = false) {
+  CodeGenModule &CGM = CGF.CGM;
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
+                         CGF.AllocaInsertPt->getIterator());
+  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
+                          CGF.Builder.GetInsertPoint());
+
+  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+    }
+  };
+
+  auto CustomMapperCB = [&](unsigned int I) {
+    llvm::Value *MFunc = nullptr;
+    if (CombinedInfo.Mappers[I]) {
+      Info.HasMapper = true;
+      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+    }
+    return MFunc;
+  };
+  OMPBuilder.emitOffloadingArraysAndArgs(
+      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
+      ForEndCall, DeviceAddrCB, CustomMapperCB);
+}
 
 /// Check for inner distribute directive.
 static const OMPExecutableDirective *
@@ -9614,37 +9648,10 @@ static void emitTargetCallKernelLaunch(
   // Fill up the arrays with all the captured variables.
   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
   CGOpenMPRuntime::TargetDataInfo Info;
+  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
 
-  auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
-                          -> llvm::OpenMPIRBuilder::MapInfosTy & {
-    CGF.Builder.restoreIP(CodeGenIP);
-    genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
-    return CombinedInfo;
-  };
-  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-    }
-  };
-
-  auto CustomMapperCB = [&](unsigned int I) {
-    llvm::Value *MFunc = nullptr;
-    if (CombinedInfo.Mappers[I]) {
-      Info.HasMapper = true;
-      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-    }
-    return MFunc;
-  };
-  // Fill up the basepointers, pointers and mapper arrays and create the
-  // arguments.
-  llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
-      CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
-  OMPBuilder.emitOffloadingArraysAndArgs(
-      OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
-      GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
-      DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);
 
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 50786ac3d5261..1377ca8ce45b2 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2404,6 +2404,19 @@ class OpenMPIRBuilder {
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
+  /// Allocates memory for and populates the arrays required for offloading
+  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+  /// emits their base addresses as arguments to be passed to the runtime
+  /// library. In essence, this function is a combination of
+  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+  /// be preferred by clients of OpenMPIRBuilder.
+  void emitOffloadingArraysAndArgs(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+      TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
+      bool IsNonContiguous = false, bool ForEndCall = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 88c04c3803e21..3b71b2be3086f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7066,6 +7066,17 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
                        DeviceAddrCB, CustomMapperCB);
   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
  }
+
+void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+    TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+    bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    function_ref<Value *(unsigned int)> CustomMapperCB) {
+  emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
+                       IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
+
 static void emitTargetCall(
     OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
     OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn,
@@ -7074,6 +7085,8 @@ static void emitTargetCall(
     OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
     SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
 
+ 
+
   OpenMPIRBuilder::TargetDataInfo Info(
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);

>From 32edf70615a2a049cca7bd275c9d7436e749e725 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:27:58 -0500
Subject: [PATCH 09/14] Use static function emitOffloadingArraysAndArgs in
 emitTargetDataStandAloneCall in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 40 ++++-----------------------
 1 file changed, 5 insertions(+), 35 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 0b41f80706f2e..26976b1565209 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10547,44 +10547,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
     CGOpenMPRuntime::TargetDataInfo Info;
+    MappableExprsHandler MEHandler(D, CGF);
+    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);
 
-    // Get map clause information.
-    auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
-        -> llvm::OpenMPIRBuilder::MapInfosTy & {
-      CGF.Builder.restoreIP(CodeGenIP);
-      MappableExprsHandler MEHandler(D, CGF);
-      genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
-      return CombinedInfo;
-    };
-
-    auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-      if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-        Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-      }
-    };
-
-    auto CustomMapperCB = [&](unsigned int I) {
-      llvm::Value *MFunc = nullptr;
-      if (CombinedInfo.Mappers[I]) {
-        Info.HasMapper = true;
-        MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-      }
-      return MFunc;
-    };
-
-    // Fill up the basepointers, pointers and mapper arrays and create the
-    // arguments.
-    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-    InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
-                                           CGF.AllocaInsertPt->getIterator());
-
-    OMPBuilder.emitOffloadingArraysAndArgs(
-        OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
-        GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
-        DeviceAddrCB, CustomMapperCB);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
+
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                           CGF.VoidPtrTy, CGM.getPointerAlign());

>From 6ce0c84f93d1066c1a4f4bb7b1530b5c2b9d4144 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:30:10 -0500
Subject: [PATCH 10/14] Remove emitOffloadingArrays from CGOpenMPRuntime.cpp
 because it is not used anymore. Use emitOffloadingArraysAndArgs

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 49 ---------------------------
 1 file changed, 49 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 26976b1565209..57e958f644b18 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8872,55 +8872,6 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
 }
-
-/// Emit the arrays used to pass the captures and map information to the
-/// offloading runtime library. If there is no map or capture information,
-/// return nullptr by reference.
-static void emitOffloadingArrays(
-    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
-    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
-    bool IsNonContiguous = false) {
-  CodeGenModule &CGM = CGF.CGM;
-
-  // Reset the array information.
-  Info.clearArrayInfo();
-  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
-
-  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
-                         CGF.AllocaInsertPt->getIterator());
-  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
-                          CGF.Builder.GetInsertPoint());
-
-  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
-    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
-  };
-  if (CGM.getCodeGenOpts().getDebugInfo() !=
-      llvm::codegenoptions::NoDebugInfo) {
-    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
-    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
-                    FillInfoMap);
-  }
-
-  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-    }
-  };
-
-  auto CustomMapperCB = [&](unsigned int I) {
-    llvm::Value *MFunc = nullptr;
-    if (CombinedInfo.Mappers[I]) {
-      Info.HasMapper = true;
-      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-    }
-    return MFunc;
-  };
-  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
-                                  /*IsNonContiguous=*/true, DeviceAddrCB,
-                                  CustomMapperCB);
-}
 /// Emit the arrays used to pass the captures and map information to the
 /// offloading runtime library. If there is no map or capture information,
 /// return nullptr by reference.

>From a7ce3ce2ad355427c0aa58b996df0f304edc2185 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:05:02 -0500
Subject: [PATCH 11/14] Remove overloads of emitOffloadingArrays and
 emitOffloadingArraysAndArgs that accept GenMapInfoCallbackTy

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  1 -
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 34 ++-----------------
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 31 +++--------------
 3 files changed, 8 insertions(+), 58 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 57e958f644b18..8a39dbdbeec53 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9477,7 +9477,6 @@ static void genMapInfoForCaptures(
     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
 
-  CodeGenModule &CGM = CGF.CGM;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1377ca8ce45b2..60e79ec3726ce 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2367,43 +2367,15 @@ class OpenMPIRBuilder {
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference. This is the first of two overloads - this
-  /// one accepts a reference to a MapInfosTy object that contains combined
-  /// information generated for mappable clauses, including base pointers,
-  /// pointers, sizes, map types, user-defined mappers, and non-contiguous
-  /// information.
+  /// return nullptr by reference. Accepts a reference to a MapInfosTy object
+  /// that contains information generated for mappable clauses,
+  /// including base pointers, pointers, sizes, map types, user-defined mappers.
   void emitOffloadingArrays(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
       TargetDataInfo &Info, bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
-  /// Emit the arrays used to pass the captures and map information to the
-  /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference. This is the second of two overloads - Instead
-  /// of accepting a reference to a MapInfosTy object, this overload accepts
-  /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
-  /// with mapping information.
-  void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-      bool IsNonContiguous = false,
-      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
-      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
-  /// Allocates memory for and populates the arrays required for offloading
-  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
-  /// emits their base addresses as arguments to be passed to the runtime
-  /// library. In essence, this function is a combination of
-  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
-  /// be preferred by clients of OpenMPIRBuilder.
-  void emitOffloadingArraysAndArgs(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-      TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
-      bool IsNonContiguous = false, bool ForEndCall = false,
-      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
-      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
   /// Allocates memory for and populates the arrays required for offloading
   /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
   /// emits their base addresses as arguments to be passed to the runtime
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3b71b2be3086f..e97029ceca33f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7056,26 +7056,15 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
                     << "\n");
   return Builder.saveIP();
 }
-void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
-    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-    TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
-    bool IsNonContiguous, bool ForEndCall,
-    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
-    function_ref<Value *(unsigned int)> CustomMapperCB) {
-  emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
-                       DeviceAddrCB, CustomMapperCB);
-  emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
-
 void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
     TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
     bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
     function_ref<Value *(unsigned int)> CustomMapperCB) {
-  emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
-                       IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous,
+                       DeviceAddrCB, CustomMapperCB);
   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
+}
 
 static void emitTargetCall(
     OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
@@ -7091,9 +7080,10 @@ static void emitTargetCall(
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);
 
+  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
   OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
-                                         RTArgs, GenMapInfoCB,
+                                         RTArgs, MapInfo,
                                          /*IsNonContiguous=*/true,
                                          /*ForEndCall=*/false);
 
@@ -7422,17 +7412,6 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
   }
 }
 
-void OpenMPIRBuilder::emitOffloadingArrays(
-    InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-    GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-    bool IsNonContiguous,
-    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
-    function_ref<Value *(unsigned int)> CustomMapperCB) {
-
-  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
-  emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
-                       Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
-}
 void OpenMPIRBuilder::emitOffloadingArrays(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
     TargetDataInfo &Info, bool IsNonContiguous,

>From b9b687eb564dfda81afd3007a80b7ce5f3c0174b Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:16:41 -0500
Subject: [PATCH 12/14] Undo an unnecessary change in the location of the
 declaration of GenMapInfoCallbackTy in OMPIRBuilder.h

---
 llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 60e79ec3726ce..1614d5716d28c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2359,12 +2359,6 @@ class OpenMPIRBuilder {
                                    MapInfosTy &CombinedInfo,
                                    TargetDataInfo &Info);
 
-  /// Callback type for creating the map infos for the kernel parameters.
-  /// \param CodeGenIP is the insertion point where code should be generated,
-  ///        if any.
-  using GenMapInfoCallbackTy =
-      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
-
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
   /// return nullptr by reference. Accepts a reference to a MapInfosTy object
@@ -2792,6 +2786,11 @@ class OpenMPIRBuilder {
   /// duplicating the body code.
   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
 
+  /// Callback type for creating the map infos for the kernel parameters.
+  /// \param CodeGenIP is the insertion point where code should be generated,
+  ///        if any.
+  using GenMapInfoCallbackTy =
+      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
 
   /// Generator for '#omp target data'
   ///

>From e74f34bbec42f634e030a08259f6fdd64a7ce7c7 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 22 Jul 2024 13:23:00 -0500
Subject: [PATCH 13/14] Address review comments

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8a39dbdbeec53..3210bd414f8ba 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9473,10 +9473,10 @@ static void genMapInfoForCaptures(
     MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
     const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
     llvm::OpenMPIRBuilder &OMPBuilder,
-    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
 
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
@@ -9574,11 +9574,10 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                        MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
   // Get mappable expression information.
   MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
 
   genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
-                        LambdaPointers, MappedVarSet, CombinedInfo);
+                        MappedVarSet, CombinedInfo);
   genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
 }
 static void emitTargetCallKernelLaunch(

>From 0e7dd273509389c9eca61b3ffa97f2a3f4f54e5d Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 23 Jul 2024 16:19:03 -0500
Subject: [PATCH 14/14] clang-format fix

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e97029ceca33f..eba9129bf4953 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7074,8 +7074,6 @@ static void emitTargetCall(
     OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
     SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
 
- 
-
   OpenMPIRBuilder::TargetDataInfo Info(
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);



More information about the cfe-commits mailing list