[clang] [llvm] [OpenMPIRBuilder][Clang][NFC] - Combine `emitOffloadingArrays` and `emitOffloadingArraysArgument` in OpenMPIRBuilder (PR #97088)

Pranav Bhandarkar via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 28 10:23:48 PDT 2024


https://github.com/bhandarkar-pranav created https://github.com/llvm/llvm-project/pull/97088

This PR introduces a new interface in `OpenMPIRBuilder` that combines the creation of the so-called offloading pointer arrays and their subsequent preparation as arguments to the OpenMP runtime library. We then use this in Clang. This is intended to be used in the near future by other frontends such as Flang when lowering MLIR to LLVMIR.

>From dc9e64a29d6d1fd84ad630cb002d1129ea6a0a31 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Sat, 15 Jun 2024 02:00:48 -0500
Subject: [PATCH 01/12] checkpoint commit. Use emitOffloadinArrays from
 OMPIRBuilder in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 241 +++++++++++++++++-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   1 +
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  22 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  10 +
 4 files changed, 265 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..9632ef912ebfe 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -45,6 +46,8 @@
 #include <numeric>
 #include <optional>
 
+#define DEBUG_TYPE "clang-openmp-codegen"
+
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
@@ -8831,9 +8834,11 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
   }
 
   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+  auto *Str =  OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
+  LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
+  return Str;
 }
 
 /// Emit the arrays used to pass the captures and map information to the
@@ -9447,8 +9452,96 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
   }
   return DynCGroupMem;
 }
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+                       const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+                       llvm::OpenMPIRBuilder &OMPBuilder,
+                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+  // Get mappable expression information.
+  MappableExprsHandler MEHandler(D, CGF);
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+  CodeGenModule &CGM = CGF.CGM;
+  auto RI = CS.getCapturedRecordDecl()->field_begin();
+  auto *CV = CapturedVars.begin();
+  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
+                                            CE = CS.capture_end();
+       CI != CE; ++CI, ++RI, ++CV) {
+    MappableExprsHandler::MapCombinedInfoTy CurInfo;
+    MappableExprsHandler::StructRangeInfoTy PartialStruct;
 
-static void emitTargetCallKernelLaunch(
+    // VLA sizes are passed to the outlined region by copy and do not have map
+    // information associated.
+    if (CI->capturesVariableArrayType()) {
+      CurInfo.Exprs.push_back(nullptr);
+      CurInfo.BasePointers.push_back(*CV);
+      CurInfo.DevicePtrDecls.push_back(nullptr);
+      CurInfo.DevicePointers.push_back(
+          MappableExprsHandler::DeviceInfoTy::None);
+      CurInfo.Pointers.push_back(*CV);
+      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
+      // Copy to the device as an argument. No need to retrieve it.
+      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
+                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
+      CurInfo.Mappers.push_back(nullptr);
+    } else {
+      // If we have any information in the map clause, we use it, otherwise we
+      // just do a default mapping.
+      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
+      if (!CI->capturesThis())
+        MappedVarSet.insert(CI->getCapturedVar());
+      else
+        MappedVarSet.insert(nullptr);
+      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
+        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
+      // Generate correct mapping for variables captured by reference in
+      // lambdas.
+      if (CI->capturesVariable())
+        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+                                                CurInfo, LambdaPointers);
+    }
+    // We expect to have at least an element of information for this capture.
+    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
+           "Non-existing map pointer for capture!");
+    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
+           "Inconsistent map information sizes!");
+
+    // If there is an entry in PartialStruct it means we have a struct with
+    // individual members mapped. Emit an extra combined entry.
+    if (PartialStruct.Base.isValid()) {
+      CombinedInfo.append(PartialStruct.PreliminaryMapData);
+      MEHandler.emitCombinedEntry(
+          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
+          OMPBuilder, nullptr,
+          !PartialStruct.PreliminaryMapData.BasePointers.empty());
+    }
+
+    // We need to append the results of this capture to what we already have.
+    CombinedInfo.append(CurInfo);
+  }
+  // Adjust MEMBER_OF flags for the lambdas captures.
+  MEHandler.adjustMemberOfForLambdaCaptures(
+      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
+      CombinedInfo.Pointers, CombinedInfo.Types);
+  // Map any list items in a map clause that were not captures because they
+  // weren't referenced within the construct.
+  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+
+  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
+    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
+  };
+  if (CGM.getCodeGenOpts().getDebugInfo() !=
+      llvm::codegenoptions::NoDebugInfo) {
+    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
+    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
+                    FillInfoMap);
+  }
+}
+static void emitTargetCallKernelLaunchNew(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,
     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9464,8 +9557,139 @@ static void emitTargetCallKernelLaunch(
 
   // Fill up the arrays with all the captured variables.
   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+  CGOpenMPRuntime::TargetDataInfo Info;
 
-  // Get mappable expression information.
+  auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+                          -> llvm::OpenMPIRBuilder::MapInfosTy & {
+    CGF.Builder.restoreIP(CodeGenIP);
+    genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
+    return CombinedInfo;
+  };
+  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+    }
+  };
+
+  auto CustomMapperCB = [&](unsigned int I) {
+    llvm::Value *MFunc = nullptr;
+    if (CombinedInfo.Mappers[I]) {
+      Info.HasMapper = true;
+      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+    }
+    return MFunc;
+  };
+  // Fill up the arrays and create the arguments.
+  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+  OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
+                                                                       CGF.AllocaInsertPt->getIterator()),
+                                  CGF.Builder.saveIP(), Info,
+                                  GenMapInfoCB, /*IsNonContiguous=*/true,
+                                  DeviceAddrCB, CustomMapperCB);
+  bool EmitDebug = !CombinedInfo.Names.empty();
+  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
+                                          EmitDebug,
+                                          /*ForEndCall=*/false);
+
+  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
+                                        CGF.VoidPtrTy, CGM.getPointerAlign());
+  InputInfo.PointersArray =
+      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+  InputInfo.SizesArray =
+      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
+  InputInfo.MappersArray =
+      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+  MapTypesArray = Info.RTArgs.MapTypesArray;
+  MapNamesArray = Info.RTArgs.MapNamesArray;
+
+  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
+                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
+                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
+                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
+    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
+
+    if (IsReverseOffloading) {
+      // Reverse offloading is not supported, so just execute on the host.
+      // FIXME: This fallback solution is incorrect since it ignores the
+      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
+      // assert here and ensure SEMA emits an error.
+      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
+      return;
+    }
+
+    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
+    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
+
+    llvm::Value *BasePointersArray =
+        InputInfo.BasePointersArray.emitRawPointer(CGF);
+    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
+    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
+    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
+
+    auto &&EmitTargetCallFallbackCB =
+        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
+         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
+        -> llvm::OpenMPIRBuilder::InsertPointTy {
+      CGF.Builder.restoreIP(IP);
+      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
+      return CGF.Builder.saveIP();
+    };
+
+    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
+    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
+    llvm::Value *NumThreads =
+        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
+    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
+    llvm::Value *NumIterations =
+        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
+    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
+        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
+        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
+
+    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
+        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
+        DynCGGroupMem, HasNoWait);
+
+    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
+        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
+        DeviceID, RTLoc, AllocaIP));
+  };
+
+  if (RequiresOuterTask) {
+    if (NewClangTargetTaskCodeGen) {
+      llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
+    } else {
+      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+    }
+  } else
+    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
+}
+static void emitTargetCallKernelLaunch(
+    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
+    const OMPExecutableDirective &D,
+    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
+    const CapturedStmt &CS, bool OffloadingMandatory,
+    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
+    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
+    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                     const OMPLoopDirective &D)>
+        SizeEmitter,
+    CodeGenFunction &CGF, CodeGenModule &CGM) {
+  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
+
+  // Fill up the arrays with all the captured variables.
+  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+// Get mappable expression information.
   MappableExprsHandler MEHandler(D, CGF);
   llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
@@ -9542,6 +9766,7 @@ static void emitTargetCallKernelLaunch(
 
   CGOpenMPRuntime::TargetDataInfo Info;
   // Fill up the arrays and create the arguments.
+  LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
   bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                    llvm::codegenoptions::NoDebugInfo;
@@ -9549,6 +9774,7 @@ static void emitTargetCallKernelLaunch(
                                           EmitDebug,
                                           /*ForEndCall=*/false);
 
+  LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                         CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9688,7 +9914,13 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+    if (OpenMPClangTargetCodegen)
+      emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+                               RequiresOuterTask, CS, OffloadingMandatory,
+                               Device, OutlinedFnID, InputInfo, MapTypesArray,
+                               MapNamesArray, SizeEmitter, CGF, CGM);
+    else
+      emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
@@ -9711,6 +9943,7 @@ void CGOpenMPRuntime::emitTargetCall(
     } else {
       RegionCodeGenTy ThenRCG(TargetThenGen);
       ThenRCG(CGF);
+      LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
     }
   } else {
     RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..123cfbe1b229d 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -40,6 +40,7 @@
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
+#define DEBUG_TYPE "clang-openmp-codegen"
 
 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index bff49dab4a313..035639b10e31a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1778,6 +1778,22 @@ class OpenMPIRBuilder {
                                    MapInfosTy &CombinedInfo,
                                    TargetDataInfo &Info);
 
+  /// Callback type for creating the map infos for the kernel parameters.
+  /// \param CodeGenIP is the insertion point where code should be generated,
+  ///        if any.
+  using GenMapInfoCallbackTy =
+      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
+
+  /// Emit the arrays used to pass the captures and map information to the
+  /// offloading runtime library. If there is no map or capture information,
+  /// return nullptr by reference.
+  void emitOffloadingArrays(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+      TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+      bool IsNonContiguous = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
   /// return nullptr by reference.
@@ -1787,6 +1803,7 @@ class OpenMPIRBuilder {
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
@@ -2190,11 +2207,6 @@ class OpenMPIRBuilder {
   /// duplicating the body code.
   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
 
-  /// Callback type for creating the map infos for the kernel parameters.
-  /// \param CodeGenIP is the insertion point where code should be generated,
-  ///        if any.
-  using GenMapInfoCallbackTy =
-      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
 
   /// Generator for '#omp target data'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 92213e19c9d9d..7c0dbc0925306 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5562,6 +5562,16 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
   }
 }
 
+void OpenMPIRBuilder::emitOffloadingArrays(
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+    GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    function_ref<Value *(unsigned int)> CustomMapperCB) {
+
+  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
+  emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
+                       Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+}
 void OpenMPIRBuilder::emitOffloadingArrays(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
     TargetDataInfo &Info, bool IsNonContiguous,

>From 0bb7eaaee6ca6301cd7e9a9285ad9959ca801613 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 25 Jun 2024 16:07:37 -0500
Subject: [PATCH 02/12] emitOffloadingArraysArgument and some other prints

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 50 +++++++++----------
 clang/lib/CodeGen/CGOpenMPRuntime.h           | 39 +++++++++++++++
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  2 +
 clang/lib/CodeGen/CodeGenFunction.h           |  1 +
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 12 ++++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 42 ++++++++++------
 .../Frontend/OpenMPIRBuilderTest.cpp          |  4 +-
 7 files changed, 104 insertions(+), 46 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9632ef912ebfe..ee03183f3f5a3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3002,6 +3002,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
   llvm::FunctionType *TaskEntryTy =
       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+  LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
+                          << KmpTaskTWithPrivatesPtrQTy << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
   auto *TaskEntry = llvm::Function::Create(
       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3706,6 +3710,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
       TaskPrivatesMap);
 
+  LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   // kmp_routine_entry_t *task_entry);
@@ -9582,15 +9587,14 @@ static void emitTargetCallKernelLaunchNew(
   };
   // Fill up the arrays and create the arguments.
   LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
-                                                                       CGF.AllocaInsertPt->getIterator()),
-                                  CGF.Builder.saveIP(), Info,
-                                  GenMapInfoCB, /*IsNonContiguous=*/true,
-                                  DeviceAddrCB, CustomMapperCB);
-  bool EmitDebug = !CombinedInfo.Names.empty();
-  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          EmitDebug,
-                                          /*ForEndCall=*/false);
+
+  llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
+      CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+  OMPBuilder.emitOffloadingArraysAndArgs(
+      OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+      GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+      DeviceAddrCB, CustomMapperCB);
 
   LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
@@ -9664,13 +9668,9 @@ static void emitTargetCallKernelLaunchNew(
         DeviceID, RTLoc, AllocaIP));
   };
 
-  if (RequiresOuterTask) {
-    if (NewClangTargetTaskCodeGen) {
-      llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
-    } else {
-      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
-    }
-  } else
+  if (RequiresOuterTask)
+    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+  else
     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
 }
 static void emitTargetCallKernelLaunch(
@@ -9768,10 +9768,9 @@ static void emitTargetCallKernelLaunch(
   // Fill up the arrays and create the arguments.
   LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
-  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+  Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                    llvm::codegenoptions::NoDebugInfo;
   OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          EmitDebug,
                                           /*ForEndCall=*/false);
 
   LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
@@ -9914,16 +9913,16 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    if (OpenMPClangTargetCodegen)
+    // if (OpenMPClangTargetCodegen)
       emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
-    else
-      emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
-                               RequiresOuterTask, CS, OffloadingMandatory,
-                               Device, OutlinedFnID, InputInfo, MapTypesArray,
-                               MapNamesArray, SizeEmitter, CGF, CGM);
+    // else
+    //   emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+    //                            RequiresOuterTask, CS, OffloadingMandatory,
+    //                            Device, OutlinedFnID, InputInfo, MapTypesArray,
+    //                            MapNamesArray, SizeEmitter, CGF, CGM);
   };
 
   auto &&TargetElseGen =
@@ -10684,10 +10683,9 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
                          /*IsNonContiguous=*/true);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
-    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+    Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                      llvm::codegenoptions::NoDebugInfo;
     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                            EmitDebug,
                                             /*ForEndCall=*/false);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 522ae3d35d22d..b9303a9414b22 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,45 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
+    auto &&printSVHelper =
+        [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
+      for (auto &v : V) {
+        v->dump(os, Ctx);
+      }
+    };
+    auto &&printSV =
+        [&os, printSVHelper](std::string s,
+                             const SmallVector<const Expr *, 4> &V) -> void {
+      os << s << ":[\n";
+      printSVHelper(V);
+      os << "]\n";
+    };
+    // SmallVector<const Expr *, 4> PrivateVars;
+    // SmallVector<const Expr *, 4> PrivateCopies;
+    // SmallVector<const Expr *, 4> FirstprivateVars;
+    // SmallVector<const Expr *, 4> FirstprivateCopies;
+    // SmallVector<const Expr *, 4> FirstprivateInits;
+    // SmallVector<const Expr *, 4> LastprivateVars;
+    // SmallVector<const Expr *, 4> LastprivateCopies;
+    // SmallVector<const Expr *, 4> ReductionVars;
+    // SmallVector<const Expr *, 4> ReductionOrigs;
+    // SmallVector<const Expr *, 4> ReductionCopies;
+    // SmallVector<const Expr *, 4> ReductionOps;
+    // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
+
+    printSV("PrivateVars", PrivateVars);
+    printSV("PrivateCopies", PrivateCopies);
+    printSV("FirstprivateVars", FirstprivateVars);
+    printSV("FirstprivateCopies", FirstprivateCopies);
+    printSV("FirstprivateInits", FirstprivateInits);
+    printSV("LastprivateVars", LastprivateVars);
+    printSV("LastprivateCopies", LastprivateCopies);
+    printSV("ReductionVars", ReductionVars);
+    printSV("ReductionOrigs", ReductionOrigs);
+    printSV("ReductionCopies", ReductionCopies);
+    printSV("ReductionOps", ReductionOps);
+  }
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 123cfbe1b229d..bd6743666826b 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5181,6 +5181,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     }
     BodyGen(CGF);
   };
+  LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
+  LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
       Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 06fc7259b5901..6092ab1684267 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -441,6 +441,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
 
 public:
+  void printLocalDeclMap();
   /// Return PostAllocaInsertPt. If it is not yet created, then insert it
   /// immediately after AllocaInsertPt.
   llvm::Instruction *getPostAllocaInsertPoint() {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 035639b10e31a..03573b4e02029 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1671,6 +1671,8 @@ class OpenMPIRBuilder {
     /// The total number of pointers passed to the runtime library.
     unsigned NumberOfPtrs = 0u;
 
+    bool EmitDebug = false;
+
     explicit TargetDataInfo() {}
     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
                             bool SeparateBeginEndCalls)
@@ -1769,7 +1771,6 @@ class OpenMPIRBuilder {
   void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                     OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                     OpenMPIRBuilder::TargetDataInfo &Info,
-                                    bool EmitDebug = false,
                                     bool ForEndCall = false);
 
   /// Emit an array of struct descriptors to be assigned to the offload args.
@@ -1789,7 +1790,7 @@ class OpenMPIRBuilder {
   /// return nullptr by reference.
   void emitOffloadingArrays(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
       bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
@@ -1804,6 +1805,13 @@ class OpenMPIRBuilder {
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
 
+  void emitOffloadingArraysAndArgs(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+      TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+      bool IsNonContiguous = false, bool ForEndCall = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7c0dbc0925306..8d6e6a354a1d8 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4923,8 +4923,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
                          CustomMapperCB);
 
     TargetDataRTArgs RTArgs;
-    emitOffloadingArraysArgument(Builder, RTArgs, Info,
-                                 !MapInfo->Names.empty());
+    emitOffloadingArraysArgument(Builder, RTArgs, Info);
 
     // Emit the number of elements in the offloading arrays.
     Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -4977,8 +4976,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
   // Generate code for the closing of the data region.
   auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     TargetDataRTArgs RTArgs;
-    emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
-                                 /*ForEndCall=*/true);
+    Info.EmitDebug = !MapInfo->Names.empty();
+    emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true);
 
     // Emit the number of elements in the offloading arrays.
     Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -5234,7 +5233,18 @@ static void emitTargetOutlinedFunction(
                                       OutlinedFn, OutlinedFnID);
 }
 
-static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+    TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+    bool IsNonContiguous, bool ForEndCall,
+    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    function_ref<Value *(unsigned int)> CustomMapperCB) {
+  emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+                       DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
+
+ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
                            OpenMPIRBuilder::InsertPointTy AllocaIP,
                            Function *OutlinedFn, Constant *OutlinedFnID,
                            int32_t NumTeams, int32_t NumThreads,
@@ -5245,13 +5255,11 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);
 
-  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
-  OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info,
-                                  /*IsNonContiguous=*/true);
-
   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
-  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
-                                          !MapInfo.Names.empty());
+  OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
+                                         RTArgs, GenMapInfoCB,
+                                         /*IsNonContiguous=*/true,
+                                         /*ForEndCall=*/false);
 
   //  emitKernelLaunch
   auto &&EmitTargetCallFallbackCB =
@@ -5261,7 +5269,7 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
     return Builder.saveIP();
   };
 
-  unsigned NumTargetItems = MapInfo.BasePointers.size();
+  unsigned NumTargetItems = Info.NumberOfPtrs;
   // TODO: Use correct device ID
   Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
   Value *NumTeamsVal = Builder.getInt32(NumTeams);
@@ -5438,7 +5446,6 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
 void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                                    TargetDataRTArgs &RTArgs,
                                                    TargetDataInfo &Info,
-                                                   bool EmitDebug,
                                                    bool ForEndCall) {
   assert((!ForEndCall || Info.separateBeginEndCalls()) &&
          "expected region end call to runtime only when end call is separate");
@@ -5478,7 +5485,7 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
 
   // Only emit the mapper information arrays if debug information is
   // requested.
-  if (!EmitDebug)
+  if (!Info.EmitDebug)
     RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
   else
     RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
@@ -5563,8 +5570,9 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
 }
 
 void OpenMPIRBuilder::emitOffloadingArrays(
-    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-    GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+    GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+    bool IsNonContiguous,
     function_ref<void(unsigned int, Value *)> DeviceAddrCB,
     function_ref<Value *(unsigned int)> CustomMapperCB) {
 
@@ -5677,9 +5685,11 @@ void OpenMPIRBuilder::emitOffloadingArrays(
     auto *MapNamesArrayGbl =
         createOffloadMapnames(CombinedInfo.Names, MapnamesName);
     Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
+    Info.EmitDebug = true;
   } else {
     Info.RTArgs.MapNamesArray =
         Constant::getNullValue(PointerType::getUnqual(Builder.getContext()));
+    Info.EmitDebug = false;
   }
 
   // If there's a present map type modifier, it must not be applied to the end
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 3ed3034f489ce..54070a1ae35f8 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6891,8 +6891,8 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
   Info.RTArgs.MappersArray =
       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
   Info.NumberOfPtrs = 4;
-
-  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false);
+  Info.EmitDebug = false;
+  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
 
   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
   EXPECT_NE(RTArgs.PointersArray, nullptr);

>From af98fabd5685e42dade598caf3c1279ccfab7fba Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:33:53 -0500
Subject: [PATCH 03/12] clean up, clean up, everybody clean up

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 206 +-------------------------
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  39 -----
 clang/lib/CodeGen/CGStmtOpenMP.cpp    |   4 -
 clang/lib/CodeGen/CodeGenFunction.h   |   1 -
 4 files changed, 5 insertions(+), 245 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ee03183f3f5a3..b2fa50d16437e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -38,7 +38,6 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -46,8 +45,6 @@
 #include <numeric>
 #include <optional>
 
-#define DEBUG_TYPE "clang-openmp-codegen"
-
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
@@ -3002,10 +2999,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
   llvm::FunctionType *TaskEntryTy =
       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
-  LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
-  LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
-                          << KmpTaskTWithPrivatesPtrQTy << "\n");
-  LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
   auto *TaskEntry = llvm::Function::Create(
       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3710,7 +3703,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
       TaskPrivatesMap);
 
-  LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   // kmp_routine_entry_t *task_entry);
@@ -8839,11 +8831,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
   }
 
   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-  auto *Str =  OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
-  LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
-  return Str;
 }
 
 /// Emit the arrays used to pass the captures and map information to the
@@ -9546,7 +9536,7 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                     FillInfoMap);
   }
 }
-static void emitTargetCallKernelLaunchNew(
+static void emitTargetCallKernelLaunch(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,
     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9585,9 +9575,8 @@ static void emitTargetCallKernelLaunchNew(
     }
     return MFunc;
   };
-  // Fill up the arrays and create the arguments.
-  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-
+  // Fill up the basepointers, pointers and mapper arrays and create the
+  // arguments.
   llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
       CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
 
@@ -9596,184 +9585,6 @@ static void emitTargetCallKernelLaunchNew(
       GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
       DeviceAddrCB, CustomMapperCB);
 
-  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
-  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
-                                        CGF.VoidPtrTy, CGM.getPointerAlign());
-  InputInfo.PointersArray =
-      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
-  InputInfo.SizesArray =
-      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
-  InputInfo.MappersArray =
-      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
-  MapTypesArray = Info.RTArgs.MapTypesArray;
-  MapNamesArray = Info.RTArgs.MapNamesArray;
-
-  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
-                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
-                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
-                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
-    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
-
-    if (IsReverseOffloading) {
-      // Reverse offloading is not supported, so just execute on the host.
-      // FIXME: This fallback solution is incorrect since it ignores the
-      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
-      // assert here and ensure SEMA emits an error.
-      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
-                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
-      return;
-    }
-
-    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
-    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
-
-    llvm::Value *BasePointersArray =
-        InputInfo.BasePointersArray.emitRawPointer(CGF);
-    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
-    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
-    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
-
-    auto &&EmitTargetCallFallbackCB =
-        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
-         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
-        -> llvm::OpenMPIRBuilder::InsertPointTy {
-      CGF.Builder.restoreIP(IP);
-      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
-                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
-      return CGF.Builder.saveIP();
-    };
-
-    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
-    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
-    llvm::Value *NumThreads =
-        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
-    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
-    llvm::Value *NumIterations =
-        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
-    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
-    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
-        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
-    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
-        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
-        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
-
-    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
-        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
-        DynCGGroupMem, HasNoWait);
-
-    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
-        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
-        DeviceID, RTLoc, AllocaIP));
-  };
-
-  if (RequiresOuterTask)
-    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
-  else
-    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
-}
-static void emitTargetCallKernelLaunch(
-    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
-    const OMPExecutableDirective &D,
-    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
-    const CapturedStmt &CS, bool OffloadingMandatory,
-    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
-    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
-    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
-    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
-                                     const OMPLoopDirective &D)>
-        SizeEmitter,
-    CodeGenFunction &CGF, CodeGenModule &CGM) {
-  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
-
-  // Fill up the arrays with all the captured variables.
-  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
-// Get mappable expression information.
-  MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
-  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
-
-  auto RI = CS.getCapturedRecordDecl()->field_begin();
-  auto *CV = CapturedVars.begin();
-  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
-                                            CE = CS.capture_end();
-       CI != CE; ++CI, ++RI, ++CV) {
-    MappableExprsHandler::MapCombinedInfoTy CurInfo;
-    MappableExprsHandler::StructRangeInfoTy PartialStruct;
-
-    // VLA sizes are passed to the outlined region by copy and do not have map
-    // information associated.
-    if (CI->capturesVariableArrayType()) {
-      CurInfo.Exprs.push_back(nullptr);
-      CurInfo.BasePointers.push_back(*CV);
-      CurInfo.DevicePtrDecls.push_back(nullptr);
-      CurInfo.DevicePointers.push_back(
-          MappableExprsHandler::DeviceInfoTy::None);
-      CurInfo.Pointers.push_back(*CV);
-      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
-          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
-      // Copy to the device as an argument. No need to retrieve it.
-      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
-                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
-                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
-      CurInfo.Mappers.push_back(nullptr);
-    } else {
-      // If we have any information in the map clause, we use it, otherwise we
-      // just do a default mapping.
-      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
-      if (!CI->capturesThis())
-        MappedVarSet.insert(CI->getCapturedVar());
-      else
-        MappedVarSet.insert(nullptr);
-      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
-        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
-      // Generate correct mapping for variables captured by reference in
-      // lambdas.
-      if (CI->capturesVariable())
-        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
-                                                CurInfo, LambdaPointers);
-    }
-    // We expect to have at least an element of information for this capture.
-    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
-           "Non-existing map pointer for capture!");
-    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
-           "Inconsistent map information sizes!");
-
-    // If there is an entry in PartialStruct it means we have a struct with
-    // individual members mapped. Emit an extra combined entry.
-    if (PartialStruct.Base.isValid()) {
-      CombinedInfo.append(PartialStruct.PreliminaryMapData);
-      MEHandler.emitCombinedEntry(
-          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
-          OMPBuilder, nullptr,
-          !PartialStruct.PreliminaryMapData.BasePointers.empty());
-    }
-
-    // We need to append the results of this capture to what we already have.
-    CombinedInfo.append(CurInfo);
-  }
-  // Adjust MEMBER_OF flags for the lambdas captures.
-  MEHandler.adjustMemberOfForLambdaCaptures(
-      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
-      CombinedInfo.Pointers, CombinedInfo.Types);
-  // Map any list items in a map clause that were not captures because they
-  // weren't referenced within the construct.
-  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
-
-  CGOpenMPRuntime::TargetDataInfo Info;
-  // Fill up the arrays and create the arguments.
-  LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
-  Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
-                   llvm::codegenoptions::NoDebugInfo;
-  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          /*ForEndCall=*/false);
-
-  LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                         CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9913,16 +9724,10 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    // if (OpenMPClangTargetCodegen)
-      emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
-    // else
-    //   emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
-    //                            RequiresOuterTask, CS, OffloadingMandatory,
-    //                            Device, OutlinedFnID, InputInfo, MapTypesArray,
-    //                            MapNamesArray, SizeEmitter, CGF, CGM);
   };
 
   auto &&TargetElseGen =
@@ -9942,7 +9747,6 @@ void CGOpenMPRuntime::emitTargetCall(
     } else {
       RegionCodeGenTy ThenRCG(TargetThenGen);
       ThenRCG(CGF);
-      LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
     }
   } else {
     RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index b9303a9414b22..522ae3d35d22d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,45 +122,6 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
-  void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
-    auto &&printSVHelper =
-        [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
-      for (auto &v : V) {
-        v->dump(os, Ctx);
-      }
-    };
-    auto &&printSV =
-        [&os, printSVHelper](std::string s,
-                             const SmallVector<const Expr *, 4> &V) -> void {
-      os << s << ":[\n";
-      printSVHelper(V);
-      os << "]\n";
-    };
-    // SmallVector<const Expr *, 4> PrivateVars;
-    // SmallVector<const Expr *, 4> PrivateCopies;
-    // SmallVector<const Expr *, 4> FirstprivateVars;
-    // SmallVector<const Expr *, 4> FirstprivateCopies;
-    // SmallVector<const Expr *, 4> FirstprivateInits;
-    // SmallVector<const Expr *, 4> LastprivateVars;
-    // SmallVector<const Expr *, 4> LastprivateCopies;
-    // SmallVector<const Expr *, 4> ReductionVars;
-    // SmallVector<const Expr *, 4> ReductionOrigs;
-    // SmallVector<const Expr *, 4> ReductionCopies;
-    // SmallVector<const Expr *, 4> ReductionOps;
-    // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
-
-    printSV("PrivateVars", PrivateVars);
-    printSV("PrivateCopies", PrivateCopies);
-    printSV("FirstprivateVars", FirstprivateVars);
-    printSV("FirstprivateCopies", FirstprivateCopies);
-    printSV("FirstprivateInits", FirstprivateInits);
-    printSV("LastprivateVars", LastprivateVars);
-    printSV("LastprivateCopies", LastprivateCopies);
-    printSV("ReductionVars", ReductionVars);
-    printSV("ReductionOrigs", ReductionOrigs);
-    printSV("ReductionCopies", ReductionCopies);
-    printSV("ReductionOps", ReductionOps);
-  }
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index bd6743666826b..c85135978e2b3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,12 +35,10 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
 #include <optional>
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
-#define DEBUG_TYPE "clang-openmp-codegen"
 
 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
 
@@ -5181,8 +5179,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     }
     BodyGen(CGF);
   };
-  LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
-  LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
       Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 6092ab1684267..06fc7259b5901 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -441,7 +441,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
 
 public:
-  void printLocalDeclMap();
   /// Return PostAllocaInsertPt. If it is not yet created, then insert it
   /// immediately after AllocaInsertPt.
   llvm::Instruction *getPostAllocaInsertPoint() {

>From c99d13fb3bd3da60d8b7362e7135cb160917d800 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:39:07 -0500
Subject: [PATCH 04/12] Add Debug.h include in CGStmtOpenMP.cpp because removal
 is not related to my change

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c85135978e2b3..f73d32de7c484 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include <optional>
 using namespace clang;
 using namespace CodeGen;

>From ae97854579cf0d966c766c211f65c647c2e9fa4a Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 14:13:36 -0500
Subject: [PATCH 05/12] Document emitOffladingArrays and
 emitOffloadingArraysAndArgs in OMPIRBuilder.h

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 28 +++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 03573b4e02029..38d90983c2817 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1787,24 +1787,36 @@ class OpenMPIRBuilder {
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference.
+  /// return nullptr by reference. This is the first of two overloads - this
+  /// one accepts a reference to a MapInfosTy object that contains combined
+  /// information generated for mappable clauses, including base pointers,
+  /// pointers, sizes, map types, user-defined mappers, and non-contiguous
+  /// information.
   void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-      bool IsNonContiguous = false,
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
+      TargetDataInfo &Info, bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference.
+  /// return nullptr by reference. This is the second of two overloads - Instead
+  /// of accepting a reference to a MapInfosTy object, this overload accepts
+  /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
+  /// with mapping information.
   void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
-      TargetDataInfo &Info, bool IsNonContiguous = false,
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+      bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
-
+  /// Allocates memory for and populates the arrays required for offloading
+  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+  /// emits their base addresses as arguments to be passed to the runtime
+  /// library. In essence, this function is a combination of
+  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+  /// be preferred by clients of OpenMPIRBuilder.
   void emitOffloadingArraysAndArgs(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
       TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,

>From 88a47b0449cb332f7cc835214efcdaea2c3a8a9f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 15:29:44 -0500
Subject: [PATCH 06/12] refactor genMapInfo

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 42 +++++++++++++++++++++------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b2fa50d16437e..b3493324a27a1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9447,14 +9447,14 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
   }
   return DynCGroupMem;
 }
-static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
-                       const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
-                       llvm::OpenMPIRBuilder &OMPBuilder,
-                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
-  // Get mappable expression information.
-  MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
-  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+static void genMapInfoForCaptures(
+    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+    llvm::OpenMPIRBuilder &OMPBuilder,
+    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
+    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+
   CodeGenModule &CGM = CGF.CGM;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
@@ -9522,9 +9522,18 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
   MEHandler.adjustMemberOfForLambdaCaptures(
       OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
       CombinedInfo.Pointers, CombinedInfo.Types);
+}
+static void
+genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+           MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+           llvm::OpenMPIRBuilder &OMPBuilder,
+           const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
+               llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
+
+  CodeGenModule &CGM = CGF.CGM;
   // Map any list items in a map clause that were not captures because they
   // weren't referenced within the construct.
-  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
 
   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
@@ -9536,6 +9545,21 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                     FillInfoMap);
   }
 }
+
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+                       const CapturedStmt &CS,
+                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+                       llvm::OpenMPIRBuilder &OMPBuilder,
+                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+  // Get mappable expression information.
+  MappableExprsHandler MEHandler(D, CGF);
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+
+  genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
+                        LambdaPointers, MappedVarSet, CombinedInfo);
+  genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
+}
 static void emitTargetCallKernelLaunch(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,

>From e97cd161933d07a08aad52e37b506ae27be27560 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 27 Jun 2024 13:51:34 -0500
Subject: [PATCH 07/12] Use CGOpenMPRuntime::emitTargetDataStandAloneCall

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 44 +++++++++++++++++++++------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b3493324a27a1..5372bbbbc2da1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10500,21 +10500,45 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
                                           PrePostActionTy &) {
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+    CGOpenMPRuntime::TargetDataInfo Info;
 
     // Get map clause information.
-    MappableExprsHandler MEHandler(D, CGF);
-    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
+    auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+        -> llvm::OpenMPIRBuilder::MapInfosTy & {
+      CGF.Builder.restoreIP(CodeGenIP);
+      MappableExprsHandler MEHandler(D, CGF);
+      genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+      return CombinedInfo;
+    };
 
-    CGOpenMPRuntime::TargetDataInfo Info;
-    // Fill up the arrays and create the arguments.
-    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
-                         /*IsNonContiguous=*/true);
+    auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+      if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+        Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+      }
+    };
+
+    auto CustomMapperCB = [&](unsigned int I) {
+      llvm::Value *MFunc = nullptr;
+      if (CombinedInfo.Mappers[I]) {
+        Info.HasMapper = true;
+        MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+      }
+      return MFunc;
+    };
+
+    // Fill up the basepointers, pointers and mapper arrays and create the
+    // arguments.
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+    InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
+                                           CGF.AllocaInsertPt->getIterator());
+
+    OMPBuilder.emitOffloadingArraysAndArgs(
+        OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+        GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+        DeviceAddrCB, CustomMapperCB);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
-    Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
-                     llvm::codegenoptions::NoDebugInfo;
-    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                            /*ForEndCall=*/false);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                           CGF.VoidPtrTy, CGM.getPointerAlign());

>From 200bd07a9384242cd7999442860bc90e2ba9b69b Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 10:49:35 -0500
Subject: [PATCH 08/12] Use static function emitOffloadingArraysAndArgs in
 emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 67 ++++++++++---------
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 13 ++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  9 +++
 3 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 5372bbbbc2da1..c0e9eb3b6a07e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8884,6 +8884,40 @@ static void emitOffloadingArrays(
                                   /*IsNonContiguous=*/true, DeviceAddrCB,
                                   CustomMapperCB);
 }
+/// Emit the arrays used to pass the captures and map information to the
+/// offloading runtime library. If there is no map or capture information,
+/// return nullptr by reference.
+static void emitOffloadingArraysAndArgs(
+    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
+    bool IsNonContiguous = false, bool ForEndCall = false) {
+  CodeGenModule &CGM = CGF.CGM;
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
+                         CGF.AllocaInsertPt->getIterator());
+  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
+                          CGF.Builder.GetInsertPoint());
+
+  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+    }
+  };
+
+  auto CustomMapperCB = [&](unsigned int I) {
+    llvm::Value *MFunc = nullptr;
+    if (CombinedInfo.Mappers[I]) {
+      Info.HasMapper = true;
+      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+    }
+    return MFunc;
+  };
+  OMPBuilder.emitOffloadingArraysAndArgs(
+      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
+      ForEndCall, DeviceAddrCB, CustomMapperCB);
+}
 
 /// Check for inner distribute directive.
 static const OMPExecutableDirective *
@@ -9577,37 +9611,10 @@ static void emitTargetCallKernelLaunch(
   // Fill up the arrays with all the captured variables.
   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
   CGOpenMPRuntime::TargetDataInfo Info;
+  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
 
-  auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
-                          -> llvm::OpenMPIRBuilder::MapInfosTy & {
-    CGF.Builder.restoreIP(CodeGenIP);
-    genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
-    return CombinedInfo;
-  };
-  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-    }
-  };
-
-  auto CustomMapperCB = [&](unsigned int I) {
-    llvm::Value *MFunc = nullptr;
-    if (CombinedInfo.Mappers[I]) {
-      Info.HasMapper = true;
-      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-    }
-    return MFunc;
-  };
-  // Fill up the basepointers, pointers and mapper arrays and create the
-  // arguments.
-  llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
-      CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
-  OMPBuilder.emitOffloadingArraysAndArgs(
-      OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
-      GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
-      DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);
 
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 38d90983c2817..2cb3da09a97c1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1824,6 +1824,19 @@ class OpenMPIRBuilder {
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
+  /// Allocates memory for and populates the arrays required for offloading
+  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+  /// emits their base addresses as arguments to be passed to the runtime
+  /// library. In essence, this function is a combination of
+  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+  /// be preferred by clients of OpenMPIRBuilder.
+  void emitOffloadingArraysAndArgs(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+      TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
+      bool IsNonContiguous = false, bool ForEndCall = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8d6e6a354a1d8..abf21da0fa7d4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5243,6 +5243,15 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
                        DeviceAddrCB, CustomMapperCB);
   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
  }
+ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+     TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+     bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+     function_ref<Value *(unsigned int)> CustomMapperCB) {
+   emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
+                        IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
 
  static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
                            OpenMPIRBuilder::InsertPointTy AllocaIP,

>From 879cfa1cf2714a0bfa0e42152634ec841b94c3ce Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:27:58 -0500
Subject: [PATCH 09/12] Use static function emitOffloadingArraysAndArgs in
 emitTargetDataStandaloneCall in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 40 ++++-----------------------
 1 file changed, 5 insertions(+), 35 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c0e9eb3b6a07e..14590146ceb51 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10508,44 +10508,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
     CGOpenMPRuntime::TargetDataInfo Info;
+    MappableExprsHandler MEHandler(D, CGF);
+    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);
 
-    // Get map clause information.
-    auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
-        -> llvm::OpenMPIRBuilder::MapInfosTy & {
-      CGF.Builder.restoreIP(CodeGenIP);
-      MappableExprsHandler MEHandler(D, CGF);
-      genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
-      return CombinedInfo;
-    };
-
-    auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-      if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-        Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-      }
-    };
-
-    auto CustomMapperCB = [&](unsigned int I) {
-      llvm::Value *MFunc = nullptr;
-      if (CombinedInfo.Mappers[I]) {
-        Info.HasMapper = true;
-        MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-      }
-      return MFunc;
-    };
-
-    // Fill up the basepointers, pointers and mapper arrays and create the
-    // arguments.
-    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-    InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
-                                           CGF.AllocaInsertPt->getIterator());
-
-    OMPBuilder.emitOffloadingArraysAndArgs(
-        OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
-        GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
-        DeviceAddrCB, CustomMapperCB);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
+
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                           CGF.VoidPtrTy, CGM.getPointerAlign());

>From 178be4f9b97226523d43f7ae9e11a438348774fc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:30:10 -0500
Subject: [PATCH 10/12] Remove emitOffloadingArrays from CGOpenMPRuntime.cpp
 because it is not used anymore. Use emitOffloadingArraysAndArgs

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 49 ---------------------------
 1 file changed, 49 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 14590146ceb51..9fbc06e89f017 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8835,55 +8835,6 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
 }
-
-/// Emit the arrays used to pass the captures and map information to the
-/// offloading runtime library. If there is no map or capture information,
-/// return nullptr by reference.
-static void emitOffloadingArrays(
-    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
-    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
-    bool IsNonContiguous = false) {
-  CodeGenModule &CGM = CGF.CGM;
-
-  // Reset the array information.
-  Info.clearArrayInfo();
-  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
-
-  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
-                         CGF.AllocaInsertPt->getIterator());
-  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
-                          CGF.Builder.GetInsertPoint());
-
-  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
-    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
-  };
-  if (CGM.getCodeGenOpts().getDebugInfo() !=
-      llvm::codegenoptions::NoDebugInfo) {
-    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
-    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
-                    FillInfoMap);
-  }
-
-  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-    }
-  };
-
-  auto CustomMapperCB = [&](unsigned int I) {
-    llvm::Value *MFunc = nullptr;
-    if (CombinedInfo.Mappers[I]) {
-      Info.HasMapper = true;
-      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-    }
-    return MFunc;
-  };
-  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
-                                  /*IsNonContiguous=*/true, DeviceAddrCB,
-                                  CustomMapperCB);
-}
 /// Emit the arrays used to pass the captures and map information to the
 /// offloading runtime library. If there is no map or capture information,
 /// return nullptr by reference.

>From d21f7f6f0ba9063260a08bc9d770bb0f1e0761bf Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:05:02 -0500
Subject: [PATCH 11/12] Remove overloads of emitOffloadingArrays and
 emitOffloadingArraysAndArgs that accept GenMapInfoCallBackTy

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  1 -
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 34 ++-----------------
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 32 ++++-------------
 3 files changed, 9 insertions(+), 58 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9fbc06e89f017..262bbd988e1e3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9440,7 +9440,6 @@ static void genMapInfoForCaptures(
     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
 
-  CodeGenModule &CGM = CGF.CGM;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2cb3da09a97c1..a0b54e25124d7 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1787,43 +1787,15 @@ class OpenMPIRBuilder {
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference. This is the first of two overloads - this
-  /// one accepts a reference to a MapInfosTy object that contains combined
-  /// information generated for mappable clauses, including base pointers,
-  /// pointers, sizes, map types, user-defined mappers, and non-contiguous
-  /// information.
+  /// return nullptr by reference. Accepts a reference to a MapInfosTy object
+  /// that contains information generated for mappable clauses,
+  /// including base pointers, pointers, sizes, map types, user-defined mappers.
   void emitOffloadingArrays(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
       TargetDataInfo &Info, bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
-  /// Emit the arrays used to pass the captures and map information to the
-  /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference. This is the second of two overloads - Instead
-  /// of accepting a reference to a MapInfosTy object, this overload accepts
-  /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
-  /// with mapping information.
-  void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-      bool IsNonContiguous = false,
-      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
-      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
-  /// Allocates memory for and populates the arrays required for offloading
-  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
-  /// emits their base addresses as arguments to be passed to the runtime
-  /// library. In essence, this function is a combination of
-  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
-  /// be preferred by clients of OpenMPIRBuilder.
-  void emitOffloadingArraysAndArgs(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-      TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
-      bool IsNonContiguous = false, bool ForEndCall = false,
-      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
-      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
   /// Allocates memory for and populates the arrays required for offloading
   /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
   /// emits their base addresses as arguments to be passed to the runtime
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index abf21da0fa7d4..df48eb430e097 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5235,23 +5235,13 @@ static void emitTargetOutlinedFunction(
 
 void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-    TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
-    bool IsNonContiguous, bool ForEndCall,
-    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+    bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
     function_ref<Value *(unsigned int)> CustomMapperCB) {
-  emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+  emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous,
                        DeviceAddrCB, CustomMapperCB);
   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
- void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
-     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-     TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
-     bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
-     function_ref<Value *(unsigned int)> CustomMapperCB) {
-   emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
-                        IsNonContiguous, DeviceAddrCB, CustomMapperCB);
-   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
+}
 
  static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
                            OpenMPIRBuilder::InsertPointTy AllocaIP,
@@ -5264,9 +5254,10 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);
 
+  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
   OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
-                                         RTArgs, GenMapInfoCB,
+                                         RTArgs, MapInfo,
                                          /*IsNonContiguous=*/true,
                                          /*ForEndCall=*/false);
 
@@ -5578,17 +5569,6 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
   }
 }
 
-void OpenMPIRBuilder::emitOffloadingArrays(
-    InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-    GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-    bool IsNonContiguous,
-    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
-    function_ref<Value *(unsigned int)> CustomMapperCB) {
-
-  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
-  emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
-                       Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
-}
 void OpenMPIRBuilder::emitOffloadingArrays(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
     TargetDataInfo &Info, bool IsNonContiguous,

>From bff9d7b276df201baa48b4739dabfa9329c71dd0 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:16:41 -0500
Subject: [PATCH 12/12] Undo an unnecessary change in the location of the
 declaration of GenMapInfoCallBackTy in OMPIRBuilder.h

---
 llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a0b54e25124d7..db748611ac501 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1779,12 +1779,6 @@ class OpenMPIRBuilder {
                                    MapInfosTy &CombinedInfo,
                                    TargetDataInfo &Info);
 
-  /// Callback type for creating the map infos for the kernel parameters.
-  /// \param CodeGenIP is the insertion point where code should be generated,
-  ///        if any.
-  using GenMapInfoCallbackTy =
-      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
-
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
   /// return nullptr by reference. Accepts a reference to a MapInfosTy object
@@ -2212,6 +2206,11 @@ class OpenMPIRBuilder {
   /// duplicating the body code.
   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
 
+  /// Callback type for creating the map infos for the kernel parameters.
+  /// \param CodeGenIP is the insertion point where code should be generated,
+  ///        if any.
+  using GenMapInfoCallbackTy =
+      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
 
   /// Generator for '#omp target data'
   ///



More information about the cfe-commits mailing list