[clang] [llvm] [OpenMPIRBuilder][Clang][NFC] - Combine `emitOffloadingArrays` and `emitOffloadingArraysArgument` in OpenMPIRBuilder (PR #97088)

Pranav Bhandarkar via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 22 11:25:18 PDT 2024


https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/97088

>From dc9e64a29d6d1fd84ad630cb002d1129ea6a0a31 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Sat, 15 Jun 2024 02:00:48 -0500
Subject: [PATCH 01/13] checkpoint commit. Use emitOffloadingArrays from
 OMPIRBuilder in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 241 +++++++++++++++++-
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |   1 +
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |  22 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  10 +
 4 files changed, 265 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..9632ef912ebfe 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -45,6 +46,8 @@
 #include <numeric>
 #include <optional>
 
+#define DEBUG_TYPE "clang-openmp-codegen"
+
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
@@ -8831,9 +8834,11 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
   }
 
   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+  auto *Str =  OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
+  LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
+  return Str;
 }
 
 /// Emit the arrays used to pass the captures and map information to the
@@ -9447,8 +9452,96 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
   }
   return DynCGroupMem;
 }
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+                       const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+                       llvm::OpenMPIRBuilder &OMPBuilder,
+                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+  // Get mappable expression information.
+  MappableExprsHandler MEHandler(D, CGF);
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+  CodeGenModule &CGM = CGF.CGM;
+  auto RI = CS.getCapturedRecordDecl()->field_begin();
+  auto *CV = CapturedVars.begin();
+  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
+                                            CE = CS.capture_end();
+       CI != CE; ++CI, ++RI, ++CV) {
+    MappableExprsHandler::MapCombinedInfoTy CurInfo;
+    MappableExprsHandler::StructRangeInfoTy PartialStruct;
 
-static void emitTargetCallKernelLaunch(
+    // VLA sizes are passed to the outlined region by copy and do not have map
+    // information associated.
+    if (CI->capturesVariableArrayType()) {
+      CurInfo.Exprs.push_back(nullptr);
+      CurInfo.BasePointers.push_back(*CV);
+      CurInfo.DevicePtrDecls.push_back(nullptr);
+      CurInfo.DevicePointers.push_back(
+          MappableExprsHandler::DeviceInfoTy::None);
+      CurInfo.Pointers.push_back(*CV);
+      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
+      // Copy to the device as an argument. No need to retrieve it.
+      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
+                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
+      CurInfo.Mappers.push_back(nullptr);
+    } else {
+      // If we have any information in the map clause, we use it, otherwise we
+      // just do a default mapping.
+      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
+      if (!CI->capturesThis())
+        MappedVarSet.insert(CI->getCapturedVar());
+      else
+        MappedVarSet.insert(nullptr);
+      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
+        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
+      // Generate correct mapping for variables captured by reference in
+      // lambdas.
+      if (CI->capturesVariable())
+        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+                                                CurInfo, LambdaPointers);
+    }
+    // We expect to have at least an element of information for this capture.
+    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
+           "Non-existing map pointer for capture!");
+    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
+           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
+           "Inconsistent map information sizes!");
+
+    // If there is an entry in PartialStruct it means we have a struct with
+    // individual members mapped. Emit an extra combined entry.
+    if (PartialStruct.Base.isValid()) {
+      CombinedInfo.append(PartialStruct.PreliminaryMapData);
+      MEHandler.emitCombinedEntry(
+          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
+          OMPBuilder, nullptr,
+          !PartialStruct.PreliminaryMapData.BasePointers.empty());
+    }
+
+    // We need to append the results of this capture to what we already have.
+    CombinedInfo.append(CurInfo);
+  }
+  // Adjust MEMBER_OF flags for the lambdas captures.
+  MEHandler.adjustMemberOfForLambdaCaptures(
+      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
+      CombinedInfo.Pointers, CombinedInfo.Types);
+  // Map any list items in a map clause that were not captures because they
+  // weren't referenced within the construct.
+  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+
+  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
+    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
+  };
+  if (CGM.getCodeGenOpts().getDebugInfo() !=
+      llvm::codegenoptions::NoDebugInfo) {
+    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
+    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
+                    FillInfoMap);
+  }
+}
+static void emitTargetCallKernelLaunchNew(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,
     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9464,8 +9557,139 @@ static void emitTargetCallKernelLaunch(
 
   // Fill up the arrays with all the captured variables.
   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+  CGOpenMPRuntime::TargetDataInfo Info;
 
-  // Get mappable expression information.
+  auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+                          -> llvm::OpenMPIRBuilder::MapInfosTy & {
+    CGF.Builder.restoreIP(CodeGenIP);
+    genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
+    return CombinedInfo;
+  };
+  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+    }
+  };
+
+  auto CustomMapperCB = [&](unsigned int I) {
+    llvm::Value *MFunc = nullptr;
+    if (CombinedInfo.Mappers[I]) {
+      Info.HasMapper = true;
+      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+    }
+    return MFunc;
+  };
+  // Fill up the arrays and create the arguments.
+  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+  OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
+                                                                       CGF.AllocaInsertPt->getIterator()),
+                                  CGF.Builder.saveIP(), Info,
+                                  GenMapInfoCB, /*IsNonContiguous=*/true,
+                                  DeviceAddrCB, CustomMapperCB);
+  bool EmitDebug = !CombinedInfo.Names.empty();
+  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
+                                          EmitDebug,
+                                          /*ForEndCall=*/false);
+
+  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
+                                        CGF.VoidPtrTy, CGM.getPointerAlign());
+  InputInfo.PointersArray =
+      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+  InputInfo.SizesArray =
+      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
+  InputInfo.MappersArray =
+      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+  MapTypesArray = Info.RTArgs.MapTypesArray;
+  MapNamesArray = Info.RTArgs.MapNamesArray;
+
+  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
+                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
+                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
+                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
+    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
+
+    if (IsReverseOffloading) {
+      // Reverse offloading is not supported, so just execute on the host.
+      // FIXME: This fallback solution is incorrect since it ignores the
+      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
+      // assert here and ensure SEMA emits an error.
+      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
+      return;
+    }
+
+    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
+    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
+
+    llvm::Value *BasePointersArray =
+        InputInfo.BasePointersArray.emitRawPointer(CGF);
+    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
+    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
+    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
+
+    auto &&EmitTargetCallFallbackCB =
+        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
+         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
+        -> llvm::OpenMPIRBuilder::InsertPointTy {
+      CGF.Builder.restoreIP(IP);
+      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
+      return CGF.Builder.saveIP();
+    };
+
+    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
+    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
+    llvm::Value *NumThreads =
+        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
+    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
+    llvm::Value *NumIterations =
+        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
+    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
+        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
+        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
+
+    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
+        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
+        DynCGGroupMem, HasNoWait);
+
+    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
+        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
+        DeviceID, RTLoc, AllocaIP));
+  };
+
+  if (RequiresOuterTask) {
+    if (NewClangTargetTaskCodeGen) {
+      llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
+    } else {
+      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+    }
+  } else
+    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
+}
+static void emitTargetCallKernelLaunch(
+    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
+    const OMPExecutableDirective &D,
+    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
+    const CapturedStmt &CS, bool OffloadingMandatory,
+    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
+    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
+    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+                                     const OMPLoopDirective &D)>
+        SizeEmitter,
+    CodeGenFunction &CGF, CodeGenModule &CGM) {
+  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
+
+  // Fill up the arrays with all the captured variables.
+  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+// Get mappable expression information.
   MappableExprsHandler MEHandler(D, CGF);
   llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
@@ -9542,6 +9766,7 @@ static void emitTargetCallKernelLaunch(
 
   CGOpenMPRuntime::TargetDataInfo Info;
   // Fill up the arrays and create the arguments.
+  LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
   bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                    llvm::codegenoptions::NoDebugInfo;
@@ -9549,6 +9774,7 @@ static void emitTargetCallKernelLaunch(
                                           EmitDebug,
                                           /*ForEndCall=*/false);
 
+  LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                         CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9688,7 +9914,13 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+    if (OpenMPClangTargetCodegen)
+      emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+                               RequiresOuterTask, CS, OffloadingMandatory,
+                               Device, OutlinedFnID, InputInfo, MapTypesArray,
+                               MapNamesArray, SizeEmitter, CGF, CGM);
+    else
+      emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
@@ -9711,6 +9943,7 @@ void CGOpenMPRuntime::emitTargetCall(
     } else {
       RegionCodeGenTy ThenRCG(TargetThenGen);
       ThenRCG(CGF);
+      LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
     }
   } else {
     RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..123cfbe1b229d 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -40,6 +40,7 @@
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
+#define DEBUG_TYPE "clang-openmp-codegen"
 
 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
 
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index bff49dab4a313..035639b10e31a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1778,6 +1778,22 @@ class OpenMPIRBuilder {
                                    MapInfosTy &CombinedInfo,
                                    TargetDataInfo &Info);
 
+  /// Callback type for creating the map infos for the kernel parameters.
+  /// \param CodeGenIP is the insertion point where code should be generated,
+  ///        if any.
+  using GenMapInfoCallbackTy =
+      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
+
+  /// Emit the arrays used to pass the captures and map information to the
+  /// offloading runtime library. If there is no map or capture information,
+  /// return nullptr by reference.
+  void emitOffloadingArrays(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+      TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+      bool IsNonContiguous = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
   /// return nullptr by reference.
@@ -1787,6 +1803,7 @@ class OpenMPIRBuilder {
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
@@ -2190,11 +2207,6 @@ class OpenMPIRBuilder {
   /// duplicating the body code.
   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
 
-  /// Callback type for creating the map infos for the kernel parameters.
-  /// \param CodeGenIP is the insertion point where code should be generated,
-  ///        if any.
-  using GenMapInfoCallbackTy =
-      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
 
   /// Generator for '#omp target data'
   ///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 92213e19c9d9d..7c0dbc0925306 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5562,6 +5562,16 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
   }
 }
 
+void OpenMPIRBuilder::emitOffloadingArrays(
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+    GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    function_ref<Value *(unsigned int)> CustomMapperCB) {
+
+  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
+  emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
+                       Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+}
 void OpenMPIRBuilder::emitOffloadingArrays(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
     TargetDataInfo &Info, bool IsNonContiguous,

>From 0bb7eaaee6ca6301cd7e9a9285ad9959ca801613 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 25 Jun 2024 16:07:37 -0500
Subject: [PATCH 02/13] emitOffloadingArraysArgument and some other prints

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 50 +++++++++----------
 clang/lib/CodeGen/CGOpenMPRuntime.h           | 39 +++++++++++++++
 clang/lib/CodeGen/CGStmtOpenMP.cpp            |  2 +
 clang/lib/CodeGen/CodeGenFunction.h           |  1 +
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 12 ++++-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 42 ++++++++++------
 .../Frontend/OpenMPIRBuilderTest.cpp          |  4 +-
 7 files changed, 104 insertions(+), 46 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9632ef912ebfe..ee03183f3f5a3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3002,6 +3002,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
   llvm::FunctionType *TaskEntryTy =
       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+  LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
+                          << KmpTaskTWithPrivatesPtrQTy << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
   auto *TaskEntry = llvm::Function::Create(
       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3706,6 +3710,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
       TaskPrivatesMap);
 
+  LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   // kmp_routine_entry_t *task_entry);
@@ -9582,15 +9587,14 @@ static void emitTargetCallKernelLaunchNew(
   };
   // Fill up the arrays and create the arguments.
   LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
-                                                                       CGF.AllocaInsertPt->getIterator()),
-                                  CGF.Builder.saveIP(), Info,
-                                  GenMapInfoCB, /*IsNonContiguous=*/true,
-                                  DeviceAddrCB, CustomMapperCB);
-  bool EmitDebug = !CombinedInfo.Names.empty();
-  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          EmitDebug,
-                                          /*ForEndCall=*/false);
+
+  llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
+      CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+  OMPBuilder.emitOffloadingArraysAndArgs(
+      OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+      GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+      DeviceAddrCB, CustomMapperCB);
 
   LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
@@ -9664,13 +9668,9 @@ static void emitTargetCallKernelLaunchNew(
         DeviceID, RTLoc, AllocaIP));
   };
 
-  if (RequiresOuterTask) {
-    if (NewClangTargetTaskCodeGen) {
-      llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
-    } else {
-      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
-    }
-  } else
+  if (RequiresOuterTask)
+    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+  else
     OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
 }
 static void emitTargetCallKernelLaunch(
@@ -9768,10 +9768,9 @@ static void emitTargetCallKernelLaunch(
   // Fill up the arrays and create the arguments.
   LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
-  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+  Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                    llvm::codegenoptions::NoDebugInfo;
   OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          EmitDebug,
                                           /*ForEndCall=*/false);
 
   LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
@@ -9914,16 +9913,16 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    if (OpenMPClangTargetCodegen)
+    // if (OpenMPClangTargetCodegen)
       emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
-    else
-      emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
-                               RequiresOuterTask, CS, OffloadingMandatory,
-                               Device, OutlinedFnID, InputInfo, MapTypesArray,
-                               MapNamesArray, SizeEmitter, CGF, CGM);
+    // else
+    //   emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+    //                            RequiresOuterTask, CS, OffloadingMandatory,
+    //                            Device, OutlinedFnID, InputInfo, MapTypesArray,
+    //                            MapNamesArray, SizeEmitter, CGF, CGM);
   };
 
   auto &&TargetElseGen =
@@ -10684,10 +10683,9 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
                          /*IsNonContiguous=*/true);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
-    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+    Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                      llvm::codegenoptions::NoDebugInfo;
     OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                            EmitDebug,
                                             /*ForEndCall=*/false);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 522ae3d35d22d..b9303a9414b22 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,45 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
+    auto &&printSVHelper =
+        [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
+      for (auto &v : V) {
+        v->dump(os, Ctx);
+      }
+    };
+    auto &&printSV =
+        [&os, printSVHelper](std::string s,
+                             const SmallVector<const Expr *, 4> &V) -> void {
+      os << s << ":[\n";
+      printSVHelper(V);
+      os << "]\n";
+    };
+    // SmallVector<const Expr *, 4> PrivateVars;
+    // SmallVector<const Expr *, 4> PrivateCopies;
+    // SmallVector<const Expr *, 4> FirstprivateVars;
+    // SmallVector<const Expr *, 4> FirstprivateCopies;
+    // SmallVector<const Expr *, 4> FirstprivateInits;
+    // SmallVector<const Expr *, 4> LastprivateVars;
+    // SmallVector<const Expr *, 4> LastprivateCopies;
+    // SmallVector<const Expr *, 4> ReductionVars;
+    // SmallVector<const Expr *, 4> ReductionOrigs;
+    // SmallVector<const Expr *, 4> ReductionCopies;
+    // SmallVector<const Expr *, 4> ReductionOps;
+    // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
+
+    printSV("PrivateVars", PrivateVars);
+    printSV("PrivateCopies", PrivateCopies);
+    printSV("FirstprivateVars", FirstprivateVars);
+    printSV("FirstprivateCopies", FirstprivateCopies);
+    printSV("FirstprivateInits", FirstprivateInits);
+    printSV("LastprivateVars", LastprivateVars);
+    printSV("LastprivateCopies", LastprivateCopies);
+    printSV("ReductionVars", ReductionVars);
+    printSV("ReductionOrigs", ReductionOrigs);
+    printSV("ReductionCopies", ReductionCopies);
+    printSV("ReductionOps", ReductionOps);
+  }
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 123cfbe1b229d..bd6743666826b 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5181,6 +5181,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     }
     BodyGen(CGF);
   };
+  LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
+  LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
       Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 06fc7259b5901..6092ab1684267 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -441,6 +441,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
 
 public:
+  void printLocalDeclMap();
   /// Return PostAllocaInsertPt. If it is not yet created, then insert it
   /// immediately after AllocaInsertPt.
   llvm::Instruction *getPostAllocaInsertPoint() {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 035639b10e31a..03573b4e02029 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1671,6 +1671,8 @@ class OpenMPIRBuilder {
     /// The total number of pointers passed to the runtime library.
     unsigned NumberOfPtrs = 0u;
 
+    bool EmitDebug = false;
+
     explicit TargetDataInfo() {}
     explicit TargetDataInfo(bool RequiresDevicePointerInfo,
                             bool SeparateBeginEndCalls)
@@ -1769,7 +1771,6 @@ class OpenMPIRBuilder {
   void emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                     OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                     OpenMPIRBuilder::TargetDataInfo &Info,
-                                    bool EmitDebug = false,
                                     bool ForEndCall = false);
 
   /// Emit an array of struct descriptors to be assigned to the offload args.
@@ -1789,7 +1790,7 @@ class OpenMPIRBuilder {
   /// return nullptr by reference.
   void emitOffloadingArrays(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
       bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
@@ -1804,6 +1805,13 @@ class OpenMPIRBuilder {
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
 
+  void emitOffloadingArraysAndArgs(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+      TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+      bool IsNonContiguous = false, bool ForEndCall = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7c0dbc0925306..8d6e6a354a1d8 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4923,8 +4923,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
                          CustomMapperCB);
 
     TargetDataRTArgs RTArgs;
-    emitOffloadingArraysArgument(Builder, RTArgs, Info,
-                                 !MapInfo->Names.empty());
+    emitOffloadingArraysArgument(Builder, RTArgs, Info);
 
     // Emit the number of elements in the offloading arrays.
     Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -4977,8 +4976,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
   // Generate code for the closing of the data region.
   auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     TargetDataRTArgs RTArgs;
-    emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
-                                 /*ForEndCall=*/true);
+    Info.EmitDebug = !MapInfo->Names.empty();
+    emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true);
 
     // Emit the number of elements in the offloading arrays.
     Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -5234,7 +5233,18 @@ static void emitTargetOutlinedFunction(
                                       OutlinedFn, OutlinedFnID);
 }
 
-static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+    TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+    bool IsNonContiguous, bool ForEndCall,
+    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    function_ref<Value *(unsigned int)> CustomMapperCB) {
+  emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+                       DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
+
+ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
                            OpenMPIRBuilder::InsertPointTy AllocaIP,
                            Function *OutlinedFn, Constant *OutlinedFnID,
                            int32_t NumTeams, int32_t NumThreads,
@@ -5245,13 +5255,11 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);
 
-  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
-  OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info,
-                                  /*IsNonContiguous=*/true);
-
   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
-  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
-                                          !MapInfo.Names.empty());
+  OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
+                                         RTArgs, GenMapInfoCB,
+                                         /*IsNonContiguous=*/true,
+                                         /*ForEndCall=*/false);
 
   //  emitKernelLaunch
   auto &&EmitTargetCallFallbackCB =
@@ -5261,7 +5269,7 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
     return Builder.saveIP();
   };
 
-  unsigned NumTargetItems = MapInfo.BasePointers.size();
+  unsigned NumTargetItems = Info.NumberOfPtrs;
   // TODO: Use correct device ID
   Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
   Value *NumTeamsVal = Builder.getInt32(NumTeams);
@@ -5438,7 +5446,6 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
 void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
                                                    TargetDataRTArgs &RTArgs,
                                                    TargetDataInfo &Info,
-                                                   bool EmitDebug,
                                                    bool ForEndCall) {
   assert((!ForEndCall || Info.separateBeginEndCalls()) &&
          "expected region end call to runtime only when end call is separate");
@@ -5478,7 +5485,7 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
 
   // Only emit the mapper information arrays if debug information is
   // requested.
-  if (!EmitDebug)
+  if (!Info.EmitDebug)
     RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
   else
     RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
@@ -5563,8 +5570,9 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
 }
 
 void OpenMPIRBuilder::emitOffloadingArrays(
-    InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-    GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+    InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+    GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+    bool IsNonContiguous,
     function_ref<void(unsigned int, Value *)> DeviceAddrCB,
     function_ref<Value *(unsigned int)> CustomMapperCB) {
 
@@ -5677,9 +5685,11 @@ void OpenMPIRBuilder::emitOffloadingArrays(
     auto *MapNamesArrayGbl =
         createOffloadMapnames(CombinedInfo.Names, MapnamesName);
     Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
+    Info.EmitDebug = true;
   } else {
     Info.RTArgs.MapNamesArray =
         Constant::getNullValue(PointerType::getUnqual(Builder.getContext()));
+    Info.EmitDebug = false;
   }
 
   // If there's a present map type modifier, it must not be applied to the end
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 3ed3034f489ce..54070a1ae35f8 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6891,8 +6891,8 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
   Info.RTArgs.MappersArray =
       ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
   Info.NumberOfPtrs = 4;
-
-  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false);
+  Info.EmitDebug = false;
+  OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
 
   EXPECT_NE(RTArgs.BasePointersArray, nullptr);
   EXPECT_NE(RTArgs.PointersArray, nullptr);

>From af98fabd5685e42dade598caf3c1279ccfab7fba Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:33:53 -0500
Subject: [PATCH 03/13] clean up, clean up, everybody clean up

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 206 +-------------------------
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  39 -----
 clang/lib/CodeGen/CGStmtOpenMP.cpp    |   4 -
 clang/lib/CodeGen/CodeGenFunction.h   |   1 -
 4 files changed, 5 insertions(+), 245 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ee03183f3f5a3..b2fa50d16437e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -38,7 +38,6 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -46,8 +45,6 @@
 #include <numeric>
 #include <optional>
 
-#define DEBUG_TYPE "clang-openmp-codegen"
-
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
@@ -3002,10 +2999,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
   llvm::FunctionType *TaskEntryTy =
       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
-  LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
-  LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
-                          << KmpTaskTWithPrivatesPtrQTy << "\n");
-  LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
   auto *TaskEntry = llvm::Function::Create(
       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3710,7 +3703,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
       KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
       TaskPrivatesMap);
 
-  LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
   // kmp_routine_entry_t *task_entry);
@@ -8839,11 +8831,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
   }
 
   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
-  auto *Str =  OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
-  LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
-  return Str;
 }
 
 /// Emit the arrays used to pass the captures and map information to the
@@ -9546,7 +9536,7 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                     FillInfoMap);
   }
 }
-static void emitTargetCallKernelLaunchNew(
+static void emitTargetCallKernelLaunch(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,
     llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9585,9 +9575,8 @@ static void emitTargetCallKernelLaunchNew(
     }
     return MFunc;
   };
-  // Fill up the arrays and create the arguments.
-  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-
+  // Fill up the basepointers, pointers and mapper arrays and create the
+  // arguments.
   llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
       CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
 
@@ -9596,184 +9585,6 @@ static void emitTargetCallKernelLaunchNew(
       GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
       DeviceAddrCB, CustomMapperCB);
 
-  LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
-  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
-                                        CGF.VoidPtrTy, CGM.getPointerAlign());
-  InputInfo.PointersArray =
-      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
-  InputInfo.SizesArray =
-      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
-  InputInfo.MappersArray =
-      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
-  MapTypesArray = Info.RTArgs.MapTypesArray;
-  MapNamesArray = Info.RTArgs.MapNamesArray;
-
-  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
-                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
-                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
-                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
-    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
-
-    if (IsReverseOffloading) {
-      // Reverse offloading is not supported, so just execute on the host.
-      // FIXME: This fallback solution is incorrect since it ignores the
-      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
-      // assert here and ensure SEMA emits an error.
-      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
-                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
-      return;
-    }
-
-    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
-    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
-
-    llvm::Value *BasePointersArray =
-        InputInfo.BasePointersArray.emitRawPointer(CGF);
-    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
-    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
-    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
-
-    auto &&EmitTargetCallFallbackCB =
-        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
-         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
-        -> llvm::OpenMPIRBuilder::InsertPointTy {
-      CGF.Builder.restoreIP(IP);
-      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
-                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
-      return CGF.Builder.saveIP();
-    };
-
-    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
-    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
-    llvm::Value *NumThreads =
-        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
-    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
-    llvm::Value *NumIterations =
-        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
-    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
-    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
-        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
-    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
-        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
-        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
-
-    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
-        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
-        DynCGGroupMem, HasNoWait);
-
-    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
-        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
-        DeviceID, RTLoc, AllocaIP));
-  };
-
-  if (RequiresOuterTask)
-    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
-  else
-    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
-}
-static void emitTargetCallKernelLaunch(
-    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
-    const OMPExecutableDirective &D,
-    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
-    const CapturedStmt &CS, bool OffloadingMandatory,
-    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
-    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
-    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
-    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
-                                     const OMPLoopDirective &D)>
-        SizeEmitter,
-    CodeGenFunction &CGF, CodeGenModule &CGM) {
-  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
-
-  // Fill up the arrays with all the captured variables.
-  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
-// Get mappable expression information.
-  MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
-  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
-
-  auto RI = CS.getCapturedRecordDecl()->field_begin();
-  auto *CV = CapturedVars.begin();
-  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
-                                            CE = CS.capture_end();
-       CI != CE; ++CI, ++RI, ++CV) {
-    MappableExprsHandler::MapCombinedInfoTy CurInfo;
-    MappableExprsHandler::StructRangeInfoTy PartialStruct;
-
-    // VLA sizes are passed to the outlined region by copy and do not have map
-    // information associated.
-    if (CI->capturesVariableArrayType()) {
-      CurInfo.Exprs.push_back(nullptr);
-      CurInfo.BasePointers.push_back(*CV);
-      CurInfo.DevicePtrDecls.push_back(nullptr);
-      CurInfo.DevicePointers.push_back(
-          MappableExprsHandler::DeviceInfoTy::None);
-      CurInfo.Pointers.push_back(*CV);
-      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
-          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
-      // Copy to the device as an argument. No need to retrieve it.
-      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
-                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
-                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
-      CurInfo.Mappers.push_back(nullptr);
-    } else {
-      // If we have any information in the map clause, we use it, otherwise we
-      // just do a default mapping.
-      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
-      if (!CI->capturesThis())
-        MappedVarSet.insert(CI->getCapturedVar());
-      else
-        MappedVarSet.insert(nullptr);
-      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
-        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
-      // Generate correct mapping for variables captured by reference in
-      // lambdas.
-      if (CI->capturesVariable())
-        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
-                                                CurInfo, LambdaPointers);
-    }
-    // We expect to have at least an element of information for this capture.
-    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
-           "Non-existing map pointer for capture!");
-    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
-           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
-           "Inconsistent map information sizes!");
-
-    // If there is an entry in PartialStruct it means we have a struct with
-    // individual members mapped. Emit an extra combined entry.
-    if (PartialStruct.Base.isValid()) {
-      CombinedInfo.append(PartialStruct.PreliminaryMapData);
-      MEHandler.emitCombinedEntry(
-          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
-          OMPBuilder, nullptr,
-          !PartialStruct.PreliminaryMapData.BasePointers.empty());
-    }
-
-    // We need to append the results of this capture to what we already have.
-    CombinedInfo.append(CurInfo);
-  }
-  // Adjust MEMBER_OF flags for the lambdas captures.
-  MEHandler.adjustMemberOfForLambdaCaptures(
-      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
-      CombinedInfo.Pointers, CombinedInfo.Types);
-  // Map any list items in a map clause that were not captures because they
-  // weren't referenced within the construct.
-  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
-
-  CGOpenMPRuntime::TargetDataInfo Info;
-  // Fill up the arrays and create the arguments.
-  LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
-  Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
-                   llvm::codegenoptions::NoDebugInfo;
-  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                          /*ForEndCall=*/false);
-
-  LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                         CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9913,16 +9724,10 @@ void CGOpenMPRuntime::emitTargetCall(
                           OutlinedFnID, &InputInfo, &MapTypesArray,
                           &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
-    // if (OpenMPClangTargetCodegen)
-      emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                                RequiresOuterTask, CS, OffloadingMandatory,
                                Device, OutlinedFnID, InputInfo, MapTypesArray,
                                MapNamesArray, SizeEmitter, CGF, CGM);
-    // else
-    //   emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
-    //                            RequiresOuterTask, CS, OffloadingMandatory,
-    //                            Device, OutlinedFnID, InputInfo, MapTypesArray,
-    //                            MapNamesArray, SizeEmitter, CGF, CGM);
   };
 
   auto &&TargetElseGen =
@@ -9942,7 +9747,6 @@ void CGOpenMPRuntime::emitTargetCall(
     } else {
       RegionCodeGenTy ThenRCG(TargetThenGen);
       ThenRCG(CGF);
-      LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
     }
   } else {
     RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index b9303a9414b22..522ae3d35d22d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,45 +122,6 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
-  void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
-    auto &&printSVHelper =
-        [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
-      for (auto &v : V) {
-        v->dump(os, Ctx);
-      }
-    };
-    auto &&printSV =
-        [&os, printSVHelper](std::string s,
-                             const SmallVector<const Expr *, 4> &V) -> void {
-      os << s << ":[\n";
-      printSVHelper(V);
-      os << "]\n";
-    };
-    // SmallVector<const Expr *, 4> PrivateVars;
-    // SmallVector<const Expr *, 4> PrivateCopies;
-    // SmallVector<const Expr *, 4> FirstprivateVars;
-    // SmallVector<const Expr *, 4> FirstprivateCopies;
-    // SmallVector<const Expr *, 4> FirstprivateInits;
-    // SmallVector<const Expr *, 4> LastprivateVars;
-    // SmallVector<const Expr *, 4> LastprivateCopies;
-    // SmallVector<const Expr *, 4> ReductionVars;
-    // SmallVector<const Expr *, 4> ReductionOrigs;
-    // SmallVector<const Expr *, 4> ReductionCopies;
-    // SmallVector<const Expr *, 4> ReductionOps;
-    // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
-
-    printSV("PrivateVars", PrivateVars);
-    printSV("PrivateCopies", PrivateCopies);
-    printSV("FirstprivateVars", FirstprivateVars);
-    printSV("FirstprivateCopies", FirstprivateCopies);
-    printSV("FirstprivateInits", FirstprivateInits);
-    printSV("LastprivateVars", LastprivateVars);
-    printSV("LastprivateCopies", LastprivateCopies);
-    printSV("ReductionVars", ReductionVars);
-    printSV("ReductionOrigs", ReductionOrigs);
-    printSV("ReductionCopies", ReductionCopies);
-    printSV("ReductionOps", ReductionOps);
-  }
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index bd6743666826b..c85135978e2b3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,12 +35,10 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
 #include <optional>
 using namespace clang;
 using namespace CodeGen;
 using namespace llvm::omp;
-#define DEBUG_TYPE "clang-openmp-codegen"
 
 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
 
@@ -5181,8 +5179,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     }
     BodyGen(CGF);
   };
-  LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
-  LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
       S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
       Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 6092ab1684267..06fc7259b5901 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -441,7 +441,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
 
 public:
-  void printLocalDeclMap();
   /// Return PostAllocaInsertPt. If it is not yet created, then insert it
   /// immediately after AllocaInsertPt.
   llvm::Instruction *getPostAllocaInsertPoint() {

>From c99d13fb3bd3da60d8b7362e7135cb160917d800 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:39:07 -0500
Subject: [PATCH 04/13] Add Debug.h include in CGStmtOpenMP.cpp because removal
 is not related to my change

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c85135978e2b3..f73d32de7c484 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
 #include <optional>
 using namespace clang;
 using namespace CodeGen;

>From ae97854579cf0d966c766c211f65c647c2e9fa4a Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 14:13:36 -0500
Subject: [PATCH 05/13] Document emitOffloadingArrays and
 emitOffloadingArraysAndArgs in OMPIRBuilder.h

---
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 28 +++++++++++++------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 03573b4e02029..38d90983c2817 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1787,24 +1787,36 @@ class OpenMPIRBuilder {
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference.
+  /// return nullptr by reference. This is the first of two overloads - this
+  /// one accepts a reference to a MapInfosTy object that contains combined
+  /// information generated for mappable clauses, including base pointers,
+  /// pointers, sizes, map types, user-defined mappers, and non-contiguous
+  /// information.
   void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-      bool IsNonContiguous = false,
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
+      TargetDataInfo &Info, bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference.
+  /// return nullptr by reference. This is the second of two overloads - Instead
+  /// of accepting a reference to a MapInfosTy object, this overload accepts
+  /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
+  /// with mapping information.
   void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
-      TargetDataInfo &Info, bool IsNonContiguous = false,
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+      bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
-
+  /// Allocates memory for and populates the arrays required for offloading
+  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+  /// emits their base addresses as arguments to be passed to the runtime
+  /// library. In essence, this function is a combination of
+  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+  /// be preferred by clients of OpenMPIRBuilder.
   void emitOffloadingArraysAndArgs(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
       TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,

>From 88a47b0449cb332f7cc835214efcdaea2c3a8a9f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 15:29:44 -0500
Subject: [PATCH 06/13] refactor genMapInfo

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 42 +++++++++++++++++++++------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b2fa50d16437e..b3493324a27a1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9447,14 +9447,14 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
   }
   return DynCGroupMem;
 }
-static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
-                       const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
-                       llvm::OpenMPIRBuilder &OMPBuilder,
-                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
-  // Get mappable expression information.
-  MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
-  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+static void genMapInfoForCaptures(
+    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+    llvm::OpenMPIRBuilder &OMPBuilder,
+    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
+    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+
   CodeGenModule &CGM = CGF.CGM;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
@@ -9522,9 +9522,18 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
   MEHandler.adjustMemberOfForLambdaCaptures(
       OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
       CombinedInfo.Pointers, CombinedInfo.Types);
+}
+static void
+genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+           MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+           llvm::OpenMPIRBuilder &OMPBuilder,
+           const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
+               llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
+
+  CodeGenModule &CGM = CGF.CGM;
   // Map any list items in a map clause that were not captures because they
   // weren't referenced within the construct.
-  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
 
   auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
     return emitMappingInformation(CGF, OMPBuilder, MapExpr);
@@ -9536,6 +9545,21 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                     FillInfoMap);
   }
 }
+
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+                       const CapturedStmt &CS,
+                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+                       llvm::OpenMPIRBuilder &OMPBuilder,
+                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+  // Get mappable expression information.
+  MappableExprsHandler MEHandler(D, CGF);
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+
+  genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
+                        LambdaPointers, MappedVarSet, CombinedInfo);
+  genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
+}
 static void emitTargetCallKernelLaunch(
     CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
     const OMPExecutableDirective &D,

>From e97cd161933d07a08aad52e37b506ae27be27560 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 27 Jun 2024 13:51:34 -0500
Subject: [PATCH 07/13] Use CGOpenMPRuntime::emitTargetDataStandAloneCall

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 44 +++++++++++++++++++++------
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b3493324a27a1..5372bbbbc2da1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10500,21 +10500,45 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
                                           PrePostActionTy &) {
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+    CGOpenMPRuntime::TargetDataInfo Info;
 
     // Get map clause information.
-    MappableExprsHandler MEHandler(D, CGF);
-    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
+    auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+        -> llvm::OpenMPIRBuilder::MapInfosTy & {
+      CGF.Builder.restoreIP(CodeGenIP);
+      MappableExprsHandler MEHandler(D, CGF);
+      genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+      return CombinedInfo;
+    };
 
-    CGOpenMPRuntime::TargetDataInfo Info;
-    // Fill up the arrays and create the arguments.
-    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
-                         /*IsNonContiguous=*/true);
+    auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+      if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+        Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+      }
+    };
+
+    auto CustomMapperCB = [&](unsigned int I) {
+      llvm::Value *MFunc = nullptr;
+      if (CombinedInfo.Mappers[I]) {
+        Info.HasMapper = true;
+        MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+      }
+      return MFunc;
+    };
+
+    // Fill up the basepointers, pointers and mapper arrays and create the
+    // arguments.
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+    InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
+                                           CGF.AllocaInsertPt->getIterator());
+
+    OMPBuilder.emitOffloadingArraysAndArgs(
+        OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+        GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+        DeviceAddrCB, CustomMapperCB);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
-    Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
-                     llvm::codegenoptions::NoDebugInfo;
-    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
-                                            /*ForEndCall=*/false);
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                           CGF.VoidPtrTy, CGM.getPointerAlign());

>From 200bd07a9384242cd7999442860bc90e2ba9b69b Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 10:49:35 -0500
Subject: [PATCH 08/13] Use static function emitOffloadingArraysAndArgs in
 emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         | 67 ++++++++++---------
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 13 ++++
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  9 +++
 3 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 5372bbbbc2da1..c0e9eb3b6a07e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8884,6 +8884,40 @@ static void emitOffloadingArrays(
                                   /*IsNonContiguous=*/true, DeviceAddrCB,
                                   CustomMapperCB);
 }
+/// Emit the arrays used to pass the captures and map information to the
+/// offloading runtime library. If there is no map or capture information,
+/// return nullptr by reference.
+static void emitOffloadingArraysAndArgs(
+    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
+    bool IsNonContiguous = false, bool ForEndCall = false) {
+  CodeGenModule &CGM = CGF.CGM;
+
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
+                         CGF.AllocaInsertPt->getIterator());
+  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
+                          CGF.Builder.GetInsertPoint());
+
+  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+    }
+  };
+
+  auto CustomMapperCB = [&](unsigned int I) {
+    llvm::Value *MFunc = nullptr;
+    if (CombinedInfo.Mappers[I]) {
+      Info.HasMapper = true;
+      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+    }
+    return MFunc;
+  };
+  OMPBuilder.emitOffloadingArraysAndArgs(
+      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
+      ForEndCall, DeviceAddrCB, CustomMapperCB);
+}
 
 /// Check for inner distribute directive.
 static const OMPExecutableDirective *
@@ -9577,37 +9611,10 @@ static void emitTargetCallKernelLaunch(
   // Fill up the arrays with all the captured variables.
   MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
   CGOpenMPRuntime::TargetDataInfo Info;
+  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
 
-  auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
-                          -> llvm::OpenMPIRBuilder::MapInfosTy & {
-    CGF.Builder.restoreIP(CodeGenIP);
-    genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
-    return CombinedInfo;
-  };
-  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-    }
-  };
-
-  auto CustomMapperCB = [&](unsigned int I) {
-    llvm::Value *MFunc = nullptr;
-    if (CombinedInfo.Mappers[I]) {
-      Info.HasMapper = true;
-      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-    }
-    return MFunc;
-  };
-  // Fill up the basepointers, pointers and mapper arrays and create the
-  // arguments.
-  llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
-      CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
-  OMPBuilder.emitOffloadingArraysAndArgs(
-      OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
-      GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
-      DeviceAddrCB, CustomMapperCB);
+  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);
 
   InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
   InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 38d90983c2817..2cb3da09a97c1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1824,6 +1824,19 @@ class OpenMPIRBuilder {
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
+  /// Allocates memory for and populates the arrays required for offloading
+  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+  /// emits their base addresses as arguments to be passed to the runtime
+  /// library. In essence, this function is a combination of
+  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+  /// be preferred by clients of OpenMPIRBuilder.
+  void emitOffloadingArraysAndArgs(
+      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+      TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
+      bool IsNonContiguous = false, bool ForEndCall = false,
+      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
   /// Creates offloading entry for the provided entry ID \a ID, address \a
   /// Addr, size \a Size, and flags \a Flags.
   void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8d6e6a354a1d8..abf21da0fa7d4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5243,6 +5243,15 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
                        DeviceAddrCB, CustomMapperCB);
   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
  }
+ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+     TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+     bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+     function_ref<Value *(unsigned int)> CustomMapperCB) {
+   emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
+                        IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
 
  static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
                            OpenMPIRBuilder::InsertPointTy AllocaIP,

>From 879cfa1cf2714a0bfa0e42152634ec841b94c3ce Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:27:58 -0500
Subject: [PATCH 09/13] Use static function emitOffloadingArraysAndArgs in
 emitTargetDataStandAloneCall in CGOpenMPRuntime.cpp

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 40 ++++-----------------------
 1 file changed, 5 insertions(+), 35 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c0e9eb3b6a07e..14590146ceb51 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10508,44 +10508,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
     // Fill up the arrays with all the mapped variables.
     MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
     CGOpenMPRuntime::TargetDataInfo Info;
+    MappableExprsHandler MEHandler(D, CGF);
+    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);
 
-    // Get map clause information.
-    auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
-        -> llvm::OpenMPIRBuilder::MapInfosTy & {
-      CGF.Builder.restoreIP(CodeGenIP);
-      MappableExprsHandler MEHandler(D, CGF);
-      genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
-      return CombinedInfo;
-    };
-
-    auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-      if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-        Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-      }
-    };
-
-    auto CustomMapperCB = [&](unsigned int I) {
-      llvm::Value *MFunc = nullptr;
-      if (CombinedInfo.Mappers[I]) {
-        Info.HasMapper = true;
-        MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-      }
-      return MFunc;
-    };
-
-    // Fill up the basepointers, pointers and mapper arrays and create the
-    // arguments.
-    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-    InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
-                                           CGF.AllocaInsertPt->getIterator());
-
-    OMPBuilder.emitOffloadingArraysAndArgs(
-        OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
-        GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
-        DeviceAddrCB, CustomMapperCB);
     bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                              D.hasClausesOfKind<OMPNowaitClause>();
+
     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
     InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                           CGF.VoidPtrTy, CGM.getPointerAlign());

>From 178be4f9b97226523d43f7ae9e11a438348774fc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:30:10 -0500
Subject: [PATCH 10/13] Remove emitOffloadingArrays from CGOpenMPRuntime.cpp
 because it is not used anymore. Use emitOffloadingArraysAndArgs

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 49 ---------------------------
 1 file changed, 49 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 14590146ceb51..9fbc06e89f017 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8835,55 +8835,6 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                                          PLoc.getLine(), PLoc.getColumn(),
                                          SrcLocStrSize);
 }
-
-/// Emit the arrays used to pass the captures and map information to the
-/// offloading runtime library. If there is no map or capture information,
-/// return nullptr by reference.
-static void emitOffloadingArrays(
-    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
-    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
-    bool IsNonContiguous = false) {
-  CodeGenModule &CGM = CGF.CGM;
-
-  // Reset the array information.
-  Info.clearArrayInfo();
-  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
-
-  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
-                         CGF.AllocaInsertPt->getIterator());
-  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
-                          CGF.Builder.GetInsertPoint());
-
-  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
-    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
-  };
-  if (CGM.getCodeGenOpts().getDebugInfo() !=
-      llvm::codegenoptions::NoDebugInfo) {
-    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
-    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
-                    FillInfoMap);
-  }
-
-  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
-    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
-      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
-    }
-  };
-
-  auto CustomMapperCB = [&](unsigned int I) {
-    llvm::Value *MFunc = nullptr;
-    if (CombinedInfo.Mappers[I]) {
-      Info.HasMapper = true;
-      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
-          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
-    }
-    return MFunc;
-  };
-  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
-                                  /*IsNonContiguous=*/true, DeviceAddrCB,
-                                  CustomMapperCB);
-}
 /// Emit the arrays used to pass the captures and map information to the
 /// offloading runtime library. If there is no map or capture information,
 /// return nullptr by reference.

>From d21f7f6f0ba9063260a08bc9d770bb0f1e0761bf Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:05:02 -0500
Subject: [PATCH 11/13] Remove overloads of emitOffloadingArrays and
 emitOffloadingArraysAndArgs that accept GenMapInfoCallbackTy

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  1 -
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       | 34 ++-----------------
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 32 ++++-------------
 3 files changed, 9 insertions(+), 58 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9fbc06e89f017..262bbd988e1e3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9440,7 +9440,6 @@ static void genMapInfoForCaptures(
     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
 
-  CodeGenModule &CGM = CGF.CGM;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2cb3da09a97c1..a0b54e25124d7 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1787,43 +1787,15 @@ class OpenMPIRBuilder {
 
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference. This is the first of two overloads - this
-  /// one accepts a reference to a MapInfosTy object that contains combined
-  /// information generated for mappable clauses, including base pointers,
-  /// pointers, sizes, map types, user-defined mappers, and non-contiguous
-  /// information.
+  /// return nullptr by reference. Accepts a reference to a MapInfosTy object
+  /// that contains information generated for mappable clauses,
+  /// including base pointers, pointers, sizes, map types, user-defined mappers.
   void emitOffloadingArrays(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
       TargetDataInfo &Info, bool IsNonContiguous = false,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
 
-  /// Emit the arrays used to pass the captures and map information to the
-  /// offloading runtime library. If there is no map or capture information,
-  /// return nullptr by reference. This is the second of two overloads - Instead
-  /// of accepting a reference to a MapInfosTy object, this overload accepts
-  /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
-  /// with mapping information.
-  void emitOffloadingArrays(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-      GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-      bool IsNonContiguous = false,
-      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
-      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
-  /// Allocates memory for and populates the arrays required for offloading
-  /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
-  /// emits their base addresses as arguments to be passed to the runtime
-  /// library. In essence, this function is a combination of
-  /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
-  /// be preferred by clients of OpenMPIRBuilder.
-  void emitOffloadingArraysAndArgs(
-      InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-      TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
-      bool IsNonContiguous = false, bool ForEndCall = false,
-      function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
-      function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
   /// Allocates memory for and populates the arrays required for offloading
   /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
   /// emits their base addresses as arguments to be passed to the runtime
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index abf21da0fa7d4..df48eb430e097 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5235,23 +5235,13 @@ static void emitTargetOutlinedFunction(
 
 void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-    TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
-    bool IsNonContiguous, bool ForEndCall,
-    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+    TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+    bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
     function_ref<Value *(unsigned int)> CustomMapperCB) {
-  emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+  emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous,
                        DeviceAddrCB, CustomMapperCB);
   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
- void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
-     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
-     TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
-     bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
-     function_ref<Value *(unsigned int)> CustomMapperCB) {
-   emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
-                        IsNonContiguous, DeviceAddrCB, CustomMapperCB);
-   emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
+}
 
  static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
                            OpenMPIRBuilder::InsertPointTy AllocaIP,
@@ -5264,9 +5254,10 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
       /*RequiresDevicePointerInfo=*/false,
       /*SeparateBeginEndCalls=*/true);
 
+  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
   OpenMPIRBuilder::TargetDataRTArgs RTArgs;
   OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
-                                         RTArgs, GenMapInfoCB,
+                                         RTArgs, MapInfo,
                                          /*IsNonContiguous=*/true,
                                          /*ForEndCall=*/false);
 
@@ -5578,17 +5569,6 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
   }
 }
 
-void OpenMPIRBuilder::emitOffloadingArrays(
-    InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
-    GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
-    bool IsNonContiguous,
-    function_ref<void(unsigned int, Value *)> DeviceAddrCB,
-    function_ref<Value *(unsigned int)> CustomMapperCB) {
-
-  OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
-  emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
-                       Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
-}
 void OpenMPIRBuilder::emitOffloadingArrays(
     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
     TargetDataInfo &Info, bool IsNonContiguous,

>From bff9d7b276df201baa48b4739dabfa9329c71dd0 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:16:41 -0500
Subject: [PATCH 12/13] Undo an unnecessary change in the location of the
 declaration of GenMapInfoCallbackTy in OMPIRBuilder.h

---
 llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a0b54e25124d7..db748611ac501 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1779,12 +1779,6 @@ class OpenMPIRBuilder {
                                    MapInfosTy &CombinedInfo,
                                    TargetDataInfo &Info);
 
-  /// Callback type for creating the map infos for the kernel parameters.
-  /// \param CodeGenIP is the insertion point where code should be generated,
-  ///        if any.
-  using GenMapInfoCallbackTy =
-      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
-
   /// Emit the arrays used to pass the captures and map information to the
   /// offloading runtime library. If there is no map or capture information,
   /// return nullptr by reference. Accepts a reference to a MapInfosTy object
@@ -2212,6 +2206,11 @@ class OpenMPIRBuilder {
   /// duplicating the body code.
   enum BodyGenTy { Priv, DupNoPriv, NoPriv };
 
+  /// Callback type for creating the map infos for the kernel parameters.
+  /// \param CodeGenIP is the insertion point where code should be generated,
+  ///        if any.
+  using GenMapInfoCallbackTy =
+      function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
 
   /// Generator for '#omp target data'
   ///

>From 9d5c42b905b308b84c54dea81518bc84aa1d79b4 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 22 Jul 2024 13:23:00 -0500
Subject: [PATCH 13/13] Address review comments

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 262bbd988e1e3..4ab9195bded2a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9436,10 +9436,10 @@ static void genMapInfoForCaptures(
     MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
     const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
     llvm::OpenMPIRBuilder &OMPBuilder,
-    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
     llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
     MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
 
+  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
   auto RI = CS.getCapturedRecordDecl()->field_begin();
   auto *CV = CapturedVars.begin();
   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
@@ -9537,11 +9537,10 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                        MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
   // Get mappable expression information.
   MappableExprsHandler MEHandler(D, CGF);
-  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
   llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
 
   genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
-                        LambdaPointers, MappedVarSet, CombinedInfo);
+                        MappedVarSet, CombinedInfo);
   genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
 }
 static void emitTargetCallKernelLaunch(



More information about the cfe-commits mailing list