[clang] [llvm] [OpenMPIRBuilder][Clang][NFC] - Combine `emitOffloadingArrays` and `emitOffloadingArraysArgument` in OpenMPIRBuilder (PR #97088)
Pranav Bhandarkar via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 23 14:19:28 PDT 2024
https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/97088
>From ad6ef960b14c23bde1460a0977b6401dc21dfea4 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Sat, 15 Jun 2024 02:00:48 -0500
Subject: [PATCH 01/14] checkpoint commit. Use emitOffloadingArrays from
OMPIRBuilder in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 241 +++++++++++++++++-
clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 +
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 22 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 10 +
4 files changed, 265 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a6a87ec88ee8a..4c95aab3c33c1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -47,6 +48,8 @@
#include <numeric>
#include <optional>
+#define DEBUG_TYPE "clang-openmp-codegen"
+
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
@@ -8868,9 +8871,11 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
}
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
- return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+ auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
PLoc.getLine(), PLoc.getColumn(),
SrcLocStrSize);
+ LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
+ return Str;
}
/// Emit the arrays used to pass the captures and map information to the
@@ -9484,8 +9489,96 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
}
return DynCGroupMem;
}
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+ const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+ // Get mappable expression information.
+ MappableExprsHandler MEHandler(D, CGF);
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+ CodeGenModule &CGM = CGF.CGM;
+ auto RI = CS.getCapturedRecordDecl()->field_begin();
+ auto *CV = CapturedVars.begin();
+ for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
+ CE = CS.capture_end();
+ CI != CE; ++CI, ++RI, ++CV) {
+ MappableExprsHandler::MapCombinedInfoTy CurInfo;
+ MappableExprsHandler::StructRangeInfoTy PartialStruct;
-static void emitTargetCallKernelLaunch(
+ // VLA sizes are passed to the outlined region by copy and do not have map
+ // information associated.
+ if (CI->capturesVariableArrayType()) {
+ CurInfo.Exprs.push_back(nullptr);
+ CurInfo.BasePointers.push_back(*CV);
+ CurInfo.DevicePtrDecls.push_back(nullptr);
+ CurInfo.DevicePointers.push_back(
+ MappableExprsHandler::DeviceInfoTy::None);
+ CurInfo.Pointers.push_back(*CV);
+ CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+ CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
+ // Copy to the device as an argument. No need to retrieve it.
+ CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+ OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
+ OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
+ CurInfo.Mappers.push_back(nullptr);
+ } else {
+ // If we have any information in the map clause, we use it, otherwise we
+ // just do a default mapping.
+ MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
+ if (!CI->capturesThis())
+ MappedVarSet.insert(CI->getCapturedVar());
+ else
+ MappedVarSet.insert(nullptr);
+ if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
+ MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
+ // Generate correct mapping for variables captured by reference in
+ // lambdas.
+ if (CI->capturesVariable())
+ MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+ CurInfo, LambdaPointers);
+ }
+ // We expect to have at least an element of information for this capture.
+ assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
+ "Non-existing map pointer for capture!");
+ assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Types.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
+ "Inconsistent map information sizes!");
+
+ // If there is an entry in PartialStruct it means we have a struct with
+ // individual members mapped. Emit an extra combined entry.
+ if (PartialStruct.Base.isValid()) {
+ CombinedInfo.append(PartialStruct.PreliminaryMapData);
+ MEHandler.emitCombinedEntry(
+ CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
+ OMPBuilder, nullptr,
+ !PartialStruct.PreliminaryMapData.BasePointers.empty());
+ }
+
+ // We need to append the results of this capture to what we already have.
+ CombinedInfo.append(CurInfo);
+ }
+ // Adjust MEMBER_OF flags for the lambdas captures.
+ MEHandler.adjustMemberOfForLambdaCaptures(
+ OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
+ CombinedInfo.Pointers, CombinedInfo.Types);
+ // Map any list items in a map clause that were not captures because they
+ // weren't referenced within the construct.
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+
+ auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
+ return emitMappingInformation(CGF, OMPBuilder, MapExpr);
+ };
+ if (CGM.getCodeGenOpts().getDebugInfo() !=
+ llvm::codegenoptions::NoDebugInfo) {
+ CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
+ llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
+ FillInfoMap);
+ }
+}
+static void emitTargetCallKernelLaunchNew(
CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
const OMPExecutableDirective &D,
llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9501,8 +9594,139 @@ static void emitTargetCallKernelLaunch(
// Fill up the arrays with all the captured variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+ CGOpenMPRuntime::TargetDataInfo Info;
- // Get mappable expression information.
+ auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+ -> llvm::OpenMPIRBuilder::MapInfosTy & {
+ CGF.Builder.restoreIP(CodeGenIP);
+ genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
+ return CombinedInfo;
+ };
+ auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+ if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+ Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+ }
+ };
+
+ auto CustomMapperCB = [&](unsigned int I) {
+ llvm::Value *MFunc = nullptr;
+ if (CombinedInfo.Mappers[I]) {
+ Info.HasMapper = true;
+ MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+ }
+ return MFunc;
+ };
+ // Fill up the arrays and create the arguments.
+ LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+ OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
+ CGF.AllocaInsertPt->getIterator()),
+ CGF.Builder.saveIP(), Info,
+ GenMapInfoCB, /*IsNonContiguous=*/true,
+ DeviceAddrCB, CustomMapperCB);
+ bool EmitDebug = !CombinedInfo.Names.empty();
+ OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
+ EmitDebug,
+ /*ForEndCall=*/false);
+
+ LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+ InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+ InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
+ CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.PointersArray =
+ Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.SizesArray =
+ Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
+ InputInfo.MappersArray =
+ Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ MapTypesArray = Info.RTArgs.MapTypesArray;
+ MapNamesArray = Info.RTArgs.MapNamesArray;
+
+ auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
+ RequiresOuterTask, &CS, OffloadingMandatory, Device,
+ OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
+ SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
+ bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
+
+ if (IsReverseOffloading) {
+ // Reverse offloading is not supported, so just execute on the host.
+ // FIXME: This fallback solution is incorrect since it ignores the
+ // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
+ // assert here and ensure SEMA emits an error.
+ emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+ RequiresOuterTask, CS, OffloadingMandatory, CGF);
+ return;
+ }
+
+ bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
+ unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
+
+ llvm::Value *BasePointersArray =
+ InputInfo.BasePointersArray.emitRawPointer(CGF);
+ llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
+ llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
+ llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
+
+ auto &&EmitTargetCallFallbackCB =
+ [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
+ OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
+ -> llvm::OpenMPIRBuilder::InsertPointTy {
+ CGF.Builder.restoreIP(IP);
+ emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+ RequiresOuterTask, CS, OffloadingMandatory, CGF);
+ return CGF.Builder.saveIP();
+ };
+
+ llvm::Value *DeviceID = emitDeviceID(Device, CGF);
+ llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
+ llvm::Value *NumThreads =
+ OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
+ llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
+ llvm::Value *NumIterations =
+ OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
+ llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
+ llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+ CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+ llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
+ BasePointersArray, PointersArray, SizesArray, MapTypesArray,
+ nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
+
+ llvm::OpenMPIRBuilder::TargetKernelArgs Args(
+ NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
+ DynCGGroupMem, HasNoWait);
+
+ CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
+ CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
+ DeviceID, RTLoc, AllocaIP));
+ };
+
+ if (RequiresOuterTask) {
+ if (NewClangTargetTaskCodeGen) {
+ llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
+ } else {
+ CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+ }
+ } else
+ OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
+}
+static void emitTargetCallKernelLaunch(
+ CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
+ const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
+ const CapturedStmt &CS, bool OffloadingMandatory,
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
+ llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
+ llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter,
+ CodeGenFunction &CGF, CodeGenModule &CGM) {
+ llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
+
+ // Fill up the arrays with all the captured variables.
+ MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
@@ -9579,6 +9803,7 @@ static void emitTargetCallKernelLaunch(
CGOpenMPRuntime::TargetDataInfo Info;
// Fill up the arrays and create the arguments.
+ LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
llvm::codegenoptions::NoDebugInfo;
@@ -9586,6 +9811,7 @@ static void emitTargetCallKernelLaunch(
EmitDebug,
/*ForEndCall=*/false);
+ LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9725,7 +9951,13 @@ void CGOpenMPRuntime::emitTargetCall(
OutlinedFnID, &InputInfo, &MapTypesArray,
&MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
PrePostActionTy &) {
- emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+ if (OpenMPClangTargetCodegen)
+ emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+ RequiresOuterTask, CS, OffloadingMandatory,
+ Device, OutlinedFnID, InputInfo, MapTypesArray,
+ MapNamesArray, SizeEmitter, CGF, CGM);
+ else
+ emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
RequiresOuterTask, CS, OffloadingMandatory,
Device, OutlinedFnID, InputInfo, MapTypesArray,
MapNamesArray, SizeEmitter, CGF, CGM);
@@ -9748,6 +9980,7 @@ void CGOpenMPRuntime::emitTargetCall(
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
+ LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
}
} else {
RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 853046bf43495..379c7f16db9c5 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -40,6 +40,7 @@
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
+#define DEBUG_TYPE "clang-openmp-codegen"
#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a6995888de7d4..f4449f3c0a44f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2358,6 +2358,22 @@ class OpenMPIRBuilder {
MapInfosTy &CombinedInfo,
TargetDataInfo &Info);
+ /// Callback type for creating the map infos for the kernel parameters.
+ /// \param CodeGenIP is the insertion point where code should be generated,
+ /// if any.
+ using GenMapInfoCallbackTy =
+ function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
+
+ /// Emit the arrays used to pass the captures and map information to the
+ /// offloading runtime library. If there is no map or capture information,
+ /// return nullptr by reference.
+ void emitOffloadingArrays(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+ bool IsNonContiguous = false,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
@@ -2367,6 +2383,7 @@ class OpenMPIRBuilder {
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
@@ -2770,11 +2787,6 @@ class OpenMPIRBuilder {
/// duplicating the body code.
enum BodyGenTy { Priv, DupNoPriv, NoPriv };
- /// Callback type for creating the map infos for the kernel parameters.
- /// \param CodeGenIP is the insertion point where code should be generated,
- /// if any.
- using GenMapInfoCallbackTy =
- function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
/// Generator for '#omp target data'
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 98da6e2efcb5c..7b9e585d58664 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7403,6 +7403,16 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
}
}
+void OpenMPIRBuilder::emitOffloadingArrays(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB) {
+
+ OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
+ emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
+ Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+}
void OpenMPIRBuilder::emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
TargetDataInfo &Info, bool IsNonContiguous,
>From 449d1b46690659950f4d3c164ccc5fe5acd0128c Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 25 Jun 2024 16:07:37 -0500
Subject: [PATCH 02/14] Update emitOffloadingArraysArgument, add emitOffloadingArraysAndArgs, and some other debug prints
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 50 +++++++++----------
clang/lib/CodeGen/CGOpenMPRuntime.h | 39 +++++++++++++++
clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +
clang/lib/CodeGen/CodeGenFunction.h | 1 +
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 12 ++++-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 39 +++++++++------
.../Frontend/OpenMPIRBuilderTest.cpp | 4 +-
7 files changed, 102 insertions(+), 45 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 4c95aab3c33c1..9495a122cbe34 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3013,6 +3013,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
llvm::FunctionType *TaskEntryTy =
CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+ LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
+ << KmpTaskTWithPrivatesPtrQTy << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
auto *TaskEntry = llvm::Function::Create(
TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3717,6 +3721,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
TaskPrivatesMap);
+ LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
@@ -9619,15 +9624,14 @@ static void emitTargetCallKernelLaunchNew(
};
// Fill up the arrays and create the arguments.
LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
- OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
- CGF.AllocaInsertPt->getIterator()),
- CGF.Builder.saveIP(), Info,
- GenMapInfoCB, /*IsNonContiguous=*/true,
- DeviceAddrCB, CustomMapperCB);
- bool EmitDebug = !CombinedInfo.Names.empty();
- OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- EmitDebug,
- /*ForEndCall=*/false);
+
+ llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
+ CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+ OMPBuilder.emitOffloadingArraysAndArgs(
+ OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+ GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+ DeviceAddrCB, CustomMapperCB);
LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
@@ -9701,13 +9705,9 @@ static void emitTargetCallKernelLaunchNew(
DeviceID, RTLoc, AllocaIP));
};
- if (RequiresOuterTask) {
- if (NewClangTargetTaskCodeGen) {
- llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
- } else {
- CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
- }
- } else
+ if (RequiresOuterTask)
+ CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+ else
OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
static void emitTargetCallKernelLaunch(
@@ -9805,10 +9805,9 @@ static void emitTargetCallKernelLaunch(
// Fill up the arrays and create the arguments.
LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
- bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+ Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
llvm::codegenoptions::NoDebugInfo;
OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- EmitDebug,
/*ForEndCall=*/false);
LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
@@ -9951,16 +9950,16 @@ void CGOpenMPRuntime::emitTargetCall(
OutlinedFnID, &InputInfo, &MapTypesArray,
&MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
PrePostActionTy &) {
- if (OpenMPClangTargetCodegen)
+ // if (OpenMPClangTargetCodegen)
emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
RequiresOuterTask, CS, OffloadingMandatory,
Device, OutlinedFnID, InputInfo, MapTypesArray,
MapNamesArray, SizeEmitter, CGF, CGM);
- else
- emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
- RequiresOuterTask, CS, OffloadingMandatory,
- Device, OutlinedFnID, InputInfo, MapTypesArray,
- MapNamesArray, SizeEmitter, CGF, CGM);
+ // else
+ // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+ // RequiresOuterTask, CS, OffloadingMandatory,
+ // Device, OutlinedFnID, InputInfo, MapTypesArray,
+ // MapNamesArray, SizeEmitter, CGF, CGM);
};
auto &&TargetElseGen =
@@ -10723,10 +10722,9 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
/*IsNonContiguous=*/true);
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
- bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+ Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
llvm::codegenoptions::NoDebugInfo;
OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- EmitDebug,
/*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index f65314d014c08..f6e3677232f07 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,45 @@ struct OMPTaskDataTy final {
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
bool HasNowaitClause = false;
+ void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
+ auto &&printSVHelper =
+ [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
+ for (auto &v : V) {
+ v->dump(os, Ctx);
+ }
+ };
+ auto &&printSV =
+ [&os, printSVHelper](std::string s,
+ const SmallVector<const Expr *, 4> &V) -> void {
+ os << s << ":[\n";
+ printSVHelper(V);
+ os << "]\n";
+ };
+ // SmallVector<const Expr *, 4> PrivateVars;
+ // SmallVector<const Expr *, 4> PrivateCopies;
+ // SmallVector<const Expr *, 4> FirstprivateVars;
+ // SmallVector<const Expr *, 4> FirstprivateCopies;
+ // SmallVector<const Expr *, 4> FirstprivateInits;
+ // SmallVector<const Expr *, 4> LastprivateVars;
+ // SmallVector<const Expr *, 4> LastprivateCopies;
+ // SmallVector<const Expr *, 4> ReductionVars;
+ // SmallVector<const Expr *, 4> ReductionOrigs;
+ // SmallVector<const Expr *, 4> ReductionCopies;
+ // SmallVector<const Expr *, 4> ReductionOps;
+ // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
+
+ printSV("PrivateVars", PrivateVars);
+ printSV("PrivateCopies", PrivateCopies);
+ printSV("FirstprivateVars", FirstprivateVars);
+ printSV("FirstprivateCopies", FirstprivateCopies);
+ printSV("FirstprivateInits", FirstprivateInits);
+ printSV("LastprivateVars", LastprivateVars);
+ printSV("LastprivateCopies", LastprivateCopies);
+ printSV("ReductionVars", ReductionVars);
+ printSV("ReductionOrigs", ReductionOrigs);
+ printSV("ReductionCopies", ReductionCopies);
+ printSV("ReductionOps", ReductionOps);
+ }
};
/// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 379c7f16db9c5..0a060324c60a7 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5264,6 +5264,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
BodyGen(CGF);
};
+ LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
+ LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true,
Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 67e3019565cd0..09ffe7a68a64f 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -440,6 +440,7 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
public:
+ void printLocalDeclMap();
/// Return PostAllocaInsertPt. If it is not yet created, then insert it
/// immediately after AllocaInsertPt.
llvm::Instruction *getPostAllocaInsertPoint() {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index f4449f3c0a44f..7782ad5998917 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2231,6 +2231,8 @@ class OpenMPIRBuilder {
/// The total number of pointers passed to the runtime library.
unsigned NumberOfPtrs = 0u;
+ bool EmitDebug = false;
+
explicit TargetDataInfo() {}
explicit TargetDataInfo(bool RequiresDevicePointerInfo,
bool SeparateBeginEndCalls)
@@ -2349,7 +2351,6 @@ class OpenMPIRBuilder {
void emitOffloadingArraysArgument(IRBuilderBase &Builder,
OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
OpenMPIRBuilder::TargetDataInfo &Info,
- bool EmitDebug = false,
bool ForEndCall = false);
/// Emit an array of struct descriptors to be assigned to the offload args.
@@ -2369,7 +2370,7 @@ class OpenMPIRBuilder {
/// return nullptr by reference.
void emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+ GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
@@ -2384,6 +2385,13 @@ class OpenMPIRBuilder {
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+ void emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+ bool IsNonContiguous = false, bool ForEndCall = false,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7b9e585d58664..88c04c3803e21 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6372,8 +6372,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
CustomMapperCB);
TargetDataRTArgs RTArgs;
- emitOffloadingArraysArgument(Builder, RTArgs, Info,
- !MapInfo->Names.empty());
+ emitOffloadingArraysArgument(Builder, RTArgs, Info);
// Emit the number of elements in the offloading arrays.
Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -6426,8 +6425,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
// Generate code for the closing of the data region.
auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
TargetDataRTArgs RTArgs;
- emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
- /*ForEndCall=*/true);
+ Info.EmitDebug = !MapInfo->Names.empty();
+ emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true);
// Emit the number of elements in the offloading arrays.
Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -7057,6 +7056,16 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
<< "\n");
return Builder.saveIP();
}
+void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+ bool IsNonContiguous, bool ForEndCall,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB) {
+ emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+ DeviceAddrCB, CustomMapperCB);
+ emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
static void emitTargetCall(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn,
@@ -7069,13 +7078,11 @@ static void emitTargetCall(
/*RequiresDevicePointerInfo=*/false,
/*SeparateBeginEndCalls=*/true);
- OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
- OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info,
- /*IsNonContiguous=*/true);
-
OpenMPIRBuilder::TargetDataRTArgs RTArgs;
- OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
- !MapInfo.Names.empty());
+ OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
+ RTArgs, GenMapInfoCB,
+ /*IsNonContiguous=*/true,
+ /*ForEndCall=*/false);
// emitKernelLaunch
auto &&EmitTargetCallFallbackCB =
@@ -7085,7 +7092,7 @@ static void emitTargetCall(
return Builder.saveIP();
};
- unsigned NumTargetItems = MapInfo.BasePointers.size();
+ unsigned NumTargetItems = Info.NumberOfPtrs;
// TODO: Use correct device ID
Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
Value *NumTeamsVal = Builder.getInt32(NumTeams);
@@ -7279,7 +7286,6 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
TargetDataRTArgs &RTArgs,
TargetDataInfo &Info,
- bool EmitDebug,
bool ForEndCall) {
assert((!ForEndCall || Info.separateBeginEndCalls()) &&
"expected region end call to runtime only when end call is separate");
@@ -7319,7 +7325,7 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
// Only emit the mapper information arrays if debug information is
// requested.
- if (!EmitDebug)
+ if (!Info.EmitDebug)
RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
else
RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
@@ -7404,8 +7410,9 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
}
void OpenMPIRBuilder::emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
- GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+ bool IsNonContiguous,
function_ref<void(unsigned int, Value *)> DeviceAddrCB,
function_ref<Value *(unsigned int)> CustomMapperCB) {
@@ -7518,9 +7525,11 @@ void OpenMPIRBuilder::emitOffloadingArrays(
auto *MapNamesArrayGbl =
createOffloadMapnames(CombinedInfo.Names, MapnamesName);
Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
+ Info.EmitDebug = true;
} else {
Info.RTArgs.MapNamesArray =
Constant::getNullValue(PointerType::getUnqual(Builder.getContext()));
+ Info.EmitDebug = false;
}
// If there's a present map type modifier, it must not be applied to the end
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 8653bbd3d38fd..cb4c289f409a1 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6902,8 +6902,8 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
Info.RTArgs.MappersArray =
ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
Info.NumberOfPtrs = 4;
-
- OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false);
+ Info.EmitDebug = false;
+ OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
EXPECT_NE(RTArgs.BasePointersArray, nullptr);
EXPECT_NE(RTArgs.PointersArray, nullptr);
>From 5fac34338cda635a915fe2489cb30fc54c36fa4f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:33:53 -0500
Subject: [PATCH 03/14] clean up, clean up, everybody clean up
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 206 +-------------------------
clang/lib/CodeGen/CGOpenMPRuntime.h | 39 -----
clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 -
clang/lib/CodeGen/CodeGenFunction.h | 1 -
4 files changed, 5 insertions(+), 245 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9495a122cbe34..e3b7cdbe0dfa8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -40,7 +40,6 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -48,8 +47,6 @@
#include <numeric>
#include <optional>
-#define DEBUG_TYPE "clang-openmp-codegen"
-
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
@@ -3013,10 +3010,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
llvm::FunctionType *TaskEntryTy =
CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
- LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
- LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
- << KmpTaskTWithPrivatesPtrQTy << "\n");
- LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
auto *TaskEntry = llvm::Function::Create(
TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3721,7 +3714,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
TaskPrivatesMap);
- LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
@@ -8876,11 +8868,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
}
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
- auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+ return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
PLoc.getLine(), PLoc.getColumn(),
SrcLocStrSize);
- LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
- return Str;
}
/// Emit the arrays used to pass the captures and map information to the
@@ -9583,7 +9573,7 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
FillInfoMap);
}
}
-static void emitTargetCallKernelLaunchNew(
+static void emitTargetCallKernelLaunch(
CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
const OMPExecutableDirective &D,
llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9622,9 +9612,8 @@ static void emitTargetCallKernelLaunchNew(
}
return MFunc;
};
- // Fill up the arrays and create the arguments.
- LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-
+ // Fill up the basepointers, pointers and mapper arrays and create the
+ // arguments.
llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
@@ -9633,184 +9622,6 @@ static void emitTargetCallKernelLaunchNew(
GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
DeviceAddrCB, CustomMapperCB);
- LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
- InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
- InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
- CGF.VoidPtrTy, CGM.getPointerAlign());
- InputInfo.PointersArray =
- Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- InputInfo.SizesArray =
- Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
- InputInfo.MappersArray =
- Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- MapTypesArray = Info.RTArgs.MapTypesArray;
- MapNamesArray = Info.RTArgs.MapNamesArray;
-
- auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
- RequiresOuterTask, &CS, OffloadingMandatory, Device,
- OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
- SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
- bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
-
- if (IsReverseOffloading) {
- // Reverse offloading is not supported, so just execute on the host.
- // FIXME: This fallback solution is incorrect since it ignores the
- // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
- // assert here and ensure SEMA emits an error.
- emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
- RequiresOuterTask, CS, OffloadingMandatory, CGF);
- return;
- }
-
- bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
- unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
-
- llvm::Value *BasePointersArray =
- InputInfo.BasePointersArray.emitRawPointer(CGF);
- llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
- llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
- llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
-
- auto &&EmitTargetCallFallbackCB =
- [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
- OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
- -> llvm::OpenMPIRBuilder::InsertPointTy {
- CGF.Builder.restoreIP(IP);
- emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
- RequiresOuterTask, CS, OffloadingMandatory, CGF);
- return CGF.Builder.saveIP();
- };
-
- llvm::Value *DeviceID = emitDeviceID(Device, CGF);
- llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
- llvm::Value *NumThreads =
- OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
- llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
- llvm::Value *NumIterations =
- OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
- llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
- llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
- CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
- llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
- BasePointersArray, PointersArray, SizesArray, MapTypesArray,
- nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
-
- llvm::OpenMPIRBuilder::TargetKernelArgs Args(
- NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
- DynCGGroupMem, HasNoWait);
-
- CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
- CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
- DeviceID, RTLoc, AllocaIP));
- };
-
- if (RequiresOuterTask)
- CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
- else
- OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
-}
-static void emitTargetCallKernelLaunch(
- CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
- const OMPExecutableDirective &D,
- llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
- const CapturedStmt &CS, bool OffloadingMandatory,
- llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
- llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
- llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
- llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
- const OMPLoopDirective &D)>
- SizeEmitter,
- CodeGenFunction &CGF, CodeGenModule &CGM) {
- llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
-
- // Fill up the arrays with all the captured variables.
- MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
-// Get mappable expression information.
- MappableExprsHandler MEHandler(D, CGF);
- llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
- llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
-
- auto RI = CS.getCapturedRecordDecl()->field_begin();
- auto *CV = CapturedVars.begin();
- for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
- CE = CS.capture_end();
- CI != CE; ++CI, ++RI, ++CV) {
- MappableExprsHandler::MapCombinedInfoTy CurInfo;
- MappableExprsHandler::StructRangeInfoTy PartialStruct;
-
- // VLA sizes are passed to the outlined region by copy and do not have map
- // information associated.
- if (CI->capturesVariableArrayType()) {
- CurInfo.Exprs.push_back(nullptr);
- CurInfo.BasePointers.push_back(*CV);
- CurInfo.DevicePtrDecls.push_back(nullptr);
- CurInfo.DevicePointers.push_back(
- MappableExprsHandler::DeviceInfoTy::None);
- CurInfo.Pointers.push_back(*CV);
- CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
- CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
- // Copy to the device as an argument. No need to retrieve it.
- CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
- OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
- OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
- CurInfo.Mappers.push_back(nullptr);
- } else {
- // If we have any information in the map clause, we use it, otherwise we
- // just do a default mapping.
- MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
- if (!CI->capturesThis())
- MappedVarSet.insert(CI->getCapturedVar());
- else
- MappedVarSet.insert(nullptr);
- if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
- MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
- // Generate correct mapping for variables captured by reference in
- // lambdas.
- if (CI->capturesVariable())
- MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
- CurInfo, LambdaPointers);
- }
- // We expect to have at least an element of information for this capture.
- assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
- "Non-existing map pointer for capture!");
- assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
- CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
- CurInfo.BasePointers.size() == CurInfo.Types.size() &&
- CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
- "Inconsistent map information sizes!");
-
- // If there is an entry in PartialStruct it means we have a struct with
- // individual members mapped. Emit an extra combined entry.
- if (PartialStruct.Base.isValid()) {
- CombinedInfo.append(PartialStruct.PreliminaryMapData);
- MEHandler.emitCombinedEntry(
- CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
- OMPBuilder, nullptr,
- !PartialStruct.PreliminaryMapData.BasePointers.empty());
- }
-
- // We need to append the results of this capture to what we already have.
- CombinedInfo.append(CurInfo);
- }
- // Adjust MEMBER_OF flags for the lambdas captures.
- MEHandler.adjustMemberOfForLambdaCaptures(
- OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
- CombinedInfo.Pointers, CombinedInfo.Types);
- // Map any list items in a map clause that were not captures because they
- // weren't referenced within the construct.
- MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
-
- CGOpenMPRuntime::TargetDataInfo Info;
- // Fill up the arrays and create the arguments.
- LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
- emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
- Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
- llvm::codegenoptions::NoDebugInfo;
- OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- /*ForEndCall=*/false);
-
- LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9950,16 +9761,10 @@ void CGOpenMPRuntime::emitTargetCall(
OutlinedFnID, &InputInfo, &MapTypesArray,
&MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
PrePostActionTy &) {
- // if (OpenMPClangTargetCodegen)
- emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+ emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
RequiresOuterTask, CS, OffloadingMandatory,
Device, OutlinedFnID, InputInfo, MapTypesArray,
MapNamesArray, SizeEmitter, CGF, CGM);
- // else
- // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
- // RequiresOuterTask, CS, OffloadingMandatory,
- // Device, OutlinedFnID, InputInfo, MapTypesArray,
- // MapNamesArray, SizeEmitter, CGF, CGM);
};
auto &&TargetElseGen =
@@ -9979,7 +9784,6 @@ void CGOpenMPRuntime::emitTargetCall(
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
- LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
}
} else {
RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index f6e3677232f07..f65314d014c08 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,45 +122,6 @@ struct OMPTaskDataTy final {
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
bool HasNowaitClause = false;
- void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
- auto &&printSVHelper =
- [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
- for (auto &v : V) {
- v->dump(os, Ctx);
- }
- };
- auto &&printSV =
- [&os, printSVHelper](std::string s,
- const SmallVector<const Expr *, 4> &V) -> void {
- os << s << ":[\n";
- printSVHelper(V);
- os << "]\n";
- };
- // SmallVector<const Expr *, 4> PrivateVars;
- // SmallVector<const Expr *, 4> PrivateCopies;
- // SmallVector<const Expr *, 4> FirstprivateVars;
- // SmallVector<const Expr *, 4> FirstprivateCopies;
- // SmallVector<const Expr *, 4> FirstprivateInits;
- // SmallVector<const Expr *, 4> LastprivateVars;
- // SmallVector<const Expr *, 4> LastprivateCopies;
- // SmallVector<const Expr *, 4> ReductionVars;
- // SmallVector<const Expr *, 4> ReductionOrigs;
- // SmallVector<const Expr *, 4> ReductionCopies;
- // SmallVector<const Expr *, 4> ReductionOps;
- // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
-
- printSV("PrivateVars", PrivateVars);
- printSV("PrivateCopies", PrivateCopies);
- printSV("FirstprivateVars", FirstprivateVars);
- printSV("FirstprivateCopies", FirstprivateCopies);
- printSV("FirstprivateInits", FirstprivateInits);
- printSV("LastprivateVars", LastprivateVars);
- printSV("LastprivateCopies", LastprivateCopies);
- printSV("ReductionVars", ReductionVars);
- printSV("ReductionOrigs", ReductionOrigs);
- printSV("ReductionCopies", ReductionCopies);
- printSV("ReductionOps", ReductionOps);
- }
};
/// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 0a060324c60a7..74d99d9812bda 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,12 +35,10 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
-#define DEBUG_TYPE "clang-openmp-codegen"
#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
@@ -5264,8 +5262,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
BodyGen(CGF);
};
- LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
- LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true,
Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 09ffe7a68a64f..67e3019565cd0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -440,7 +440,6 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
public:
- void printLocalDeclMap();
/// Return PostAllocaInsertPt. If it is not yet created, then insert it
/// immediately after AllocaInsertPt.
llvm::Instruction *getPostAllocaInsertPoint() {
>From f761f4c33afb873074782d51222ed87540627d49 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:39:07 -0500
Subject: [PATCH 04/14] Add Debug.h include in CGStmtOpenMP.cpp because removal
is not related to my change
---
clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 74d99d9812bda..853046bf43495 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
>From 5c34a68ada279b5ba9b5bc8116aa08c4af2d197c Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 14:13:36 -0500
Subject: [PATCH 05/14] Document emitOffloadingArrays and
emitOffloadingArraysAndArgs in OMPIRBuilder.h
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 28 +++++++++++++------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 7782ad5998917..50786ac3d5261 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2367,24 +2367,36 @@ class OpenMPIRBuilder {
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference.
+ /// return nullptr by reference. This is the first of two overloads - this
+ /// one accepts a reference to a MapInfosTy object that contains combined
+ /// information generated for mappable clauses, including base pointers,
+ /// pointers, sizes, map types, user-defined mappers, and non-contiguous
+ /// information.
void emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
- bool IsNonContiguous = false,
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
+ TargetDataInfo &Info, bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference.
+ /// return nullptr by reference. This is the second of two overloads - instead
+ /// of accepting a reference to a MapInfosTy object, this overload accepts
+ /// a callback of type GenMapInfoCallbackTy to populate a MapInfosTy object
+ /// with mapping information.
void emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
- TargetDataInfo &Info, bool IsNonContiguous = false,
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+ bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
+ /// Allocates memory for and populates the arrays required for offloading
+ /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+ /// emits their base addresses as arguments to be passed to the runtime
+ /// library. In essence, this function is a combination of
+ /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+ /// be preferred by clients of OpenMPIRBuilder.
void emitOffloadingArraysAndArgs(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
>From 09ee31148d9d0297e893a49cdb546427c88258cc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 15:29:44 -0500
Subject: [PATCH 06/14] refactor genMapInfo
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 42 +++++++++++++++++++++------
1 file changed, 33 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e3b7cdbe0dfa8..0122f33d201d7 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9484,14 +9484,14 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
}
return DynCGroupMem;
}
-static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
- const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
- llvm::OpenMPIRBuilder &OMPBuilder,
- MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
- // Get mappable expression information.
- MappableExprsHandler MEHandler(D, CGF);
- llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
- llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+static void genMapInfoForCaptures(
+ MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+ const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+
CodeGenModule &CGM = CGF.CGM;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto *CV = CapturedVars.begin();
@@ -9559,9 +9559,18 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
MEHandler.adjustMemberOfForLambdaCaptures(
OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
CombinedInfo.Pointers, CombinedInfo.Types);
+}
+static void
+genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
+
+ CodeGenModule &CGM = CGF.CGM;
// Map any list items in a map clause that were not captures because they
// weren't referenced within the construct.
- MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
return emitMappingInformation(CGF, OMPBuilder, MapExpr);
@@ -9573,6 +9582,21 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
FillInfoMap);
}
}
+
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+ const CapturedStmt &CS,
+ llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+ // Get mappable expression information.
+ MappableExprsHandler MEHandler(D, CGF);
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+
+ genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
+ LambdaPointers, MappedVarSet, CombinedInfo);
+ genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
+}
static void emitTargetCallKernelLaunch(
CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
const OMPExecutableDirective &D,
>From 4872692f6ac421e86924f2d524fe10b45fe69dad Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 27 Jun 2024 13:51:34 -0500
Subject: [PATCH 07/14] Use CGOpenMPRuntime::emitTargetDataStandAloneCall
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 44 +++++++++++++++++++++------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 0122f33d201d7..53f7d4d807f21 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10539,21 +10539,45 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+ CGOpenMPRuntime::TargetDataInfo Info;
// Get map clause information.
- MappableExprsHandler MEHandler(D, CGF);
- MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
+ auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+ -> llvm::OpenMPIRBuilder::MapInfosTy & {
+ CGF.Builder.restoreIP(CodeGenIP);
+ MappableExprsHandler MEHandler(D, CGF);
+ genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+ return CombinedInfo;
+ };
- CGOpenMPRuntime::TargetDataInfo Info;
- // Fill up the arrays and create the arguments.
- emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
- /*IsNonContiguous=*/true);
+ auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+ if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+ Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+ }
+ };
+
+ auto CustomMapperCB = [&](unsigned int I) {
+ llvm::Value *MFunc = nullptr;
+ if (CombinedInfo.Mappers[I]) {
+ Info.HasMapper = true;
+ MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+ }
+ return MFunc;
+ };
+
+ // Fill up the basepointers, pointers and mapper arrays and create the
+ // arguments.
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
+ CGF.AllocaInsertPt->getIterator());
+
+ OMPBuilder.emitOffloadingArraysAndArgs(
+ OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+ GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+ DeviceAddrCB, CustomMapperCB);
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
- Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
- llvm::codegenoptions::NoDebugInfo;
- OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- /*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
>From 0e1f43f7615bede8b2d9d124bd2cc2a2a36e061e Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 10:49:35 -0500
Subject: [PATCH 08/14] Use static function emitOffloadingArraysAndArgs in
emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 67 ++++++++++---------
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 13 ++++
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 13 ++++
3 files changed, 63 insertions(+), 30 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 53f7d4d807f21..0b41f80706f2e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8921,6 +8921,40 @@ static void emitOffloadingArrays(
/*IsNonContiguous=*/true, DeviceAddrCB,
CustomMapperCB);
}
+/// Emit the arrays used to pass the captures and map information to the
+/// offloading runtime library. If there is no map or capture information,
+/// return nullptr by reference.
+static void emitOffloadingArraysAndArgs(
+ CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+ CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
+ bool IsNonContiguous = false, bool ForEndCall = false) {
+ CodeGenModule &CGM = CGF.CGM;
+
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
+ CGF.AllocaInsertPt->getIterator());
+ InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
+ CGF.Builder.GetInsertPoint());
+
+ auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+ if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+ Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+ }
+ };
+
+ auto CustomMapperCB = [&](unsigned int I) {
+ llvm::Value *MFunc = nullptr;
+ if (CombinedInfo.Mappers[I]) {
+ Info.HasMapper = true;
+ MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+ }
+ return MFunc;
+ };
+ OMPBuilder.emitOffloadingArraysAndArgs(
+ AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
+ ForEndCall, DeviceAddrCB, CustomMapperCB);
+}
/// Check for inner distribute directive.
static const OMPExecutableDirective *
@@ -9614,37 +9648,10 @@ static void emitTargetCallKernelLaunch(
// Fill up the arrays with all the captured variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
CGOpenMPRuntime::TargetDataInfo Info;
+ genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
- auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
- -> llvm::OpenMPIRBuilder::MapInfosTy & {
- CGF.Builder.restoreIP(CodeGenIP);
- genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
- return CombinedInfo;
- };
- auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
- if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
- Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
- }
- };
-
- auto CustomMapperCB = [&](unsigned int I) {
- llvm::Value *MFunc = nullptr;
- if (CombinedInfo.Mappers[I]) {
- Info.HasMapper = true;
- MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
- cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
- }
- return MFunc;
- };
- // Fill up the basepointers, pointers and mapper arrays and create the
- // arguments.
- llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
- CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
- OMPBuilder.emitOffloadingArraysAndArgs(
- OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
- GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
- DeviceAddrCB, CustomMapperCB);
+ emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+ /*IsNonContiguous=*/true, /*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 50786ac3d5261..1377ca8ce45b2 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2404,6 +2404,19 @@ class OpenMPIRBuilder {
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+ /// Allocates memory for and populates the arrays required for offloading
+ /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+ /// emits their base addresses as arguments to be passed to the runtime
+ /// library. In essence, this function is a combination of
+ /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+ /// be preferred by clients of OpenMPIRBuilder.
+ void emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
+ bool IsNonContiguous = false, bool ForEndCall = false,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 88c04c3803e21..3b71b2be3086f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7066,6 +7066,17 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
DeviceAddrCB, CustomMapperCB);
emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
}
+
+void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+ bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB) {
+ emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
+ IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+ emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
+
static void emitTargetCall(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn,
@@ -7074,6 +7085,8 @@ static void emitTargetCall(
OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
+
+
OpenMPIRBuilder::TargetDataInfo Info(
/*RequiresDevicePointerInfo=*/false,
/*SeparateBeginEndCalls=*/true);
>From 32edf70615a2a049cca7bd275c9d7436e749e725 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:27:58 -0500
Subject: [PATCH 09/14] Use static function emitOffloadingArraysAndArgs in
emitTargetDataStandAloneCall in CGOpenMPRuntime.cpp
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 40 ++++-----------------------
1 file changed, 5 insertions(+), 35 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 0b41f80706f2e..26976b1565209 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10547,44 +10547,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
CGOpenMPRuntime::TargetDataInfo Info;
+ MappableExprsHandler MEHandler(D, CGF);
+ genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+ emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+ /*IsNonContiguous=*/true, /*ForEndCall=*/false);
- // Get map clause information.
- auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
- -> llvm::OpenMPIRBuilder::MapInfosTy & {
- CGF.Builder.restoreIP(CodeGenIP);
- MappableExprsHandler MEHandler(D, CGF);
- genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
- return CombinedInfo;
- };
-
- auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
- if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
- Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
- }
- };
-
- auto CustomMapperCB = [&](unsigned int I) {
- llvm::Value *MFunc = nullptr;
- if (CombinedInfo.Mappers[I]) {
- Info.HasMapper = true;
- MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
- cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
- }
- return MFunc;
- };
-
- // Fill up the basepointers, pointers and mapper arrays and create the
- // arguments.
- using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
- CGF.AllocaInsertPt->getIterator());
-
- OMPBuilder.emitOffloadingArraysAndArgs(
- OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
- GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
- DeviceAddrCB, CustomMapperCB);
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
+
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
>From 6ce0c84f93d1066c1a4f4bb7b1530b5c2b9d4144 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:30:10 -0500
Subject: [PATCH 10/14] Remove emitOffloadingArrays from CGOpenMPRuntime.cpp
because it is not used anymore. Use emitOffloadingArraysAndArgs
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 49 ---------------------------
1 file changed, 49 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 26976b1565209..57e958f644b18 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8872,55 +8872,6 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
PLoc.getLine(), PLoc.getColumn(),
SrcLocStrSize);
}
-
-/// Emit the arrays used to pass the captures and map information to the
-/// offloading runtime library. If there is no map or capture information,
-/// return nullptr by reference.
-static void emitOffloadingArrays(
- CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
- CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
- bool IsNonContiguous = false) {
- CodeGenModule &CGM = CGF.CGM;
-
- // Reset the array information.
- Info.clearArrayInfo();
- Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
-
- using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
- CGF.AllocaInsertPt->getIterator());
- InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
- CGF.Builder.GetInsertPoint());
-
- auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
- return emitMappingInformation(CGF, OMPBuilder, MapExpr);
- };
- if (CGM.getCodeGenOpts().getDebugInfo() !=
- llvm::codegenoptions::NoDebugInfo) {
- CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
- llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
- FillInfoMap);
- }
-
- auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
- if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
- Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
- }
- };
-
- auto CustomMapperCB = [&](unsigned int I) {
- llvm::Value *MFunc = nullptr;
- if (CombinedInfo.Mappers[I]) {
- Info.HasMapper = true;
- MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
- cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
- }
- return MFunc;
- };
- OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
- /*IsNonContiguous=*/true, DeviceAddrCB,
- CustomMapperCB);
-}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
>From a7ce3ce2ad355427c0aa58b996df0f304edc2185 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:05:02 -0500
Subject: [PATCH 11/14] Remove overloads of emitOffloadingArrays and
emitOffloadingArraysAndArgs that accept GenMapInfoCallbackTy
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1 -
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 34 ++-----------------
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 31 +++--------------
3 files changed, 8 insertions(+), 58 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 57e958f644b18..8a39dbdbeec53 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9477,7 +9477,6 @@ static void genMapInfoForCaptures(
llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
- CodeGenModule &CGM = CGF.CGM;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto *CV = CapturedVars.begin();
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1377ca8ce45b2..60e79ec3726ce 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2367,43 +2367,15 @@ class OpenMPIRBuilder {
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference. This is the first of two overloads - this
- /// one accepts a reference to a MapInfosTy object that contains combined
- /// information generated for mappable clauses, including base pointers,
- /// pointers, sizes, map types, user-defined mappers, and non-contiguous
- /// information.
+ /// return nullptr by reference. Accepts a reference to a MapInfosTy object
+ /// that contains information generated for mappable clauses,
+ /// including base pointers, pointers, sizes, map types, user-defined mappers.
void emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
TargetDataInfo &Info, bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
- /// Emit the arrays used to pass the captures and map information to the
- /// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference. This is the second of two overloads - Instead
- /// of accepting a reference to a MapInfosTy object, this overload accepts
- /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
- /// with mapping information.
- void emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
- bool IsNonContiguous = false,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
- function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
- /// Allocates memory for and populates the arrays required for offloading
- /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
- /// emits their base addresses as arguments to be passed to the runtime
- /// library. In essence, this function is a combination of
- /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
- /// be preferred by clients of OpenMPIRBuilder.
- void emitOffloadingArraysAndArgs(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
- TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
- bool IsNonContiguous = false, bool ForEndCall = false,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
- function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
/// Allocates memory for and populates the arrays required for offloading
/// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
/// emits their base addresses as arguments to be passed to the runtime
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3b71b2be3086f..e97029ceca33f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7056,26 +7056,15 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
<< "\n");
return Builder.saveIP();
}
-void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
- TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
- bool IsNonContiguous, bool ForEndCall,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB,
- function_ref<Value *(unsigned int)> CustomMapperCB) {
- emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
- DeviceAddrCB, CustomMapperCB);
- emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
-
void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
function_ref<Value *(unsigned int)> CustomMapperCB) {
- emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
- IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+ emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous,
+ DeviceAddrCB, CustomMapperCB);
emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
+}
static void emitTargetCall(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
@@ -7091,9 +7080,10 @@ static void emitTargetCall(
/*RequiresDevicePointerInfo=*/false,
/*SeparateBeginEndCalls=*/true);
+ OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
OpenMPIRBuilder::TargetDataRTArgs RTArgs;
OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
- RTArgs, GenMapInfoCB,
+ RTArgs, MapInfo,
/*IsNonContiguous=*/true,
/*ForEndCall=*/false);
@@ -7422,17 +7412,6 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
}
}
-void OpenMPIRBuilder::emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
- bool IsNonContiguous,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB,
- function_ref<Value *(unsigned int)> CustomMapperCB) {
-
- OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
- emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
- Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
-}
void OpenMPIRBuilder::emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
TargetDataInfo &Info, bool IsNonContiguous,
>From b9b687eb564dfda81afd3007a80b7ce5f3c0174b Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:16:41 -0500
Subject: [PATCH 12/14] Undo an unnecessary change in the location of the
declaration of GenMapInfoCallbackTy in OMPIRBuilder.h
---
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 60e79ec3726ce..1614d5716d28c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2359,12 +2359,6 @@ class OpenMPIRBuilder {
MapInfosTy &CombinedInfo,
TargetDataInfo &Info);
- /// Callback type for creating the map infos for the kernel parameters.
- /// \param CodeGenIP is the insertion point where code should be generated,
- /// if any.
- using GenMapInfoCallbackTy =
- function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
-
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference. Accepts a reference to a MapInfosTy object
@@ -2792,6 +2786,11 @@ class OpenMPIRBuilder {
/// duplicating the body code.
enum BodyGenTy { Priv, DupNoPriv, NoPriv };
+ /// Callback type for creating the map infos for the kernel parameters.
+ /// \param CodeGenIP is the insertion point where code should be generated,
+ /// if any.
+ using GenMapInfoCallbackTy =
+ function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
/// Generator for '#omp target data'
///
>From e74f34bbec42f634e030a08259f6fdd64a7ce7c7 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 22 Jul 2024 13:23:00 -0500
Subject: [PATCH 13/14] Address review comments
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8a39dbdbeec53..3210bd414f8ba 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9473,10 +9473,10 @@ static void genMapInfoForCaptures(
MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
llvm::OpenMPIRBuilder &OMPBuilder,
- llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto *CV = CapturedVars.begin();
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
@@ -9574,11 +9574,10 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
- llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
- LambdaPointers, MappedVarSet, CombinedInfo);
+ MappedVarSet, CombinedInfo);
genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
}
static void emitTargetCallKernelLaunch(
>From 0e7dd273509389c9eca61b3ffa97f2a3f4f54e5d Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 23 Jul 2024 16:19:03 -0500
Subject: [PATCH 14/14] clang-format fix
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index e97029ceca33f..eba9129bf4953 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -7074,8 +7074,6 @@ static void emitTargetCall(
OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
-
-
OpenMPIRBuilder::TargetDataInfo Info(
/*RequiresDevicePointerInfo=*/false,
/*SeparateBeginEndCalls=*/true);
More information about the cfe-commits
mailing list