[clang] [llvm] [OpenMPIRBuilder][Clang][NFC] - Combine `emitOffloadingArrays` and `emitOffloadingArraysArgument` in OpenMPIRBuilder (PR #97088)
Pranav Bhandarkar via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 22 11:25:18 PDT 2024
https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/97088
>From dc9e64a29d6d1fd84ad630cb002d1129ea6a0a31 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Sat, 15 Jun 2024 02:00:48 -0500
Subject: [PATCH 01/13] checkpoint commit. Use emitOffloadingArrays from
OMPIRBuilder in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 241 +++++++++++++++++-
clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 +
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 22 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 10 +
4 files changed, 265 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f6d12d46cfc07..9632ef912ebfe 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -45,6 +46,8 @@
#include <numeric>
#include <optional>
+#define DEBUG_TYPE "clang-openmp-codegen"
+
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
@@ -8831,9 +8834,11 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
}
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
- return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+ auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
PLoc.getLine(), PLoc.getColumn(),
SrcLocStrSize);
+ LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
+ return Str;
}
/// Emit the arrays used to pass the captures and map information to the
@@ -9447,8 +9452,96 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
}
return DynCGroupMem;
}
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+ const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+ // Get mappable expression information.
+ MappableExprsHandler MEHandler(D, CGF);
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+ CodeGenModule &CGM = CGF.CGM;
+ auto RI = CS.getCapturedRecordDecl()->field_begin();
+ auto *CV = CapturedVars.begin();
+ for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
+ CE = CS.capture_end();
+ CI != CE; ++CI, ++RI, ++CV) {
+ MappableExprsHandler::MapCombinedInfoTy CurInfo;
+ MappableExprsHandler::StructRangeInfoTy PartialStruct;
-static void emitTargetCallKernelLaunch(
+ // VLA sizes are passed to the outlined region by copy and do not have map
+ // information associated.
+ if (CI->capturesVariableArrayType()) {
+ CurInfo.Exprs.push_back(nullptr);
+ CurInfo.BasePointers.push_back(*CV);
+ CurInfo.DevicePtrDecls.push_back(nullptr);
+ CurInfo.DevicePointers.push_back(
+ MappableExprsHandler::DeviceInfoTy::None);
+ CurInfo.Pointers.push_back(*CV);
+ CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+ CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
+ // Copy to the device as an argument. No need to retrieve it.
+ CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
+ OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
+ OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
+ CurInfo.Mappers.push_back(nullptr);
+ } else {
+ // If we have any information in the map clause, we use it, otherwise we
+ // just do a default mapping.
+ MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
+ if (!CI->capturesThis())
+ MappedVarSet.insert(CI->getCapturedVar());
+ else
+ MappedVarSet.insert(nullptr);
+ if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
+ MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
+ // Generate correct mapping for variables captured by reference in
+ // lambdas.
+ if (CI->capturesVariable())
+ MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+ CurInfo, LambdaPointers);
+ }
+ // We expect to have at least an element of information for this capture.
+ assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
+ "Non-existing map pointer for capture!");
+ assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Types.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
+ "Inconsistent map information sizes!");
+
+ // If there is an entry in PartialStruct it means we have a struct with
+ // individual members mapped. Emit an extra combined entry.
+ if (PartialStruct.Base.isValid()) {
+ CombinedInfo.append(PartialStruct.PreliminaryMapData);
+ MEHandler.emitCombinedEntry(
+ CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
+ OMPBuilder, nullptr,
+ !PartialStruct.PreliminaryMapData.BasePointers.empty());
+ }
+
+ // We need to append the results of this capture to what we already have.
+ CombinedInfo.append(CurInfo);
+ }
+ // Adjust MEMBER_OF flags for the lambdas captures.
+ MEHandler.adjustMemberOfForLambdaCaptures(
+ OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
+ CombinedInfo.Pointers, CombinedInfo.Types);
+ // Map any list items in a map clause that were not captures because they
+ // weren't referenced within the construct.
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+
+ auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
+ return emitMappingInformation(CGF, OMPBuilder, MapExpr);
+ };
+ if (CGM.getCodeGenOpts().getDebugInfo() !=
+ llvm::codegenoptions::NoDebugInfo) {
+ CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
+ llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
+ FillInfoMap);
+ }
+}
+static void emitTargetCallKernelLaunchNew(
CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
const OMPExecutableDirective &D,
llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9464,8 +9557,139 @@ static void emitTargetCallKernelLaunch(
// Fill up the arrays with all the captured variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+ CGOpenMPRuntime::TargetDataInfo Info;
- // Get mappable expression information.
+ auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+ -> llvm::OpenMPIRBuilder::MapInfosTy & {
+ CGF.Builder.restoreIP(CodeGenIP);
+ genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
+ return CombinedInfo;
+ };
+ auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+ if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+ Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+ }
+ };
+
+ auto CustomMapperCB = [&](unsigned int I) {
+ llvm::Value *MFunc = nullptr;
+ if (CombinedInfo.Mappers[I]) {
+ Info.HasMapper = true;
+ MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+ }
+ return MFunc;
+ };
+ // Fill up the arrays and create the arguments.
+ LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+ OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
+ CGF.AllocaInsertPt->getIterator()),
+ CGF.Builder.saveIP(), Info,
+ GenMapInfoCB, /*IsNonContiguous=*/true,
+ DeviceAddrCB, CustomMapperCB);
+ bool EmitDebug = !CombinedInfo.Names.empty();
+ OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
+ EmitDebug,
+ /*ForEndCall=*/false);
+
+ LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
+ InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
+ InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
+ CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.PointersArray =
+ Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ InputInfo.SizesArray =
+ Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
+ InputInfo.MappersArray =
+ Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
+ MapTypesArray = Info.RTArgs.MapTypesArray;
+ MapNamesArray = Info.RTArgs.MapNamesArray;
+
+ auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
+ RequiresOuterTask, &CS, OffloadingMandatory, Device,
+ OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
+ SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
+ bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
+
+ if (IsReverseOffloading) {
+ // Reverse offloading is not supported, so just execute on the host.
+ // FIXME: This fallback solution is incorrect since it ignores the
+ // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
+ // assert here and ensure SEMA emits an error.
+ emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+ RequiresOuterTask, CS, OffloadingMandatory, CGF);
+ return;
+ }
+
+ bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
+ unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
+
+ llvm::Value *BasePointersArray =
+ InputInfo.BasePointersArray.emitRawPointer(CGF);
+ llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
+ llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
+ llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
+
+ auto &&EmitTargetCallFallbackCB =
+ [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
+ OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
+ -> llvm::OpenMPIRBuilder::InsertPointTy {
+ CGF.Builder.restoreIP(IP);
+ emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
+ RequiresOuterTask, CS, OffloadingMandatory, CGF);
+ return CGF.Builder.saveIP();
+ };
+
+ llvm::Value *DeviceID = emitDeviceID(Device, CGF);
+ llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
+ llvm::Value *NumThreads =
+ OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
+ llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
+ llvm::Value *NumIterations =
+ OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
+ llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
+ llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+ CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+ llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
+ BasePointersArray, PointersArray, SizesArray, MapTypesArray,
+ nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
+
+ llvm::OpenMPIRBuilder::TargetKernelArgs Args(
+ NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
+ DynCGGroupMem, HasNoWait);
+
+ CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
+ CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
+ DeviceID, RTLoc, AllocaIP));
+ };
+
+ if (RequiresOuterTask) {
+ if (NewClangTargetTaskCodeGen) {
+ llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
+ } else {
+ CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+ }
+ } else
+ OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
+}
+static void emitTargetCallKernelLaunch(
+ CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
+ const OMPExecutableDirective &D,
+ llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
+ const CapturedStmt &CS, bool OffloadingMandatory,
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
+ llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
+ llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
+ const OMPLoopDirective &D)>
+ SizeEmitter,
+ CodeGenFunction &CGF, CodeGenModule &CGM) {
+ llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
+
+ // Fill up the arrays with all the captured variables.
+ MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
@@ -9542,6 +9766,7 @@ static void emitTargetCallKernelLaunch(
CGOpenMPRuntime::TargetDataInfo Info;
// Fill up the arrays and create the arguments.
+ LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
llvm::codegenoptions::NoDebugInfo;
@@ -9549,6 +9774,7 @@ static void emitTargetCallKernelLaunch(
EmitDebug,
/*ForEndCall=*/false);
+ LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9688,7 +9914,13 @@ void CGOpenMPRuntime::emitTargetCall(
OutlinedFnID, &InputInfo, &MapTypesArray,
&MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
PrePostActionTy &) {
- emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+ if (OpenMPClangTargetCodegen)
+ emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+ RequiresOuterTask, CS, OffloadingMandatory,
+ Device, OutlinedFnID, InputInfo, MapTypesArray,
+ MapNamesArray, SizeEmitter, CGF, CGM);
+ else
+ emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
RequiresOuterTask, CS, OffloadingMandatory,
Device, OutlinedFnID, InputInfo, MapTypesArray,
MapNamesArray, SizeEmitter, CGF, CGM);
@@ -9711,6 +9943,7 @@ void CGOpenMPRuntime::emitTargetCall(
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
+ LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
}
} else {
RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..123cfbe1b229d 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -40,6 +40,7 @@
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
+#define DEBUG_TYPE "clang-openmp-codegen"
#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index bff49dab4a313..035639b10e31a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1778,6 +1778,22 @@ class OpenMPIRBuilder {
MapInfosTy &CombinedInfo,
TargetDataInfo &Info);
+ /// Callback type for creating the map infos for the kernel parameters.
+ /// \param CodeGenIP is the insertion point where code should be generated,
+ /// if any.
+ using GenMapInfoCallbackTy =
+ function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
+
+ /// Emit the arrays used to pass the captures and map information to the
+ /// offloading runtime library. If there is no map or capture information,
+ /// return nullptr by reference.
+ void emitOffloadingArrays(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+ bool IsNonContiguous = false,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
@@ -1787,6 +1803,7 @@ class OpenMPIRBuilder {
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
@@ -2190,11 +2207,6 @@ class OpenMPIRBuilder {
/// duplicating the body code.
enum BodyGenTy { Priv, DupNoPriv, NoPriv };
- /// Callback type for creating the map infos for the kernel parameters.
- /// \param CodeGenIP is the insertion point where code should be generated,
- /// if any.
- using GenMapInfoCallbackTy =
- function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
/// Generator for '#omp target data'
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 92213e19c9d9d..7c0dbc0925306 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5562,6 +5562,16 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
}
}
+void OpenMPIRBuilder::emitOffloadingArrays(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB) {
+
+ OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
+ emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
+ Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+}
void OpenMPIRBuilder::emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
TargetDataInfo &Info, bool IsNonContiguous,
>From 0bb7eaaee6ca6301cd7e9a9285ad9959ca801613 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 25 Jun 2024 16:07:37 -0500
Subject: [PATCH 02/13] emitOffloadingArraysArgument and some other prints
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 50 +++++++++----------
clang/lib/CodeGen/CGOpenMPRuntime.h | 39 +++++++++++++++
clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +
clang/lib/CodeGen/CodeGenFunction.h | 1 +
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 12 ++++-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 42 ++++++++++------
.../Frontend/OpenMPIRBuilderTest.cpp | 4 +-
7 files changed, 104 insertions(+), 46 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9632ef912ebfe..ee03183f3f5a3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3002,6 +3002,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
llvm::FunctionType *TaskEntryTy =
CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
+ LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
+ << KmpTaskTWithPrivatesPtrQTy << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
auto *TaskEntry = llvm::Function::Create(
TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3706,6 +3710,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
TaskPrivatesMap);
+ LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
@@ -9582,15 +9587,14 @@ static void emitTargetCallKernelLaunchNew(
};
// Fill up the arrays and create the arguments.
LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
- OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(),
- CGF.AllocaInsertPt->getIterator()),
- CGF.Builder.saveIP(), Info,
- GenMapInfoCB, /*IsNonContiguous=*/true,
- DeviceAddrCB, CustomMapperCB);
- bool EmitDebug = !CombinedInfo.Names.empty();
- OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- EmitDebug,
- /*ForEndCall=*/false);
+
+ llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
+ CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
+
+ OMPBuilder.emitOffloadingArraysAndArgs(
+ OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+ GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+ DeviceAddrCB, CustomMapperCB);
LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
@@ -9664,13 +9668,9 @@ static void emitTargetCallKernelLaunchNew(
DeviceID, RTLoc, AllocaIP));
};
- if (RequiresOuterTask) {
- if (NewClangTargetTaskCodeGen) {
- llvm::errs() << "Using OMPIRBuilder for target task codegen\n";
- } else {
- CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
- }
- } else
+ if (RequiresOuterTask)
+ CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
+ else
OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
static void emitTargetCallKernelLaunch(
@@ -9768,10 +9768,9 @@ static void emitTargetCallKernelLaunch(
// Fill up the arrays and create the arguments.
LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
- bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+ Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
llvm::codegenoptions::NoDebugInfo;
OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- EmitDebug,
/*ForEndCall=*/false);
LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
@@ -9914,16 +9913,16 @@ void CGOpenMPRuntime::emitTargetCall(
OutlinedFnID, &InputInfo, &MapTypesArray,
&MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
PrePostActionTy &) {
- if (OpenMPClangTargetCodegen)
+ // if (OpenMPClangTargetCodegen)
emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
RequiresOuterTask, CS, OffloadingMandatory,
Device, OutlinedFnID, InputInfo, MapTypesArray,
MapNamesArray, SizeEmitter, CGF, CGM);
- else
- emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
- RequiresOuterTask, CS, OffloadingMandatory,
- Device, OutlinedFnID, InputInfo, MapTypesArray,
- MapNamesArray, SizeEmitter, CGF, CGM);
+ // else
+ // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
+ // RequiresOuterTask, CS, OffloadingMandatory,
+ // Device, OutlinedFnID, InputInfo, MapTypesArray,
+ // MapNamesArray, SizeEmitter, CGF, CGM);
};
auto &&TargetElseGen =
@@ -10684,10 +10683,9 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
/*IsNonContiguous=*/true);
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
- bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
+ Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
llvm::codegenoptions::NoDebugInfo;
OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- EmitDebug,
/*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 522ae3d35d22d..b9303a9414b22 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,45 @@ struct OMPTaskDataTy final {
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
bool HasNowaitClause = false;
+ void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
+ auto &&printSVHelper =
+ [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
+ for (auto &v : V) {
+ v->dump(os, Ctx);
+ }
+ };
+ auto &&printSV =
+ [&os, printSVHelper](std::string s,
+ const SmallVector<const Expr *, 4> &V) -> void {
+ os << s << ":[\n";
+ printSVHelper(V);
+ os << "]\n";
+ };
+ // SmallVector<const Expr *, 4> PrivateVars;
+ // SmallVector<const Expr *, 4> PrivateCopies;
+ // SmallVector<const Expr *, 4> FirstprivateVars;
+ // SmallVector<const Expr *, 4> FirstprivateCopies;
+ // SmallVector<const Expr *, 4> FirstprivateInits;
+ // SmallVector<const Expr *, 4> LastprivateVars;
+ // SmallVector<const Expr *, 4> LastprivateCopies;
+ // SmallVector<const Expr *, 4> ReductionVars;
+ // SmallVector<const Expr *, 4> ReductionOrigs;
+ // SmallVector<const Expr *, 4> ReductionCopies;
+ // SmallVector<const Expr *, 4> ReductionOps;
+ // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
+
+ printSV("PrivateVars", PrivateVars);
+ printSV("PrivateCopies", PrivateCopies);
+ printSV("FirstprivateVars", FirstprivateVars);
+ printSV("FirstprivateCopies", FirstprivateCopies);
+ printSV("FirstprivateInits", FirstprivateInits);
+ printSV("LastprivateVars", LastprivateVars);
+ printSV("LastprivateCopies", LastprivateCopies);
+ printSV("ReductionVars", ReductionVars);
+ printSV("ReductionOrigs", ReductionOrigs);
+ printSV("ReductionCopies", ReductionCopies);
+ printSV("ReductionOps", ReductionOps);
+ }
};
/// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 123cfbe1b229d..bd6743666826b 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5181,6 +5181,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
BodyGen(CGF);
};
+ LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
+ LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 06fc7259b5901..6092ab1684267 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -441,6 +441,7 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
public:
+ void printLocalDeclMap();
/// Return PostAllocaInsertPt. If it is not yet created, then insert it
/// immediately after AllocaInsertPt.
llvm::Instruction *getPostAllocaInsertPoint() {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 035639b10e31a..03573b4e02029 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1671,6 +1671,8 @@ class OpenMPIRBuilder {
/// The total number of pointers passed to the runtime library.
unsigned NumberOfPtrs = 0u;
+ bool EmitDebug = false;
+
explicit TargetDataInfo() {}
explicit TargetDataInfo(bool RequiresDevicePointerInfo,
bool SeparateBeginEndCalls)
@@ -1769,7 +1771,6 @@ class OpenMPIRBuilder {
void emitOffloadingArraysArgument(IRBuilderBase &Builder,
OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
OpenMPIRBuilder::TargetDataInfo &Info,
- bool EmitDebug = false,
bool ForEndCall = false);
/// Emit an array of struct descriptors to be assigned to the offload args.
@@ -1789,7 +1790,7 @@ class OpenMPIRBuilder {
/// return nullptr by reference.
void emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
+ GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
@@ -1804,6 +1805,13 @@ class OpenMPIRBuilder {
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+ void emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+ bool IsNonContiguous = false, bool ForEndCall = false,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 7c0dbc0925306..8d6e6a354a1d8 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4923,8 +4923,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
CustomMapperCB);
TargetDataRTArgs RTArgs;
- emitOffloadingArraysArgument(Builder, RTArgs, Info,
- !MapInfo->Names.empty());
+ emitOffloadingArraysArgument(Builder, RTArgs, Info);
// Emit the number of elements in the offloading arrays.
Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -4977,8 +4976,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
// Generate code for the closing of the data region.
auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
TargetDataRTArgs RTArgs;
- emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
- /*ForEndCall=*/true);
+ Info.EmitDebug = !MapInfo->Names.empty();
+ emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true);
// Emit the number of elements in the offloading arrays.
Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
@@ -5234,7 +5233,18 @@ static void emitTargetOutlinedFunction(
OutlinedFn, OutlinedFnID);
}
-static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
+ bool IsNonContiguous, bool ForEndCall,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB) {
+ emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+ DeviceAddrCB, CustomMapperCB);
+ emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
+
+ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy AllocaIP,
Function *OutlinedFn, Constant *OutlinedFnID,
int32_t NumTeams, int32_t NumThreads,
@@ -5245,13 +5255,11 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
/*RequiresDevicePointerInfo=*/false,
/*SeparateBeginEndCalls=*/true);
- OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
- OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info,
- /*IsNonContiguous=*/true);
-
OpenMPIRBuilder::TargetDataRTArgs RTArgs;
- OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
- !MapInfo.Names.empty());
+ OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
+ RTArgs, GenMapInfoCB,
+ /*IsNonContiguous=*/true,
+ /*ForEndCall=*/false);
// emitKernelLaunch
auto &&EmitTargetCallFallbackCB =
@@ -5261,7 +5269,7 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
return Builder.saveIP();
};
- unsigned NumTargetItems = MapInfo.BasePointers.size();
+ unsigned NumTargetItems = Info.NumberOfPtrs;
// TODO: Use correct device ID
Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
Value *NumTeamsVal = Builder.getInt32(NumTeams);
@@ -5438,7 +5446,6 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
TargetDataRTArgs &RTArgs,
TargetDataInfo &Info,
- bool EmitDebug,
bool ForEndCall) {
assert((!ForEndCall || Info.separateBeginEndCalls()) &&
"expected region end call to runtime only when end call is separate");
@@ -5478,7 +5485,7 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
// Only emit the mapper information arrays if debug information is
// requested.
- if (!EmitDebug)
+ if (!Info.EmitDebug)
RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
else
RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
@@ -5563,8 +5570,9 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
}
void OpenMPIRBuilder::emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
- GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous,
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+ bool IsNonContiguous,
function_ref<void(unsigned int, Value *)> DeviceAddrCB,
function_ref<Value *(unsigned int)> CustomMapperCB) {
@@ -5677,9 +5685,11 @@ void OpenMPIRBuilder::emitOffloadingArrays(
auto *MapNamesArrayGbl =
createOffloadMapnames(CombinedInfo.Names, MapnamesName);
Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
+ Info.EmitDebug = true;
} else {
Info.RTArgs.MapNamesArray =
Constant::getNullValue(PointerType::getUnqual(Builder.getContext()));
+ Info.EmitDebug = false;
}
// If there's a present map type modifier, it must not be applied to the end
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 3ed3034f489ce..54070a1ae35f8 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6891,8 +6891,8 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
Info.RTArgs.MappersArray =
ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo());
Info.NumberOfPtrs = 4;
-
- OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false);
+ Info.EmitDebug = false;
+ OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false);
EXPECT_NE(RTArgs.BasePointersArray, nullptr);
EXPECT_NE(RTArgs.PointersArray, nullptr);
>From af98fabd5685e42dade598caf3c1279ccfab7fba Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:33:53 -0500
Subject: [PATCH 03/13] clean up, clean up, everybody clean up
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 206 +-------------------------
clang/lib/CodeGen/CGOpenMPRuntime.h | 39 -----
clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 -
clang/lib/CodeGen/CodeGenFunction.h | 1 -
4 files changed, 5 insertions(+), 245 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ee03183f3f5a3..b2fa50d16437e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -38,7 +38,6 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -46,8 +45,6 @@
#include <numeric>
#include <optional>
-#define DEBUG_TYPE "clang-openmp-codegen"
-
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
@@ -3002,10 +2999,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
llvm::FunctionType *TaskEntryTy =
CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
- LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n");
- LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = "
- << KmpTaskTWithPrivatesPtrQTy << "\n");
- LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n");
std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
auto *TaskEntry = llvm::Function::Create(
TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
@@ -3710,7 +3703,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
TaskPrivatesMap);
- LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry);
// Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
// kmp_routine_entry_t *task_entry);
@@ -8839,11 +8831,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
}
PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
- auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
+ return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
PLoc.getLine(), PLoc.getColumn(),
SrcLocStrSize);
- LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n");
- return Str;
}
/// Emit the arrays used to pass the captures and map information to the
@@ -9546,7 +9536,7 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
FillInfoMap);
}
}
-static void emitTargetCallKernelLaunchNew(
+static void emitTargetCallKernelLaunch(
CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
const OMPExecutableDirective &D,
llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
@@ -9585,9 +9575,8 @@ static void emitTargetCallKernelLaunchNew(
}
return MFunc;
};
- // Fill up the arrays and create the arguments.
- LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
-
+ // Fill up the basepointers, pointers and mapper arrays and create the
+ // arguments.
llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
@@ -9596,184 +9585,6 @@ static void emitTargetCallKernelLaunchNew(
GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
DeviceAddrCB, CustomMapperCB);
- LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
- InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
- InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
- CGF.VoidPtrTy, CGM.getPointerAlign());
- InputInfo.PointersArray =
- Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- InputInfo.SizesArray =
- Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
- InputInfo.MappersArray =
- Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
- MapTypesArray = Info.RTArgs.MapTypesArray;
- MapNamesArray = Info.RTArgs.MapNamesArray;
-
- auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
- RequiresOuterTask, &CS, OffloadingMandatory, Device,
- OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
- SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
- bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
-
- if (IsReverseOffloading) {
- // Reverse offloading is not supported, so just execute on the host.
- // FIXME: This fallback solution is incorrect since it ignores the
- // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
- // assert here and ensure SEMA emits an error.
- emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
- RequiresOuterTask, CS, OffloadingMandatory, CGF);
- return;
- }
-
- bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
- unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
-
- llvm::Value *BasePointersArray =
- InputInfo.BasePointersArray.emitRawPointer(CGF);
- llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
- llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
- llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
-
- auto &&EmitTargetCallFallbackCB =
- [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
- OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
- -> llvm::OpenMPIRBuilder::InsertPointTy {
- CGF.Builder.restoreIP(IP);
- emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
- RequiresOuterTask, CS, OffloadingMandatory, CGF);
- return CGF.Builder.saveIP();
- };
-
- llvm::Value *DeviceID = emitDeviceID(Device, CGF);
- llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
- llvm::Value *NumThreads =
- OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
- llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
- llvm::Value *NumIterations =
- OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
- llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
- llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
- CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
- llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
- BasePointersArray, PointersArray, SizesArray, MapTypesArray,
- nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
-
- llvm::OpenMPIRBuilder::TargetKernelArgs Args(
- NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
- DynCGGroupMem, HasNoWait);
-
- CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
- CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
- DeviceID, RTLoc, AllocaIP));
- };
-
- if (RequiresOuterTask)
- CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
- else
- OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
-}
-static void emitTargetCallKernelLaunch(
- CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
- const OMPExecutableDirective &D,
- llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
- const CapturedStmt &CS, bool OffloadingMandatory,
- llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
- llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
- llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
- llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
- const OMPLoopDirective &D)>
- SizeEmitter,
- CodeGenFunction &CGF, CodeGenModule &CGM) {
- llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
-
- // Fill up the arrays with all the captured variables.
- MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
-// Get mappable expression information.
- MappableExprsHandler MEHandler(D, CGF);
- llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
- llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
-
- auto RI = CS.getCapturedRecordDecl()->field_begin();
- auto *CV = CapturedVars.begin();
- for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
- CE = CS.capture_end();
- CI != CE; ++CI, ++RI, ++CV) {
- MappableExprsHandler::MapCombinedInfoTy CurInfo;
- MappableExprsHandler::StructRangeInfoTy PartialStruct;
-
- // VLA sizes are passed to the outlined region by copy and do not have map
- // information associated.
- if (CI->capturesVariableArrayType()) {
- CurInfo.Exprs.push_back(nullptr);
- CurInfo.BasePointers.push_back(*CV);
- CurInfo.DevicePtrDecls.push_back(nullptr);
- CurInfo.DevicePointers.push_back(
- MappableExprsHandler::DeviceInfoTy::None);
- CurInfo.Pointers.push_back(*CV);
- CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
- CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
- // Copy to the device as an argument. No need to retrieve it.
- CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
- OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
- OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
- CurInfo.Mappers.push_back(nullptr);
- } else {
- // If we have any information in the map clause, we use it, otherwise we
- // just do a default mapping.
- MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
- if (!CI->capturesThis())
- MappedVarSet.insert(CI->getCapturedVar());
- else
- MappedVarSet.insert(nullptr);
- if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
- MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
- // Generate correct mapping for variables captured by reference in
- // lambdas.
- if (CI->capturesVariable())
- MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
- CurInfo, LambdaPointers);
- }
- // We expect to have at least an element of information for this capture.
- assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
- "Non-existing map pointer for capture!");
- assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
- CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
- CurInfo.BasePointers.size() == CurInfo.Types.size() &&
- CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
- "Inconsistent map information sizes!");
-
- // If there is an entry in PartialStruct it means we have a struct with
- // individual members mapped. Emit an extra combined entry.
- if (PartialStruct.Base.isValid()) {
- CombinedInfo.append(PartialStruct.PreliminaryMapData);
- MEHandler.emitCombinedEntry(
- CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
- OMPBuilder, nullptr,
- !PartialStruct.PreliminaryMapData.BasePointers.empty());
- }
-
- // We need to append the results of this capture to what we already have.
- CombinedInfo.append(CurInfo);
- }
- // Adjust MEMBER_OF flags for the lambdas captures.
- MEHandler.adjustMemberOfForLambdaCaptures(
- OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
- CombinedInfo.Pointers, CombinedInfo.Types);
- // Map any list items in a map clause that were not captures because they
- // weren't referenced within the construct.
- MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
-
- CGOpenMPRuntime::TargetDataInfo Info;
- // Fill up the arrays and create the arguments.
- LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
- emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
- Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
- llvm::codegenoptions::NoDebugInfo;
- OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- /*ForEndCall=*/false);
-
- LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n");
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
@@ -9913,16 +9724,10 @@ void CGOpenMPRuntime::emitTargetCall(
OutlinedFnID, &InputInfo, &MapTypesArray,
&MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
PrePostActionTy &) {
- // if (OpenMPClangTargetCodegen)
- emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars,
+ emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
RequiresOuterTask, CS, OffloadingMandatory,
Device, OutlinedFnID, InputInfo, MapTypesArray,
MapNamesArray, SizeEmitter, CGF, CGM);
- // else
- // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
- // RequiresOuterTask, CS, OffloadingMandatory,
- // Device, OutlinedFnID, InputInfo, MapTypesArray,
- // MapNamesArray, SizeEmitter, CGF, CGM);
};
auto &&TargetElseGen =
@@ -9942,7 +9747,6 @@ void CGOpenMPRuntime::emitTargetCall(
} else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
- LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n");
}
} else {
RegionCodeGenTy ElseRCG(TargetElseGen);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index b9303a9414b22..522ae3d35d22d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,45 +122,6 @@ struct OMPTaskDataTy final {
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
bool HasNowaitClause = false;
- void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const {
- auto &&printSVHelper =
- [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void {
- for (auto &v : V) {
- v->dump(os, Ctx);
- }
- };
- auto &&printSV =
- [&os, printSVHelper](std::string s,
- const SmallVector<const Expr *, 4> &V) -> void {
- os << s << ":[\n";
- printSVHelper(V);
- os << "]\n";
- };
- // SmallVector<const Expr *, 4> PrivateVars;
- // SmallVector<const Expr *, 4> PrivateCopies;
- // SmallVector<const Expr *, 4> FirstprivateVars;
- // SmallVector<const Expr *, 4> FirstprivateCopies;
- // SmallVector<const Expr *, 4> FirstprivateInits;
- // SmallVector<const Expr *, 4> LastprivateVars;
- // SmallVector<const Expr *, 4> LastprivateCopies;
- // SmallVector<const Expr *, 4> ReductionVars;
- // SmallVector<const Expr *, 4> ReductionOrigs;
- // SmallVector<const Expr *, 4> ReductionCopies;
- // SmallVector<const Expr *, 4> ReductionOps;
- // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
-
- printSV("PrivateVars", PrivateVars);
- printSV("PrivateCopies", PrivateCopies);
- printSV("FirstprivateVars", FirstprivateVars);
- printSV("FirstprivateCopies", FirstprivateCopies);
- printSV("FirstprivateInits", FirstprivateInits);
- printSV("LastprivateVars", LastprivateVars);
- printSV("LastprivateCopies", LastprivateCopies);
- printSV("ReductionVars", ReductionVars);
- printSV("ReductionOrigs", ReductionOrigs);
- printSV("ReductionCopies", ReductionCopies);
- printSV("ReductionOps", ReductionOps);
- }
};
/// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index bd6743666826b..c85135978e2b3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,12 +35,10 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
-#define DEBUG_TYPE "clang-openmp-codegen"
#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
@@ -5181,8 +5179,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
BodyGen(CGF);
};
- LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n");
- LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n");
llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
Data.NumberOfParts);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 6092ab1684267..06fc7259b5901 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -441,7 +441,6 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr;
public:
- void printLocalDeclMap();
/// Return PostAllocaInsertPt. If it is not yet created, then insert it
/// immediately after AllocaInsertPt.
llvm::Instruction *getPostAllocaInsertPoint() {
>From c99d13fb3bd3da60d8b7362e7135cb160917d800 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 11:39:07 -0500
Subject: [PATCH 04/13] Add Debug.h include in CGStmtOpenMP.cpp because removal
is not related to my change
---
clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c85135978e2b3..f73d32de7c484 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
>From ae97854579cf0d966c766c211f65c647c2e9fa4a Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 14:13:36 -0500
Subject: [PATCH 05/13] Document emitOffloadingArrays and
emitOffloadingArraysAndArgs in OMPIRBuilder.h
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 28 +++++++++++++------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 03573b4e02029..38d90983c2817 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1787,24 +1787,36 @@ class OpenMPIRBuilder {
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference.
+ /// return nullptr by reference. This is the first of two overloads - this
+ /// one accepts a reference to a MapInfosTy object that contains combined
+ /// information generated for mappable clauses, including base pointers,
+ /// pointers, sizes, map types, user-defined mappers, and non-contiguous
+ /// information.
void emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
- bool IsNonContiguous = false,
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
+ TargetDataInfo &Info, bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference.
+ /// return nullptr by reference. This is the second of two overloads - Instead
+ /// of accepting a reference to a MapInfosTy object, this overload accepts
+ /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
+ /// with mapping information.
void emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
- TargetDataInfo &Info, bool IsNonContiguous = false,
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
+ bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
+ /// Allocates memory for and populates the arrays required for offloading
+ /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+ /// emits their base addresses as arguments to be passed to the runtime
+ /// library. In essence, this function is a combination of
+ /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+ /// be preferred by clients of OpenMPIRBuilder.
void emitOffloadingArraysAndArgs(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
>From 88a47b0449cb332f7cc835214efcdaea2c3a8a9f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 26 Jun 2024 15:29:44 -0500
Subject: [PATCH 06/13] refactor genMapInfo
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 42 +++++++++++++++++++++------
1 file changed, 33 insertions(+), 9 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b2fa50d16437e..b3493324a27a1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9447,14 +9447,14 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
}
return DynCGroupMem;
}
-static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
- const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
- llvm::OpenMPIRBuilder &OMPBuilder,
- MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
- // Get mappable expression information.
- MappableExprsHandler MEHandler(D, CGF);
- llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
- llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+static void genMapInfoForCaptures(
+ MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+ const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+
CodeGenModule &CGM = CGF.CGM;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto *CV = CapturedVars.begin();
@@ -9522,9 +9522,18 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
MEHandler.adjustMemberOfForLambdaCaptures(
OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
CombinedInfo.Pointers, CombinedInfo.Types);
+}
+static void
+genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
+
+ CodeGenModule &CGM = CGF.CGM;
// Map any list items in a map clause that were not captures because they
// weren't referenced within the construct.
- MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
return emitMappingInformation(CGF, OMPBuilder, MapExpr);
@@ -9536,6 +9545,21 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
FillInfoMap);
}
}
+
+static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
+ const CapturedStmt &CS,
+ llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
+ llvm::OpenMPIRBuilder &OMPBuilder,
+ MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+ // Get mappable expression information.
+ MappableExprsHandler MEHandler(D, CGF);
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
+
+ genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
+ LambdaPointers, MappedVarSet, CombinedInfo);
+ genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
+}
static void emitTargetCallKernelLaunch(
CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
const OMPExecutableDirective &D,
>From e97cd161933d07a08aad52e37b506ae27be27560 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Thu, 27 Jun 2024 13:51:34 -0500
Subject: [PATCH 07/13] Use CGOpenMPRuntime::emitTargetDataStandAloneCall
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 44 +++++++++++++++++++++------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index b3493324a27a1..5372bbbbc2da1 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10500,21 +10500,45 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
+ CGOpenMPRuntime::TargetDataInfo Info;
// Get map clause information.
- MappableExprsHandler MEHandler(D, CGF);
- MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
+ auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
+ -> llvm::OpenMPIRBuilder::MapInfosTy & {
+ CGF.Builder.restoreIP(CodeGenIP);
+ MappableExprsHandler MEHandler(D, CGF);
+ genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+ return CombinedInfo;
+ };
- CGOpenMPRuntime::TargetDataInfo Info;
- // Fill up the arrays and create the arguments.
- emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
- /*IsNonContiguous=*/true);
+ auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+ if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+ Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+ }
+ };
+
+ auto CustomMapperCB = [&](unsigned int I) {
+ llvm::Value *MFunc = nullptr;
+ if (CombinedInfo.Mappers[I]) {
+ Info.HasMapper = true;
+ MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+ }
+ return MFunc;
+ };
+
+ // Fill up the basepointers, pointers and mapper arrays and create the
+ // arguments.
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
+ CGF.AllocaInsertPt->getIterator());
+
+ OMPBuilder.emitOffloadingArraysAndArgs(
+ OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
+ GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
+ DeviceAddrCB, CustomMapperCB);
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
- Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
- llvm::codegenoptions::NoDebugInfo;
- OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
- /*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
>From 200bd07a9384242cd7999442860bc90e2ba9b69b Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 10:49:35 -0500
Subject: [PATCH 08/13] Use static function emitOffloadingArraysAndArgs in
emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 67 ++++++++++---------
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 13 ++++
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 9 +++
3 files changed, 59 insertions(+), 30 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 5372bbbbc2da1..c0e9eb3b6a07e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8884,6 +8884,40 @@ static void emitOffloadingArrays(
/*IsNonContiguous=*/true, DeviceAddrCB,
CustomMapperCB);
}
+/// Emit the arrays used to pass the captures and map information to the
+/// offloading runtime library. If there is no map or capture information,
+/// return nullptr by reference.
+static void emitOffloadingArraysAndArgs(
+ CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+ CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
+ bool IsNonContiguous = false, bool ForEndCall = false) {
+ CodeGenModule &CGM = CGF.CGM;
+
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
+ CGF.AllocaInsertPt->getIterator());
+ InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
+ CGF.Builder.GetInsertPoint());
+
+ auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
+ if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
+ Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
+ }
+ };
+
+ auto CustomMapperCB = [&](unsigned int I) {
+ llvm::Value *MFunc = nullptr;
+ if (CombinedInfo.Mappers[I]) {
+ Info.HasMapper = true;
+ MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+ }
+ return MFunc;
+ };
+ OMPBuilder.emitOffloadingArraysAndArgs(
+ AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
+ ForEndCall, DeviceAddrCB, CustomMapperCB);
+}
/// Check for inner distribute directive.
static const OMPExecutableDirective *
@@ -9577,37 +9611,10 @@ static void emitTargetCallKernelLaunch(
// Fill up the arrays with all the captured variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
CGOpenMPRuntime::TargetDataInfo Info;
+ genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
- auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
- -> llvm::OpenMPIRBuilder::MapInfosTy & {
- CGF.Builder.restoreIP(CodeGenIP);
- genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
- return CombinedInfo;
- };
- auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
- if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
- Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
- }
- };
-
- auto CustomMapperCB = [&](unsigned int I) {
- llvm::Value *MFunc = nullptr;
- if (CombinedInfo.Mappers[I]) {
- Info.HasMapper = true;
- MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
- cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
- }
- return MFunc;
- };
- // Fill up the basepointers, pointers and mapper arrays and create the
- // arguments.
- llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP(
- CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-
- OMPBuilder.emitOffloadingArraysAndArgs(
- OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
- GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
- DeviceAddrCB, CustomMapperCB);
+ emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+ /*IsNonContiguous=*/true, /*ForEndCall=*/false);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 38d90983c2817..2cb3da09a97c1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1824,6 +1824,19 @@ class OpenMPIRBuilder {
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+ /// Allocates memory for and populates the arrays required for offloading
+ /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
+ /// emits their base addresses as arguments to be passed to the runtime
+ /// library. In essence, this function is a combination of
+ /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
+ /// be preferred by clients of OpenMPIRBuilder.
+ void emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
+ bool IsNonContiguous = false, bool ForEndCall = false,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8d6e6a354a1d8..abf21da0fa7d4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5243,6 +5243,15 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
DeviceAddrCB, CustomMapperCB);
emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
}
+ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+ bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB) {
+ emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
+ IsNonContiguous, DeviceAddrCB, CustomMapperCB);
+ emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
+ }
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy AllocaIP,
>From 879cfa1cf2714a0bfa0e42152634ec841b94c3ce Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:27:58 -0500
Subject: [PATCH 09/13] Use static function emitOffloadingArraysAndArgs in
emitTargetDataStandAloneCall in CGOpenMPRuntime.cpp
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 40 ++++-----------------------
1 file changed, 5 insertions(+), 35 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c0e9eb3b6a07e..14590146ceb51 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10508,44 +10508,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
CGOpenMPRuntime::TargetDataInfo Info;
+ MappableExprsHandler MEHandler(D, CGF);
+ genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
+ emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
+ /*IsNonContiguous=*/true, /*ForEndCall=*/false);
- // Get map clause information.
- auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP)
- -> llvm::OpenMPIRBuilder::MapInfosTy & {
- CGF.Builder.restoreIP(CodeGenIP);
- MappableExprsHandler MEHandler(D, CGF);
- genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
- return CombinedInfo;
- };
-
- auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
- if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
- Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
- }
- };
-
- auto CustomMapperCB = [&](unsigned int I) {
- llvm::Value *MFunc = nullptr;
- if (CombinedInfo.Mappers[I]) {
- Info.HasMapper = true;
- MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
- cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
- }
- return MFunc;
- };
-
- // Fill up the basepointers, pointers and mapper arrays and create the
- // arguments.
- using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(),
- CGF.AllocaInsertPt->getIterator());
-
- OMPBuilder.emitOffloadingArraysAndArgs(
- OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs,
- GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false,
- DeviceAddrCB, CustomMapperCB);
bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
D.hasClausesOfKind<OMPNowaitClause>();
+
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
CGF.VoidPtrTy, CGM.getPointerAlign());
>From 178be4f9b97226523d43f7ae9e11a438348774fc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 11:30:10 -0500
Subject: [PATCH 10/13] Remove emitOffloadingArrays from CGOpenMPRuntime.cpp
because it is not used anymore. Use emitOffloadingArraysAndArgs
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 49 ---------------------------
1 file changed, 49 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 14590146ceb51..9fbc06e89f017 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -8835,55 +8835,6 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
PLoc.getLine(), PLoc.getColumn(),
SrcLocStrSize);
}
-
-/// Emit the arrays used to pass the captures and map information to the
-/// offloading runtime library. If there is no map or capture information,
-/// return nullptr by reference.
-static void emitOffloadingArrays(
- CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
- CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
- bool IsNonContiguous = false) {
- CodeGenModule &CGM = CGF.CGM;
-
- // Reset the array information.
- Info.clearArrayInfo();
- Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
-
- using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
- CGF.AllocaInsertPt->getIterator());
- InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
- CGF.Builder.GetInsertPoint());
-
- auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
- return emitMappingInformation(CGF, OMPBuilder, MapExpr);
- };
- if (CGM.getCodeGenOpts().getDebugInfo() !=
- llvm::codegenoptions::NoDebugInfo) {
- CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
- llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
- FillInfoMap);
- }
-
- auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
- if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
- Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
- }
- };
-
- auto CustomMapperCB = [&](unsigned int I) {
- llvm::Value *MFunc = nullptr;
- if (CombinedInfo.Mappers[I]) {
- Info.HasMapper = true;
- MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
- cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
- }
- return MFunc;
- };
- OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
- /*IsNonContiguous=*/true, DeviceAddrCB,
- CustomMapperCB);
-}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
>From d21f7f6f0ba9063260a08bc9d770bb0f1e0761bf Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:05:02 -0500
Subject: [PATCH 11/13] Remove overloads of emitOffloadingArrays and
emitOffloadingArraysAndArgs that accept GenMapInfoCallbackTy
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1 -
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 34 ++-----------------
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 32 ++++-------------
3 files changed, 9 insertions(+), 58 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9fbc06e89f017..262bbd988e1e3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9440,7 +9440,6 @@ static void genMapInfoForCaptures(
llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
- CodeGenModule &CGM = CGF.CGM;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto *CV = CapturedVars.begin();
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2cb3da09a97c1..a0b54e25124d7 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1787,43 +1787,15 @@ class OpenMPIRBuilder {
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference. This is the first of two overloads - this
- /// one accepts a reference to a MapInfosTy object that contains combined
- /// information generated for mappable clauses, including base pointers,
- /// pointers, sizes, map types, user-defined mappers, and non-contiguous
- /// information.
+ /// return nullptr by reference. Accepts a reference to a MapInfosTy object
+ /// that contains information generated for mappable clauses,
+ /// including base pointers, pointers, sizes, map types, user-defined mappers.
void emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
TargetDataInfo &Info, bool IsNonContiguous = false,
function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
- /// Emit the arrays used to pass the captures and map information to the
- /// offloading runtime library. If there is no map or capture information,
- /// return nullptr by reference. This is the second of two overloads - Instead
- /// of accepting a reference to a MapInfosTy object, this overload accepts
- /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object
- /// with mapping information.
- void emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
- bool IsNonContiguous = false,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
- function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
- /// Allocates memory for and populates the arrays required for offloading
- /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
- /// emits their base addresses as arguments to be passed to the runtime
- /// library. In essence, this function is a combination of
- /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably
- /// be preferred by clients of OpenMPIRBuilder.
- void emitOffloadingArraysAndArgs(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
- TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
- bool IsNonContiguous = false, bool ForEndCall = false,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
- function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
-
/// Allocates memory for and populates the arrays required for offloading
/// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it
/// emits their base addresses as arguments to be passed to the runtime
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index abf21da0fa7d4..df48eb430e097 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5235,23 +5235,13 @@ static void emitTargetOutlinedFunction(
void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
- TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB,
- bool IsNonContiguous, bool ForEndCall,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
+ bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
function_ref<Value *(unsigned int)> CustomMapperCB) {
- emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous,
+ emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous,
DeviceAddrCB, CustomMapperCB);
emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
- void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
- TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
- bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB,
- function_ref<Value *(unsigned int)> CustomMapperCB) {
- emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
- IsNonContiguous, DeviceAddrCB, CustomMapperCB);
- emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
- }
+}
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy AllocaIP,
@@ -5264,9 +5254,10 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
/*RequiresDevicePointerInfo=*/false,
/*SeparateBeginEndCalls=*/true);
+ OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
OpenMPIRBuilder::TargetDataRTArgs RTArgs;
OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info,
- RTArgs, GenMapInfoCB,
+ RTArgs, MapInfo,
/*IsNonContiguous=*/true,
/*ForEndCall=*/false);
@@ -5578,17 +5569,6 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
}
}
-void OpenMPIRBuilder::emitOffloadingArrays(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info,
- bool IsNonContiguous,
- function_ref<void(unsigned int, Value *)> DeviceAddrCB,
- function_ref<Value *(unsigned int)> CustomMapperCB) {
-
- OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP);
- emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo,
- Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB);
-}
void OpenMPIRBuilder::emitOffloadingArrays(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
TargetDataInfo &Info, bool IsNonContiguous,
>From bff9d7b276df201baa48b4739dabfa9329c71dd0 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Fri, 28 Jun 2024 12:16:41 -0500
Subject: [PATCH 12/13] Undo an unnecessary change in the location of the
declaration of GenMapInfoCallbackTy in OMPIRBuilder.h
---
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a0b54e25124d7..db748611ac501 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1779,12 +1779,6 @@ class OpenMPIRBuilder {
MapInfosTy &CombinedInfo,
TargetDataInfo &Info);
- /// Callback type for creating the map infos for the kernel parameters.
- /// \param CodeGenIP is the insertion point where code should be generated,
- /// if any.
- using GenMapInfoCallbackTy =
- function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
-
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference. Accepts a reference to a MapInfosTy object
@@ -2212,6 +2206,11 @@ class OpenMPIRBuilder {
/// duplicating the body code.
enum BodyGenTy { Priv, DupNoPriv, NoPriv };
+ /// Callback type for creating the map infos for the kernel parameters.
+ /// \param CodeGenIP is the insertion point where code should be generated,
+ /// if any.
+ using GenMapInfoCallbackTy =
+ function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>;
/// Generator for '#omp target data'
///
>From 9d5c42b905b308b84c54dea81518bc84aa1d79b4 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 22 Jul 2024 13:23:00 -0500
Subject: [PATCH 13/13] Address review comments
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 262bbd988e1e3..4ab9195bded2a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -9436,10 +9436,10 @@ static void genMapInfoForCaptures(
MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
llvm::OpenMPIRBuilder &OMPBuilder,
- llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
+ llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto *CV = CapturedVars.begin();
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
@@ -9537,11 +9537,10 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
- llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
- LambdaPointers, MappedVarSet, CombinedInfo);
+ MappedVarSet, CombinedInfo);
genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
}
static void emitTargetCallKernelLaunch(
More information about the cfe-commits
mailing list