[llvm-branch-commits] [clang] [llvm] [mlir] [OMPIRBuilder] Introduce struct to hold default kernel teams/threads (PR #116050)
Sergio Afonso via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 8 08:36:06 PST 2025
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/116050
>From f73a439832c4e8454274b7677570d190231dcf46 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Fri, 8 Nov 2024 15:46:48 +0000
Subject: [PATCH 1/2] [OMPIRBuilder] Introduce struct to hold default kernel
teams/threads
This patch introduces the `OpenMPIRBuilder::TargetKernelDefaultAttrs` structure,
which simplifies passing default and constant values for the number of teams and
threads, and possibly other target kernel-related information in the future.
The structure is used to forward the values passed to `createTarget` on to
`createTargetInit`, which previously used an unrelated set of default values.
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 13 ++--
clang/lib/CodeGen/CGOpenMPRuntime.h | 9 +--
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 9 +--
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 39 ++++++----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 71 +++++++++++--------
.../Frontend/OpenMPIRBuilderTest.cpp | 29 ++++----
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 11 +--
.../LLVMIR/omptarget-region-device-llvm.mlir | 2 +-
8 files changed, 102 insertions(+), 81 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 30c3834de139c3..1cb3bab454c26a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5881,10 +5881,13 @@ void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
const OMPExecutableDirective &D, CodeGenFunction &CGF,
- int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
- int32_t &MaxTeamsVal) {
+ llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
+ assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
+ "invalid default attrs structure");
+ int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
+ int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
- getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
+ getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
/*UpperBoundOnly=*/true);
@@ -5902,12 +5905,12 @@ void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
else
continue;
- MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
+ Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
if (AttrMaxThreadsVal > 0)
MaxThreadsVal = MaxThreadsVal > 0
? std::min(MaxThreadsVal, AttrMaxThreadsVal)
: AttrMaxThreadsVal;
- MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
+ Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
if (AttrMaxBlocksVal > 0)
MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
: AttrMaxBlocksVal;
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 8ab5ee70a19fa2..3791bb71592350 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -313,12 +313,9 @@ class CGOpenMPRuntime {
llvm::OpenMPIRBuilder OMPBuilder;
/// Helper to determine the min/max number of threads/teams for \p D.
- void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D,
- CodeGenFunction &CGF,
- int32_t &MinThreadsVal,
- int32_t &MaxThreadsVal,
- int32_t &MinTeamsVal,
- int32_t &MaxTeamsVal);
+ void computeMinAndMaxThreadsAndTeams(
+ const OMPExecutableDirective &D, CodeGenFunction &CGF,
+ llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs);
/// Helper to emit outlined function for 'target' directive.
/// \param D Directive to emit.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 756f0482b8ea72..659783a813c83e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -744,14 +744,11 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D,
CodeGenFunction &CGF,
EntryFunctionState &EST, bool IsSPMD) {
- int32_t MinThreadsVal = 1, MaxThreadsVal = -1, MinTeamsVal = 1,
- MaxTeamsVal = -1;
- computeMinAndMaxThreadsAndTeams(D, CGF, MinThreadsVal, MaxThreadsVal,
- MinTeamsVal, MaxTeamsVal);
+ llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs;
+ computeMinAndMaxThreadsAndTeams(D, CGF, Attrs);
CGBuilderTy &Bld = CGF.Builder;
- Bld.restoreIP(OMPBuilder.createTargetInit(
- Bld, IsSPMD, MinThreadsVal, MaxThreadsVal, MinTeamsVal, MaxTeamsVal));
+ Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, Attrs));
if (!IsSPMD)
emitGenericVarsProlog(CGF, EST.Loc);
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 4ce47b1c05d9b0..2f28d2258176a9 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2225,6 +2225,20 @@ class OpenMPIRBuilder {
MapNamesArray(MapNamesArray) {}
};
+ /// Container to pass the default attributes with which a kernel must be
+ /// launched, used to set kernel attributes and populate associated static
+ /// structures.
+ ///
+ /// For max values, < 0 means unset, == 0 means set but unknown at compile
+ /// time. The number of max values will be 1 except for the case where
+ /// ompx_bare is set.
+ struct TargetKernelDefaultAttrs {
+ SmallVector<int32_t, 3> MaxTeams = {-1};
+ int32_t MinTeams = 1;
+ SmallVector<int32_t, 3> MaxThreads = {-1};
+ int32_t MinThreads = 1;
+ };
+
/// Data structure that contains the needed information to construct the
/// kernel args vector.
struct TargetKernelArgs {
@@ -2728,15 +2742,11 @@ class OpenMPIRBuilder {
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
- /// \param MinThreads Minimal number of threads, or 0.
- /// \param MaxThreads Maximal number of threads, or 0.
- /// \param MinTeams Minimal number of teams, or 0.
- /// \param MaxTeams Maximal number of teams, or 0.
- InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
- int32_t MinThreadsVal = 0,
- int32_t MaxThreadsVal = 0,
- int32_t MinTeamsVal = 0,
- int32_t MaxTeamsVal = 0);
+ /// \param Attrs Structure containing the default numbers of threads and teams
+ /// to launch the kernel with.
+ InsertPointTy createTargetInit(
+ const LocationDescription &Loc, bool IsSPMD,
+ const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs);
/// Create a runtime call for kmpc_target_deinit
///
@@ -2961,8 +2971,8 @@ class OpenMPIRBuilder {
/// \param CodeGenIP The insertion point where the call to the outlined
/// function should be emitted.
/// \param EntryInfo The entry information about the function.
- /// \param NumTeams Number of teams specified in the num_teams clause.
- /// \param NumThreads Number of teams specified in the thread_limit clause.
+ /// \param DefaultAttrs Structure containing the default numbers of threads
+ /// and teams to launch the kernel with.
/// \param Inputs The input values to the region that will be passed.
/// as arguments to the outlined function.
/// \param BodyGenCB Callback that will generate the region code.
@@ -2975,9 +2985,10 @@ class OpenMPIRBuilder {
const LocationDescription &Loc, bool IsOffloadEntry,
OpenMPIRBuilder::InsertPointTy AllocaIP,
OpenMPIRBuilder::InsertPointTy CodeGenIP,
- TargetRegionEntryInfo &EntryInfo, ArrayRef<int32_t> NumTeams,
- ArrayRef<int32_t> NumThreads, SmallVectorImpl<Value *> &Inputs,
- GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB,
+ TargetRegionEntryInfo &EntryInfo,
+ const TargetKernelDefaultAttrs &DefaultAttrs,
+ SmallVectorImpl<Value *> &Inputs, GenMapInfoCallbackTy GenMapInfoCB,
+ TargetBodyGenCallbackTy BodyGenCB,
TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
SmallVector<DependData> Dependencies = {}, bool HasNowait = false);
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8dbf2aa7e0a243..73ac0f298ce404 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6128,10 +6128,12 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
return Builder.CreateCall(Fn, Args);
}
-OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
- int32_t MinThreadsVal, int32_t MaxThreadsVal,
- int32_t MinTeamsVal, int32_t MaxTeamsVal) {
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
+ const LocationDescription &Loc, bool IsSPMD,
+ const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
+ assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
+ "expected num_threads and num_teams to be specified");
+
if (!updateToLocation(Loc))
return Loc.IP;
@@ -6158,21 +6160,23 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
// Manifest the launch configuration in the metadata matching the kernel
// environment.
- if (MinTeamsVal > 1 || MaxTeamsVal > 0)
- writeTeamsForKernel(T, *Kernel, MinTeamsVal, MaxTeamsVal);
+ if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
+ writeTeamsForKernel(T, *Kernel, Attrs.MinTeams, Attrs.MaxTeams.front());
- // For max values, < 0 means unset, == 0 means set but unknown.
+ // If MaxThreads is not set, select the maximum between the default workgroup
+ // size and the MinThreads value.
+ int32_t MaxThreadsVal = Attrs.MaxThreads.front();
if (MaxThreadsVal < 0)
MaxThreadsVal = std::max(
- int32_t(getGridValue(T, Kernel).GV_Default_WG_Size), MinThreadsVal);
+ int32_t(getGridValue(T, Kernel).GV_Default_WG_Size), Attrs.MinThreads);
if (MaxThreadsVal > 0)
- writeThreadBoundsForKernel(T, *Kernel, MinThreadsVal, MaxThreadsVal);
+ writeThreadBoundsForKernel(T, *Kernel, Attrs.MinThreads, MaxThreadsVal);
- Constant *MinThreads = ConstantInt::getSigned(Int32, MinThreadsVal);
+ Constant *MinThreads = ConstantInt::getSigned(Int32, Attrs.MinThreads);
Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsVal);
- Constant *MinTeams = ConstantInt::getSigned(Int32, MinTeamsVal);
- Constant *MaxTeams = ConstantInt::getSigned(Int32, MaxTeamsVal);
+ Constant *MinTeams = ConstantInt::getSigned(Int32, Attrs.MinTeams);
+ Constant *MaxTeams = ConstantInt::getSigned(Int32, Attrs.MaxTeams.front());
Constant *ReductionDataSize = ConstantInt::getSigned(Int32, 0);
Constant *ReductionBufferLength = ConstantInt::getSigned(Int32, 0);
@@ -6743,8 +6747,9 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
}
static Expected<Function *> createOutlinedFunction(
- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName,
- SmallVectorImpl<Value *> &Inputs,
+ OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
+ StringRef FuncName, SmallVectorImpl<Value *> &Inputs,
OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
SmallVector<Type *> ParameterTypes;
@@ -6811,7 +6816,8 @@ static Expected<Function *> createOutlinedFunction(
// Insert target init call in the device compilation pass.
if (OMPBuilder.Config.isTargetDevice())
- Builder.restoreIP(OMPBuilder.createTargetInit(Builder, /*IsSPMD*/ false));
+ Builder.restoreIP(
+ OMPBuilder.createTargetInit(Builder, /*IsSPMD=*/false, DefaultAttrs));
BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
@@ -7010,16 +7016,18 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
static Error emitTargetOutlinedFunction(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
- TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
- Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
+ TargetRegionEntryInfo &EntryInfo,
+ const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
+ Function *&OutlinedFn, Constant *&OutlinedFnID,
+ SmallVectorImpl<Value *> &Inputs,
OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
- [&OMPBuilder, &Builder, &Inputs, &CBFunc,
- &ArgAccessorFuncCB](StringRef EntryFnName) {
- return createOutlinedFunction(OMPBuilder, Builder, EntryFnName, Inputs,
- CBFunc, ArgAccessorFuncCB);
+ [&](StringRef EntryFnName) {
+ return createOutlinedFunction(OMPBuilder, Builder, DefaultAttrs,
+ EntryFnName, Inputs, CBFunc,
+ ArgAccessorFuncCB);
};
return OMPBuilder.emitTargetRegionFunction(
@@ -7315,9 +7323,10 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
static void
emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
- OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn,
- Constant *OutlinedFnID, ArrayRef<int32_t> NumTeams,
- ArrayRef<int32_t> NumThreads, SmallVectorImpl<Value *> &Args,
+ OpenMPIRBuilder::InsertPointTy AllocaIP,
+ const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
+ Function *OutlinedFn, Constant *OutlinedFnID,
+ SmallVectorImpl<Value *> &Args,
OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {},
bool HasNoWait = false) {
@@ -7398,9 +7407,9 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
SmallVector<Value *, 3> NumTeamsC;
SmallVector<Value *, 3> NumThreadsC;
- for (auto V : NumTeams)
+ for (auto V : DefaultAttrs.MaxTeams)
NumTeamsC.push_back(llvm::ConstantInt::get(Builder.getInt32Ty(), V));
- for (auto V : NumThreads)
+ for (auto V : DefaultAttrs.MaxThreads)
NumThreadsC.push_back(llvm::ConstantInt::get(Builder.getInt32Ty(), V));
unsigned NumTargetItems = Info.NumberOfPtrs;
@@ -7441,7 +7450,7 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP,
InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo,
- ArrayRef<int32_t> NumTeams, ArrayRef<int32_t> NumThreads,
+ const TargetKernelDefaultAttrs &DefaultAttrs,
SmallVectorImpl<Value *> &Args, GenMapInfoCallbackTy GenMapInfoCB,
OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
@@ -7458,16 +7467,16 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
// the target region itself is generated using the callbacks CBFunc
// and ArgAccessorFuncCB
if (Error Err = emitTargetOutlinedFunction(
- *this, Builder, IsOffloadEntry, EntryInfo, OutlinedFn, OutlinedFnID,
- Args, CBFunc, ArgAccessorFuncCB))
+ *this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
+ OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB))
return Err;
// If we are not on the target device, then we need to generate code
// to make a remote call (offload) to the previously outlined function
// that represents the target region. Do that now.
if (!Config.isTargetDevice())
- emitTargetCall(*this, Builder, AllocaIP, OutlinedFn, OutlinedFnID, NumTeams,
- NumThreads, Args, GenMapInfoCB, Dependencies, HasNowait);
+ emitTargetCall(*this, Builder, AllocaIP, DefaultAttrs, OutlinedFn,
+ OutlinedFnID, Args, GenMapInfoCB, Dependencies, HasNowait);
return Builder.saveIP();
}
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 9faae88b8dbc78..a833f8e83369cc 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6182,9 +6182,12 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(
- OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(),
- EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB);
+ OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
+ /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+ OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
+ Builder.saveIP(), EntryInfo, DefaultAttrs, Inputs,
+ GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB);
assert(AfterIP && "unexpected error");
Builder.restoreIP(*AfterIP);
OMPBuilder.finalize();
@@ -6292,11 +6295,11 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
/*Line=*/3, /*Count=*/0);
- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
- OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
- EntryInfo, /*NumTeams=*/-1,
- /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
- BodyGenCB, SimpleArgAccessorCB);
+ OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
+ /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(
+ Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, EntryInfo, DefaultAttrs,
+ CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB);
assert(AfterIP && "unexpected error");
Builder.restoreIP(*AfterIP);
@@ -6449,11 +6452,11 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
/*Line=*/3, /*Count=*/0);
- OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
- OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
- EntryInfo, /*NumTeams=*/-1,
- /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
- BodyGenCB, SimpleArgAccessorCB);
+ OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
+ /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+ OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(
+ Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, EntryInfo, DefaultAttrs,
+ CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB);
assert(AfterIP && "unexpected error");
Builder.restoreIP(*AfterIP);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index db2a2343fc2f62..98d0df9f4d68f6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4054,9 +4054,6 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName))
return failure();
- int32_t defaultValTeams = -1;
- int32_t defaultValThreads = 0;
-
MapInfoData mapData;
collectMapDataFromMapOperands(mapData, mapVars, moduleTranslation, dl,
builder);
@@ -4088,6 +4085,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
allocaIP, codeGenIP);
};
+ // TODO: Populate default attributes based on the construct and clauses.
+ llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs = {
+ /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+
llvm::SmallVector<llvm::Value *, 4> kernelInput;
for (size_t i = 0; i < mapVars.size(); ++i) {
// declare target arguments are not passed to kernels as arguments
@@ -4111,8 +4112,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
moduleTranslation.getOpenMPBuilder()->createTarget(
ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
- defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
- argAccessorCB, dds, targetOp.getNowait());
+ defaultAttrs, kernelInput, genMapInfoCB, bodyCB, argAccessorCB, dds,
+ targetOp.getNowait());
if (failed(handleError(afterIP, opInst)))
return failure();
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
index 8993c0e85c5dea..fa32a3030108d8 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
@@ -29,7 +29,7 @@ module attributes {omp.is_target_device = true} {
// CHECK: @[[SRC_LOC:.*]] = private unnamed_addr constant [23 x i8] c"{{[^"]*}}", align 1
// CHECK: @[[IDENT:.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[SRC_LOC]] }, align 8
// CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
-// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
+// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
// CHECK: define weak_odr protected void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[DYN_PTR:.*]], ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
// CHECK: %[[TMP_A:.*]] = alloca ptr, align 8
// CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8
>From 45c6667a70aa16409d0cc24df8f88168c883c51d Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Wed, 8 Jan 2025 16:33:18 +0000
Subject: [PATCH 2/2] Move IsSPMD into the new structure
---
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 3 ++-
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 8 ++++----
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 10 +++++-----
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 9 ++++++---
.../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 ++-
5 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 659783a813c83e..654a13d75ec810 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -745,10 +745,11 @@ void CGOpenMPRuntimeGPU::emitKernelInit(const OMPExecutableDirective &D,
CodeGenFunction &CGF,
EntryFunctionState &EST, bool IsSPMD) {
llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs Attrs;
+ Attrs.IsSPMD = IsSPMD;
computeMinAndMaxThreadsAndTeams(D, CGF, Attrs);
CGBuilderTy &Bld = CGF.Builder;
- Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, Attrs));
+ Bld.restoreIP(OMPBuilder.createTargetInit(Bld, Attrs));
if (!IsSPMD)
emitGenericVarsProlog(CGF, EST.Loc);
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 2f28d2258176a9..8ca3bc08b5ad49 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -2233,6 +2233,7 @@ class OpenMPIRBuilder {
/// time. The number of max values will be 1 except for the case where
/// ompx_bare is set.
struct TargetKernelDefaultAttrs {
+ bool IsSPMD = false;
SmallVector<int32_t, 3> MaxTeams = {-1};
int32_t MinTeams = 1;
SmallVector<int32_t, 3> MaxThreads = {-1};
@@ -2741,11 +2742,10 @@ class OpenMPIRBuilder {
/// Create a runtime call for kmpc_target_init
///
/// \param Loc The insert and source location description.
- /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
- /// \param Attrs Structure containing the default numbers of threads and teams
- /// to launch the kernel with.
+ /// \param Attrs Structure containing the default attributes, including
+ /// numbers of threads and teams to launch the kernel with.
InsertPointTy createTargetInit(
- const LocationDescription &Loc, bool IsSPMD,
+ const LocationDescription &Loc,
const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs);
/// Create a runtime call for kmpc_target_deinit
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 73ac0f298ce404..82c7be79cae2af 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6129,7 +6129,7 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
- const LocationDescription &Loc, bool IsSPMD,
+ const LocationDescription &Loc,
const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
"expected num_threads and num_teams to be specified");
@@ -6141,8 +6141,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
Constant *IsSPMDVal = ConstantInt::getSigned(
- Int8, IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
- Constant *UseGenericStateMachineVal = ConstantInt::getSigned(Int8, !IsSPMD);
+ Int8, Attrs.IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+ Constant *UseGenericStateMachineVal =
+ ConstantInt::getSigned(Int8, !Attrs.IsSPMD);
Constant *MayUseNestedParallelismVal = ConstantInt::getSigned(Int8, true);
Constant *DebugIndentionLevelVal = ConstantInt::getSigned(Int16, 0);
@@ -6816,8 +6817,7 @@ static Expected<Function *> createOutlinedFunction(
// Insert target init call in the device compilation pass.
if (OMPBuilder.Config.isTargetDevice())
- Builder.restoreIP(
- OMPBuilder.createTargetInit(Builder, /*IsSPMD=*/false, DefaultAttrs));
+ Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index a833f8e83369cc..670841aadafc2d 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6183,7 +6183,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
- /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+ /*IsSPMD=*/false, /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0},
+ /*MinThreads=*/0};
OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
Builder.saveIP(), EntryInfo, DefaultAttrs, Inputs,
@@ -6296,7 +6297,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
/*Line=*/3, /*Count=*/0);
OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
- /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+ /*IsSPMD=*/false, /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0},
+ /*MinThreads=*/0};
OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(
Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, EntryInfo, DefaultAttrs,
CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB);
@@ -6453,7 +6455,8 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
/*Line=*/3, /*Count=*/0);
OpenMPIRBuilder::TargetKernelDefaultAttrs DefaultAttrs = {
- /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+ /*IsSPMD=*/false, /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0},
+ /*MinThreads=*/0};
OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(
Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, EntryInfo, DefaultAttrs,
CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 98d0df9f4d68f6..b2faefc6199485 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4087,7 +4087,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
// TODO: Populate default attributes based on the construct and clauses.
llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs defaultAttrs = {
- /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0}, /*MinThreads=*/0};
+ /*IsSPMD=*/false, /*MaxTeams=*/{-1}, /*MinTeams=*/0, /*MaxThreads=*/{0},
+ /*MinThreads=*/0};
llvm::SmallVector<llvm::Value *, 4> kernelInput;
for (size_t i = 0; i < mapVars.size(); ++i) {
More information about the llvm-branch-commits
mailing list