[llvm-branch-commits] [flang] [llvm] [mlir] [MLIR][OpenMP][OMPIRBuilder] Add lowering support for omp.target_triples (PR #100156)
Sergio Afonso via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Aug 1 08:50:20 PDT 2024
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/100156
>From 366b716e3a192265aed4a1328c49d0ffeef0166d Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 23 Jul 2024 16:53:40 +0100
Subject: [PATCH] [MLIR][OpenMP][OMPIRBuilder] Add lowering support for
omp.target_triples
This patch modifies MLIR to LLVM IR lowering of the OpenMP dialect to take into
consideration the contents of the `omp.target_triples` module attribute while
generating code for `omp.target` operations.
It adds the `OpenMPIRBuilderConfig::TargetTriples` field and initializes it
using the `amendOperation` flow of the `OpenMPToLLVMIRTranslation` pass. Some
changes are introduced into the `OpenMPIRBuilder` to allow callers to pass in
the information about whether a target region is intended to be offloaded.
The result of this change is that offloading calls are only generated when the
`--offload-arch` or `-fopenmp-targets` options are given to the compiler.
Otherwise, only the host fallback code is generated. This fixes linker errors
currently triggered by `flang-new` if a source file containing a `target`
construct is compiled without any of the aforementioned options.
Several unit tests impacted by these changes, which are intended to check host
code generated for `omp.target` operations, are updated to contain the new
attribute. Without it, no calls to `__tgt_target_kernel` and associated control
flow operations are generated.
Fixes #100209.
---
.../OpenMP/map-types-and-sizes.f90 | 2 +-
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 21 +++--
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 91 ++++++++++++-------
.../Frontend/OpenMPIRBuilderTest.cpp | 10 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 34 +++++--
.../omptarget-array-sectioning-host.mlir | 2 +-
...mptarget-byref-bycopy-generation-host.mlir | 2 +-
.../LLVMIR/omptarget-depend-host-only.mlir | 33 +++++++
mlir/test/Target/LLVMIR/omptarget-depend.mlir | 3 +
...target-fortran-allocatable-types-host.mlir | 2 +-
.../omptarget-fortran-common-block-host.mlir | 2 +-
...arget-nested-record-type-mapping-host.mlir | 2 +-
.../omptarget-record-type-mapping-host.mlir | 2 +-
.../LLVMIR/omptarget-region-host-only.mlir | 54 +++++++++++
.../Target/LLVMIR/omptarget-region-llvm.mlir | 2 +-
15 files changed, 204 insertions(+), 58 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir
create mode 100644 mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
index 591be0b680a51..055fdecc91464 100644
--- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90
+++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
@@ -6,7 +6,7 @@
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!
-!RUN: %flang_fc1 -emit-llvm -fopenmp -flang-deprecated-no-hlfir %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -flang-deprecated-no-hlfir %s -o - | FileCheck %s
!===============================================================================
! Check MapTypes for target implicit captures
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 1614d5716d28c..58d298e0c9752 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -115,6 +115,10 @@ class OpenMPIRBuilderConfig {
// Grid Value for the GPU target
std::optional<omp::GV> GridValue;
+ /// When compilation is being done for the OpenMP host (i.e. `IsTargetDevice =
+ /// false`), this contains the list of associated offloading triples, if any.
+ SmallVector<Triple> TargetTriples;
+
OpenMPIRBuilderConfig();
OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
bool OpenMPOffloadMandatory,
@@ -2180,21 +2184,22 @@ class OpenMPIRBuilder {
/// kernel args vector.
struct TargetKernelArgs {
/// Number of arguments passed to the runtime library.
- unsigned NumTargetItems;
+ unsigned NumTargetItems = 0;
/// Arguments passed to the runtime library
TargetDataRTArgs RTArgs;
/// The number of iterations
- Value *NumIterations;
+ Value *NumIterations = nullptr;
/// The number of teams.
- Value *NumTeams;
+ Value *NumTeams = nullptr;
/// The number of threads.
- Value *NumThreads;
+ Value *NumThreads = nullptr;
/// The size of the dynamic shared memory.
- Value *DynCGGroupMem;
+ Value *DynCGGroupMem = nullptr;
/// True if the kernel has 'no wait' clause.
- bool HasNoWait;
+ bool HasNoWait = false;
- /// Constructor for TargetKernelArgs
+ // Constructors for TargetKernelArgs.
+ TargetKernelArgs() {}
TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs,
Value *NumIterations, Value *NumTeams, Value *NumThreads,
Value *DynCGGroupMem, bool HasNoWait)
@@ -2831,6 +2836,7 @@ class OpenMPIRBuilder {
/// Generator for '#omp target'
///
/// \param Loc where the target data construct was encountered.
+ /// \param IsOffloadEntry whether it is an offload entry.
/// \param CodeGenIP The insertion point where the call to the outlined
/// function should be emitted.
/// \param EntryInfo The entry information about the function.
@@ -2844,6 +2850,7 @@ class OpenMPIRBuilder {
/// \param Dependencies A vector of DependData objects that carry
// dependency information as passed in the depend clause
InsertPointTy createTarget(const LocationDescription &Loc,
+ bool IsOffloadEntry,
OpenMPIRBuilder::InsertPointTy AllocaIP,
OpenMPIRBuilder::InsertPointTy CodeGenIP,
TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 77e350e7276ab..4650c6774a474 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6768,7 +6768,7 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
return ProxyFn;
}
static void emitTargetOutlinedFunction(
- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
@@ -6781,8 +6781,8 @@ static void emitTargetOutlinedFunction(
CBFunc, ArgAccessorFuncCB);
};
- OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, true,
- OutlinedFn, OutlinedFnID);
+ OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
+ IsOffloadEntry, OutlinedFn, OutlinedFnID);
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
Function *OutlinedFn, Value *OutlinedFnID,
@@ -6898,15 +6898,22 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
Builder.restoreIP(TargetTaskBodyIP);
- // emitKernelLaunch makes the necessary runtime call to offload the kernel.
- // We then outline all that code into a separate function
- // ('kernel_launch_function' in the pseudo code above). This function is then
- // called by the target task proxy function (see
- // '@.omp_target_task_proxy_func' in the pseudo code above)
- // "@.omp_target_task_proxy_func' is generated by emitTargetTaskProxyFunction
- Builder.restoreIP(emitKernelLaunch(Builder, OutlinedFn, OutlinedFnID,
- EmitTargetCallFallbackCB, Args, DeviceID,
- RTLoc, TargetTaskAllocaIP));
+ if (OutlinedFnID) {
+ // emitKernelLaunch makes the necessary runtime call to offload the kernel.
+ // We then outline all that code into a separate function
+ // ('kernel_launch_function' in the pseudo code above). This function is
+ // then called by the target task proxy function (see
+ // '@.omp_target_task_proxy_func' in the pseudo code above)
+ // '@.omp_target_task_proxy_func' is generated by
+ // emitTargetTaskProxyFunction.
+ Builder.restoreIP(emitKernelLaunch(Builder, OutlinedFn, OutlinedFnID,
+ EmitTargetCallFallbackCB, Args, DeviceID,
+ RTLoc, TargetTaskAllocaIP));
+ } else {
+ // When OutlinedFnID is set to nullptr, then it's not an offloading call. In
+ // this case, we execute the host implementation directly.
+ Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP()));
+ }
OI.ExitBB = Builder.saveIP().getBlock();
OI.PostOutlineCB = [this, ToBeDeleted, Dependencies,
@@ -7015,11 +7022,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
Function *TaskCompleteFn =
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
- CallInst *CI = nullptr;
- if (HasShareds)
- CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
- else
- CI = Builder.CreateCall(ProxyFn, {ThreadID});
+ CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
CI->setDebugLoc(StaleCI->getDebugLoc());
Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
} else if (DepArray) {
@@ -7052,6 +7055,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
<< "\n");
return Builder.saveIP();
}
+
void OpenMPIRBuilder::emitOffloadingArraysAndArgs(
InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous,
@@ -7069,6 +7073,37 @@ static void emitTargetCall(
SmallVectorImpl<Value *> &Args,
OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
+ // Generate a function call to the host fallback implementation of the target
+ // region. This is called by the host when no offload entry was generated for
+ // the target region and when the offloading call fails at runtime.
+ auto &&EmitTargetCallFallbackCB =
+ [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
+ Builder.restoreIP(IP);
+ Builder.CreateCall(OutlinedFn, Args);
+ return Builder.saveIP();
+ };
+
+ bool HasNoWait = false;
+ bool HasDependencies = Dependencies.size() > 0;
+ bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
+
+ // If we don't have an ID for the target region, it means an offload entry
+ // wasn't created. In this case we just run the host fallback directly.
+ if (!OutlinedFnID) {
+ if (RequiresOuterTargetTask) {
+ // Arguments that are intended to be directly forwarded to an
+ // emitKernelLaunch call are passed as nullptr, since OutlinedFnID=nullptr
+ // results in that call not being done.
+ OpenMPIRBuilder::TargetKernelArgs KArgs;
+ Builder.restoreIP(OMPBuilder.emitTargetTask(
+ OutlinedFn, /*OutlinedFnID=*/nullptr, EmitTargetCallFallbackCB, KArgs,
+ /*DeviceID=*/nullptr, /*RTLoc=*/nullptr, AllocaIP, Dependencies,
+ HasNoWait));
+ } else {
+ Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP()));
+ }
+ return;
+ }
OpenMPIRBuilder::TargetDataInfo Info(
/*RequiresDevicePointerInfo=*/false,
@@ -7081,14 +7116,6 @@ static void emitTargetCall(
/*IsNonContiguous=*/true,
/*ForEndCall=*/false);
- // emitKernelLaunch
- auto &&EmitTargetCallFallbackCB =
- [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
- Builder.restoreIP(IP);
- Builder.CreateCall(OutlinedFn, Args);
- return Builder.saveIP();
- };
-
unsigned NumTargetItems = Info.NumberOfPtrs;
// TODO: Use correct device ID
Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
@@ -7103,10 +7130,6 @@ static void emitTargetCall(
// TODO: Use correct DynCGGroupMem
Value *DynCGGroupMem = Builder.getInt32(0);
- bool HasNoWait = false;
- bool HasDependencies = Dependencies.size() > 0;
- bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
-
OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, NumIterations,
NumTeamsVal, NumThreadsVal,
DynCGGroupMem, HasNoWait);
@@ -7123,8 +7146,9 @@ static void emitTargetCall(
DeviceID, RTLoc, AllocaIP));
}
}
+
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
- const LocationDescription &Loc, InsertPointTy AllocaIP,
+ const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP,
InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
int32_t NumThreads, SmallVectorImpl<Value *> &Args,
GenMapInfoCallbackTy GenMapInfoCB,
@@ -7138,12 +7162,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
Builder.restoreIP(CodeGenIP);
Function *OutlinedFn;
- Constant *OutlinedFnID;
+ Constant *OutlinedFnID = nullptr;
// The target region is outlined into its own function. The LLVM IR for
// the target region itself is generated using the callbacks CBFunc
// and ArgAccessorFuncCB
- emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn,
- OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
+ emitTargetOutlinedFunction(*this, Builder, IsOffloadEntry, EntryInfo,
+ OutlinedFn, OutlinedFnID, Args, CBFunc,
+ ArgAccessorFuncCB);
// If we are not on the target device, then we need to generate code
// to make a remote call (offload) to the previously outlined function
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index cb4c289f409a1..6207792f9f0d0 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5983,8 +5983,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
Builder.restoreIP(OMPBuilder.createTarget(
- OmpLoc, Builder.saveIP(), Builder.saveIP(), EntryInfo, -1, 0, Inputs,
- GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
+ OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(),
+ EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
OMPBuilder.finalize();
Builder.CreateRetVoid();
@@ -6087,7 +6087,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
/*Line=*/3, /*Count=*/0);
Builder.restoreIP(
- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
+ OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
+ EntryInfo, /*NumTeams=*/-1,
/*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB));
@@ -6235,7 +6236,8 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
/*Line=*/3, /*Count=*/0);
Builder.restoreIP(
- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
+ OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
+ EntryInfo, /*NumTeams=*/-1,
/*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB));
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index ddee117838697..458d05d5059db 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3233,6 +3233,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
if (!targetOpSupported(opInst))
return failure();
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ bool isTargetDevice = ompBuilder->Config.isTargetDevice();
auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
auto targetOp = cast<omp::TargetOp>(opInst);
auto &targetRegion = targetOp.getRegion();
@@ -3240,6 +3242,11 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<Value> mapVars = targetOp.getMapVars();
llvm::Function *llvmOutlinedFn = nullptr;
+ // TODO: It can also be false if a compile-time constant `false` IF clause is
+ // specified.
+ bool isOffloadEntry =
+ isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+
LogicalResult bodyGenStatus = success();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP,
@@ -3306,14 +3313,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
llvm::Value *&retVal, InsertPointTy allocaIP,
InsertPointTy codeGenIP) {
- llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-
// We just return the unaltered argument for the host function
// for now, some alterations may be required in the future to
// keep host fallback functions working identically to the device
// version (e.g. pass ByCopy values should be treated as such on
// host and device, currently not always the case)
- if (!ompBuilder->Config.isTargetDevice()) {
+ if (!isTargetDevice) {
retVal = cast<llvm::Value>(&arg);
return codeGenIP;
}
@@ -3339,9 +3344,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
moduleTranslation, dds);
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
- ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
- defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB,
- dds));
+ ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
+ defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
+ argAccessorCB, dds));
// Remap access operations to declare target reference pointers for the
// device, essentially generating extra loadop's as necessary
@@ -3714,6 +3719,23 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
}
return failure();
})
+ .Case("omp.target_triples",
+ [&](Attribute attr) {
+ if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
+ llvm::OpenMPIRBuilderConfig &config =
+ moduleTranslation.getOpenMPBuilder()->Config;
+ config.TargetTriples.clear();
+ config.TargetTriples.reserve(triplesAttr.size());
+ for (Attribute tripleAttr : triplesAttr) {
+ if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
+ config.TargetTriples.emplace_back(tripleStrAttr.getValue());
+ else
+ return failure();
+ }
+ return success();
+ }
+ return failure();
+ })
.Default([](Attribute) {
// Fall through for omp attributes that do not require lowering.
return success();
diff --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
index 0016a1f05a2b1..a14214cd8c1cb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
@@ -7,7 +7,7 @@
// array bounds to lower to the full size of the array and the sectioned
// array to be the size of 3*3*1*element-byte-size (36 bytes in this case).
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_3d_target_array_section() {
%0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
index 8635ea4956706..7c494e80155bb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir
new file mode 100644
index 0000000000000..a951593d26741
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+ llvm.func @omp_target_depend_() {
+ %0 = llvm.mlir.constant(39 : index) : i64
+ %1 = llvm.mlir.constant(1 : index) : i64
+ %2 = llvm.mlir.constant(40 : index) : i64
+ %3 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%0 : i64) extent(%2 : i64) stride(%1 : i64) start_idx(%1 : i64)
+ %4 = llvm.mlir.addressof @_QFEa : !llvm.ptr
+ %5 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.array<40 x i32>) map_clauses(from) capture(ByRef) bounds(%3) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%5 -> %arg0 : !llvm.ptr) depend(taskdependin -> %4 : !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr):
+ %6 = llvm.mlir.constant(100 : index) : i32
+ llvm.store %6, %arg0 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.mlir.global internal @_QFEa() {addr_space = 0 : i32} : !llvm.array<40 x i32> {
+ %0 = llvm.mlir.zero : !llvm.array<40 x i32>
+ llvm.return %0 : !llvm.array<40 x i32>
+ }
+}
+
+// CHECK: define void @omp_target_depend_()
+// CHECK-NOT: define {{.*}} @
+// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_depend__l[[LINE:.*]](ptr {{.*}})
+// CHECK-NEXT: ret void
+
+// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_depend__l[[LINE]](ptr %[[ADDR_A:.*]])
+// CHECK: store i32 100, ptr %[[ADDR_A]], align 4
diff --git a/mlir/test/Target/LLVMIR/omptarget-depend.mlir b/mlir/test/Target/LLVMIR/omptarget-depend.mlir
index c386342005e5e..c66fe8f455dfb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-depend.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-depend.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.constant(39 : index) : i64
%1 = llvm.mlir.constant(0 : index) : i64
@@ -117,6 +119,7 @@
llvm.call @_FortranAProgramEndStatement() {fastmathFlags = #llvm.fastmath<contract>} : () -> ()
llvm.return %0 : i32
}
+}
// %strucArg holds pointers to shared data.
// CHECK: define void @_QQmain() {
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 9b46f84e5050f..f0e301bd70e3b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -6,7 +6,7 @@
// alongside the omp.map.info, the test utilises mapping of array sections,
// full arrays and individual allocated scalars.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(5 : index) : i64
%1 = llvm.mlir.constant(2 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
index 7273f53d0a3db..396628e1081e9 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
@@ -5,7 +5,7 @@
// to LLVM-IR from MLIR when a fortran common block is lowered alongside
// the omp.map.info.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @omp_map_common_block_using_common_block_members() {
%0 = llvm.mlir.constant(4 : index) : i64
%1 = llvm.mlir.constant(0 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
index e4d82d4a58c89..8cec94abf968b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
@@ -7,7 +7,7 @@
// derived type) where members of both the nested and outer record type have
// members mapped.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(10 : index) : i64
%1 = llvm.mlir.constant(4 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
index c7a87e44d6537..bbfcb4eecb3e8 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
@@ -6,7 +6,7 @@
// (C++/C class/structure, Fortran derived type) where only members of the record
// type are mapped.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(10 : index) : i64
%1 = llvm.mlir.constant(4 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
new file mode 100644
index 0000000000000..61b6f3b91cd79
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
@@ -0,0 +1,54 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+ llvm.func @omp_target_region_() {
+ %0 = llvm.mlir.constant(20 : i32) : i32
+ %1 = llvm.mlir.constant(10 : i32) : i32
+ %2 = llvm.mlir.constant(1 : i64) : i64
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(1 : i64) : i64
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(1 : i64) : i64
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
+ llvm.store %1, %3 : i32, !llvm.ptr
+ llvm.store %0, %5 : i32, !llvm.ptr
+ %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+ %8 = llvm.load %arg0 : !llvm.ptr -> i32
+ %9 = llvm.load %arg1 : !llvm.ptr -> i32
+ %10 = llvm.add %8, %9 : i32
+ llvm.store %10, %arg2 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @omp_target_no_map() {
+ omp.target {
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: define void @omp_target_region_()
+// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_region__l[[LINE1:.*]](ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+// CHECK-NEXT: ret void
+
+// CHECK: define void @omp_target_no_map()
+// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK: call void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_no_map_l[[LINE2:.*]]()
+// CHECK-NEXT: ret void
+
+// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE1]](ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
+// CHECK: %[[VAL_A:.*]] = load i32, ptr %[[ADDR_A]], align 4
+// CHECK: %[[VAL_B:.*]] = load i32, ptr %[[ADDR_B]], align 4
+// CHECK: %[[SUM:.*]] = add i32 %[[VAL_A]], %[[VAL_B]]
+// CHECK: store i32 %[[SUM]], ptr %[[ADDR_C]], align 4
+
+// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_no_map_l[[LINE2]]()
+// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
index a32ee3e184e26..3af960d6ffcd0 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
More information about the llvm-branch-commits
mailing list