[llvm-branch-commits] [flang] [llvm] [mlir] [MLIR][OpenMP][OMPIRBuilder] Add lowering support for omp.target_triples (PR #100156)
Sergio Afonso via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jul 26 05:32:02 PDT 2024
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/100156
>From 9094873e1176b0e8af5065cfc9a7c3fbc12794c0 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 23 Jul 2024 16:53:40 +0100
Subject: [PATCH] [MLIR][OpenMP][OMPIRBuilder] Add lowering support for
omp.target_triples
This patch modifies MLIR to LLVM IR lowering of the OpenMP dialect to take into
consideration the contents of the `omp.target_triples` module attribute while
generating code for `omp.target` operations.
It adds the `OpenMPIRBuilderConfig::TargetTriples` field and initializes it
using the `amendOperation` flow of the `OpenMPToLLVMIRTranslation` pass. Some
changes are introduced into the `OpenMPIRBuilder` to allow passing the
information about whether a target region is intended to be offloaded from
outside.
The result of this change is that offloading calls are only generated when the
`--offload-arch` or `-fopenmp-targets` options are given to the compiler.
Otherwise, only the host fallback code is generated. This fixes linker errors
currently triggered by `flang-new` if a source file containing a `target`
construct is compiled without any of the aforementioned options.
Several unit tests impacted by these changes, which are intended to check host
code generated for `omp.target` operations, are updated to contain the new
attribute. Without it, no calls to `__tgt_target_kernel` and associated control
flow operations are generated.
Fixes #100209.
---
.../OpenMP/map-types-and-sizes.f90 | 2 +-
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 6 +++
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 40 ++++++++------
.../Frontend/OpenMPIRBuilderTest.cpp | 10 ++--
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 34 +++++++++---
.../omptarget-array-sectioning-host.mlir | 2 +-
...mptarget-byref-bycopy-generation-host.mlir | 2 +-
mlir/test/Target/LLVMIR/omptarget-depend.mlir | 3 ++
...target-fortran-allocatable-types-host.mlir | 2 +-
.../omptarget-fortran-common-block-host.mlir | 2 +-
...arget-nested-record-type-mapping-host.mlir | 2 +-
.../omptarget-record-type-mapping-host.mlir | 2 +-
.../LLVMIR/omptarget-region-host-only.mlir | 54 +++++++++++++++++++
.../Target/LLVMIR/omptarget-region-llvm.mlir | 2 +-
14 files changed, 130 insertions(+), 33 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
diff --git a/flang/test/Integration/OpenMP/map-types-and-sizes.f90 b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
index 591be0b680a51..055fdecc91464 100644
--- a/flang/test/Integration/OpenMP/map-types-and-sizes.f90
+++ b/flang/test/Integration/OpenMP/map-types-and-sizes.f90
@@ -6,7 +6,7 @@
! added to this directory and sub-directories.
!===----------------------------------------------------------------------===!
-!RUN: %flang_fc1 -emit-llvm -fopenmp -flang-deprecated-no-hlfir %s -o - | FileCheck %s
+!RUN: %flang_fc1 -emit-llvm -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -flang-deprecated-no-hlfir %s -o - | FileCheck %s
!===============================================================================
! Check MapTypes for target implicit captures
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a6995888de7d4..dbfe95790f4de 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -115,6 +115,10 @@ class OpenMPIRBuilderConfig {
// Grid Value for the GPU target
std::optional<omp::GV> GridValue;
+ /// When compilation is being done for the OpenMP host (i.e. `IsTargetDevice =
+ /// false`), this contains the list of offloading triples associated, if any.
+ SmallVector<Triple> TargetTriples;
+
OpenMPIRBuilderConfig();
OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
bool OpenMPOffloadMandatory,
@@ -2815,6 +2819,7 @@ class OpenMPIRBuilder {
/// Generator for '#omp target'
///
/// \param Loc where the target data construct was encountered.
+ /// \param IsOffloadEntry whether it is an offload entry.
/// \param CodeGenIP The insertion point where the call to the outlined
/// function should be emitted.
/// \param EntryInfo The entry information about the function.
@@ -2828,6 +2833,7 @@ class OpenMPIRBuilder {
/// \param Dependencies A vector of DependData objects that carry
// dependency information as passed in the depend clause
InsertPointTy createTarget(const LocationDescription &Loc,
+ bool IsOffloadEntry,
OpenMPIRBuilder::InsertPointTy AllocaIP,
OpenMPIRBuilder::InsertPointTy CodeGenIP,
TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ab2482b91213b..edf0bdc4c3211 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -6769,7 +6769,7 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
return ProxyFn;
}
static void emitTargetOutlinedFunction(
- OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
@@ -6782,8 +6782,8 @@ static void emitTargetOutlinedFunction(
CBFunc, ArgAccessorFuncCB);
};
- OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, true,
- OutlinedFn, OutlinedFnID);
+ OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
+ IsOffloadEntry, OutlinedFn, OutlinedFnID);
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
Function *OutlinedFn, Value *OutlinedFnID,
@@ -7053,6 +7053,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
<< "\n");
return Builder.saveIP();
}
+
static void emitTargetCall(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn,
@@ -7060,6 +7061,22 @@ static void emitTargetCall(
SmallVectorImpl<Value *> &Args,
OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) {
+ // Generate a function call to the host fallback implementation of the target
+ // region. This is called by the host when no offload entry was generated for
+ // the target region and when the offloading call fails at runtime.
+ auto &&EmitTargetCallFallbackCB =
+ [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
+ Builder.restoreIP(IP);
+ Builder.CreateCall(OutlinedFn, Args);
+ return Builder.saveIP();
+ };
+
+ // If we don't have an ID for the target region, it means an offload entry
+ // wasn't created. In this case we just run the host fallback directly.
+ if (!OutlinedFnID) {
+ Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP()));
+ return;
+ }
OpenMPIRBuilder::TargetDataInfo Info(
/*RequiresDevicePointerInfo=*/false,
@@ -7073,14 +7090,6 @@ static void emitTargetCall(
OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
!MapInfo.Names.empty());
- // emitKernelLaunch
- auto &&EmitTargetCallFallbackCB =
- [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
- Builder.restoreIP(IP);
- Builder.CreateCall(OutlinedFn, Args);
- return Builder.saveIP();
- };
-
unsigned NumTargetItems = MapInfo.BasePointers.size();
// TODO: Use correct device ID
Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
@@ -7116,7 +7125,7 @@ static void emitTargetCall(
}
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
- const LocationDescription &Loc, InsertPointTy AllocaIP,
+ const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP,
InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
int32_t NumThreads, SmallVectorImpl<Value *> &Args,
GenMapInfoCallbackTy GenMapInfoCB,
@@ -7130,12 +7139,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
Builder.restoreIP(CodeGenIP);
Function *OutlinedFn;
- Constant *OutlinedFnID;
+ Constant *OutlinedFnID = nullptr;
// The target region is outlined into its own function. The LLVM IR for
// the target region itself is generated using the callbacks CBFunc
// and ArgAccessorFuncCB
- emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn,
- OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
+ emitTargetOutlinedFunction(*this, Builder, IsOffloadEntry, EntryInfo,
+ OutlinedFn, OutlinedFnID, Args, CBFunc,
+ ArgAccessorFuncCB);
// If we are not on the target device, then we need to generate code
// to make a remote call (offload) to the previously outlined function
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 8653bbd3d38fd..d6213e84bbbb5 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5983,8 +5983,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
Builder.restoreIP(OMPBuilder.createTarget(
- OmpLoc, Builder.saveIP(), Builder.saveIP(), EntryInfo, -1, 0, Inputs,
- GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
+ OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(),
+ EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
OMPBuilder.finalize();
Builder.CreateRetVoid();
@@ -6087,7 +6087,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
/*Line=*/3, /*Count=*/0);
Builder.restoreIP(
- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
+ OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
+ EntryInfo, /*NumTeams=*/-1,
/*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB));
@@ -6235,7 +6236,8 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
/*Line=*/3, /*Count=*/0);
Builder.restoreIP(
- OMPBuilder.createTarget(Loc, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1,
+ OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
+ EntryInfo, /*NumTeams=*/-1,
/*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB));
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8b031deca8931..5ebcec0156d3d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3196,6 +3196,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
if (!targetOpSupported(opInst))
return failure();
+ llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+ bool isTargetDevice = ompBuilder->Config.isTargetDevice();
auto parentFn = opInst.getParentOfType<LLVM::LLVMFuncOp>();
auto targetOp = cast<omp::TargetOp>(opInst);
auto &targetRegion = targetOp.getRegion();
@@ -3203,6 +3205,11 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<Value> mapOperands = targetOp.getMapOperands();
llvm::Function *llvmOutlinedFn = nullptr;
+ // TODO: It can also be false if a compile-time constant `false` IF clause is
+ // specified.
+ bool isOffloadEntry =
+ isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+
LogicalResult bodyGenStatus = success();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP,
@@ -3270,14 +3277,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
llvm::Value *&retVal, InsertPointTy allocaIP,
InsertPointTy codeGenIP) {
- llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-
// We just return the unaltered argument for the host function
// for now, some alterations may be required in the future to
// keep host fallback functions working identically to the device
// version (e.g. pass ByCopy values should be treated as such on
// host and device, currently not always the case)
- if (!ompBuilder->Config.isTargetDevice()) {
+ if (!isTargetDevice) {
retVal = cast<llvm::Value>(&arg);
return codeGenIP;
}
@@ -3303,9 +3308,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
moduleTranslation, dds);
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
- ompLoc, allocaIP, builder.saveIP(), entryInfo, defaultValTeams,
- defaultValThreads, kernelInput, genMapInfoCB, bodyCB, argAccessorCB,
- dds));
+ ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
+ defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
+ argAccessorCB, dds));
// Remap access operations to declare target reference pointers for the
// device, essentially generating extra loadop's as necessary
@@ -3679,6 +3684,23 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::amendOperation(
}
return failure();
})
+ .Case("omp.target_triples",
+ [&](Attribute attr) {
+ if (auto triplesAttr = dyn_cast<ArrayAttr>(attr)) {
+ llvm::OpenMPIRBuilderConfig &config =
+ moduleTranslation.getOpenMPBuilder()->Config;
+ config.TargetTriples.clear();
+ config.TargetTriples.reserve(triplesAttr.size());
+ for (Attribute tripleAttr : triplesAttr) {
+ if (auto tripleStrAttr = dyn_cast<StringAttr>(tripleAttr))
+ config.TargetTriples.emplace_back(tripleStrAttr.getValue());
+ else
+ return failure();
+ }
+ return success();
+ }
+ return failure();
+ })
.Default([](Attribute) {
// Fall through for omp attributes that do not require lowering.
return success();
diff --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
index 0016a1f05a2b1..a14214cd8c1cb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir
@@ -7,7 +7,7 @@
// array bounds to lower to the full size of the array and the sectioned
// array to be the size of 3*3*1*element-byte-size (36 bytes in this case).
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_3d_target_array_section() {
%0 = llvm.mlir.addressof @_QFEinarray : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEoutarray : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
index 8635ea4956706..7c494e80155bb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.addressof @_QFEi : !llvm.ptr
%1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr
diff --git a/mlir/test/Target/LLVMIR/omptarget-depend.mlir b/mlir/test/Target/LLVMIR/omptarget-depend.mlir
index c386342005e5e..c66fe8f455dfb 100644
--- a/mlir/test/Target/LLVMIR/omptarget-depend.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-depend.mlir
@@ -1,4 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() attributes {fir.bindc_name = "main"} {
%0 = llvm.mlir.constant(39 : index) : i64
%1 = llvm.mlir.constant(0 : index) : i64
@@ -117,6 +119,7 @@
llvm.call @_FortranAProgramEndStatement() {fastmathFlags = #llvm.fastmath<contract>} : () -> ()
llvm.return %0 : i32
}
+}
// %strucArg holds pointers to shared data.
// CHECK: define void @_QQmain() {
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 9b46f84e5050f..f0e301bd70e3b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -6,7 +6,7 @@
// alongside the omp.map.info, the test utilises mapping of array sections,
// full arrays and individual allocated scalars.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(5 : index) : i64
%1 = llvm.mlir.constant(2 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
index 7273f53d0a3db..396628e1081e9 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir
@@ -5,7 +5,7 @@
// to LLVM-IR from MLIR when a fortran common block is lowered alongside
// the omp.map.info.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @omp_map_common_block_using_common_block_members() {
%0 = llvm.mlir.constant(4 : index) : i64
%1 = llvm.mlir.constant(0 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
index e4d82d4a58c89..8cec94abf968b 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
@@ -7,7 +7,7 @@
// derived type) where members of both the nested and outer record type have
// members mapped.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(10 : index) : i64
%1 = llvm.mlir.constant(4 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
index c7a87e44d6537..bbfcb4eecb3e8 100644
--- a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir
@@ -6,7 +6,7 @@
// (C++/C class/structure, Fortran derived type) where only members of the record
// type are mapped.
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(10 : index) : i64
%1 = llvm.mlir.constant(4 : index) : i64
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
new file mode 100644
index 0000000000000..61b6f3b91cd79
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir
@@ -0,0 +1,54 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = false} {
+ llvm.func @omp_target_region_() {
+ %0 = llvm.mlir.constant(20 : i32) : i32
+ %1 = llvm.mlir.constant(10 : i32) : i32
+ %2 = llvm.mlir.constant(1 : i64) : i64
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(1 : i64) : i64
+ %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(1 : i64) : i64
+ %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr
+ llvm.store %1, %3 : i32, !llvm.ptr
+ llvm.store %0, %5 : i32, !llvm.ptr
+ %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""}
+ omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+ %8 = llvm.load %arg0 : !llvm.ptr -> i32
+ %9 = llvm.load %arg1 : !llvm.ptr -> i32
+ %10 = llvm.add %8, %9 : i32
+ llvm.store %10, %arg2 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+ }
+
+ llvm.func @omp_target_no_map() {
+ omp.target {
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: define void @omp_target_region_()
+// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK: call void @__omp_offloading_[[DEV:.*]]_[[FIL:.*]]_omp_target_region__l[[LINE1:.*]](ptr %{{.*}}, ptr %{{.*}}, ptr %{{.*}})
+// CHECK-NEXT: ret void
+
+// CHECK: define void @omp_target_no_map()
+// CHECK-NOT: call i32 @__tgt_target_kernel({{.*}})
+// CHECK: call void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_no_map_l[[LINE2:.*]]()
+// CHECK-NEXT: ret void
+
+// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_region__l[[LINE1]](ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
+// CHECK: %[[VAL_A:.*]] = load i32, ptr %[[ADDR_A]], align 4
+// CHECK: %[[VAL_B:.*]] = load i32, ptr %[[ADDR_B]], align 4
+// CHECK: %[[SUM:.*]] = add i32 %[[VAL_A]], %[[VAL_B]]
+// CHECK: store i32 %[[SUM]], ptr %[[ADDR_C]], align 4
+
+// CHECK: define internal void @__omp_offloading_[[DEV]]_[[FIL]]_omp_target_no_map_l[[LINE2]]()
+// CHECK: ret void
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
index a32ee3e184e26..3af960d6ffcd0 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-module attributes {omp.is_target_device = false} {
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
llvm.func @omp_target_region_() {
%0 = llvm.mlir.constant(20 : i32) : i32
%1 = llvm.mlir.constant(10 : i32) : i32
More information about the llvm-branch-commits
mailing list