[llvm-branch-commits] [flang] [llvm] [mlir] [MLIR][OpenMP][OMPIRBuilder] Improve shared memory checks (PR #161864)
Sergio Afonso via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Feb 23 06:36:47 PST 2026
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/161864
>From 406f24203e40130318f319ad87cf8b2311d79b36 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 16 Sep 2025 14:18:39 +0100
Subject: [PATCH 1/5] [MLIR][OpenMP][OMPIRBuilder] Improve shared memory checks
This patch refines the checks that decide whether to use device shared memory
or regular stack allocations. In particular, it adds support for parallel
regions residing in standalone target device functions.
The changes are:
- Shared memory is introduced for `omp.target` implicit allocations, such as
those related to privatization and mapping, as long as they are shared across
threads in a nested parallel region.
- Standalone target device functions are interpreted as being part of a Generic
kernel, since the fact that they are present in the module after filtering
means they must be reachable from a target region.
- Allocations whose only shared uses inside of an `omp.parallel` region are as
part of a `private` clause are no longer moved to device shared memory.
---
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 4 +-
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 5 +-
.../Frontend/OpenMPIRBuilderTest.cpp | 35 ++---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 122 ++++++++++++------
.../LLVMIR/omptarget-parallel-llvm.mlir | 8 +-
.../fortran/target-generic-outlined-loops.f90 | 109 ++++++++++++++++
6 files changed, 222 insertions(+), 61 deletions(-)
create mode 100644 offload/test/offloading/fortran/target-generic-outlined-loops.f90
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 865be4d1f1c93..c292d4924d0ce 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3557,8 +3557,8 @@ class OpenMPIRBuilder {
ArrayRef<BasicBlock *> DeallocBlocks)>;
using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointOrErrorTy(
- Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP)>;
+ Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocIP,
+ InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs)>;
/// Generator for '#omp target'
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index fb768c2fe443c..e46d7ad032b38 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8736,8 +8736,9 @@ static Expected<Function *> createOutlinedFunction(
Argument &Arg = std::get<1>(InArg);
Value *InputCopy = nullptr;
- llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
- ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP());
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = ArgAccessorFuncCB(
+ Arg, Input, InputCopy, AllocaIP, Builder.saveIP(),
+ OpenMPIRBuilder::InsertPointTy(ExitBB, ExitBB->begin()));
if (!AfterIP)
return AfterIP.takeError();
Builder.restoreIP(*AfterIP);
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 596a8818fe6b2..8a205990cbf21 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -6474,7 +6474,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
auto SimpleArgAccessorCB =
[&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
- llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
+ llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> DeallocIPs) {
IRBuilderBase::InsertPointGuard guard(Builder);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
if (!OMPBuilder.Config.isTargetDevice()) {
@@ -6640,7 +6641,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
auto SimpleArgAccessorCB =
[&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
- llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
+ llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> DeallocIPs) {
IRBuilderBase::InsertPointGuard guard(Builder);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
if (!OMPBuilder.Config.isTargetDevice()) {
@@ -6842,12 +6844,13 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) {
return Builder.saveIP();
};
- auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&,
- OpenMPIRBuilder::InsertPointTy,
- OpenMPIRBuilder::InsertPointTy CodeGenIP) {
- Builder.restoreIP(CodeGenIP);
- return Builder.saveIP();
- };
+ auto SimpleArgAccessorCB =
+ [&](Argument &, Value *, Value *&, OpenMPIRBuilder::InsertPointTy,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy>) {
+ Builder.restoreIP(CodeGenIP);
+ return Builder.saveIP();
+ };
SmallVector<Value *> Inputs;
OpenMPIRBuilder::MapInfosTy CombinedInfos;
@@ -6942,12 +6945,13 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) {
Function *OutlinedFn = nullptr;
SmallVector<Value *> CapturedArgs;
- auto SimpleArgAccessorCB = [&](Argument &, Value *, Value *&,
- OpenMPIRBuilder::InsertPointTy,
- OpenMPIRBuilder::InsertPointTy CodeGenIP) {
- Builder.restoreIP(CodeGenIP);
- return Builder.saveIP();
- };
+ auto SimpleArgAccessorCB =
+ [&](Argument &, Value *, Value *&, OpenMPIRBuilder::InsertPointTy,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy>) {
+ Builder.restoreIP(CodeGenIP);
+ return Builder.saveIP();
+ };
OpenMPIRBuilder::MapInfosTy CombinedInfos;
auto GenMapInfoCB =
@@ -7041,7 +7045,8 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
auto SimpleArgAccessorCB =
[&](llvm::Argument &Arg, llvm::Value *Input, llvm::Value *&RetVal,
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
- llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) {
+ llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> DeallocIPs) {
IRBuilderBase::InsertPointGuard guard(Builder);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
if (!OMPBuilder.Config.isTargetDevice()) {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index dfe714085b8af..bd6fee9bf0c13 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1144,9 +1144,10 @@ struct DeferredStore {
} // namespace
/// Check whether allocations for the given operation might potentially have to
-/// be done in device shared memory. That means we're compiling for a offloading
-/// target, the operation is an `omp::TargetOp` or nested inside of one and that
-/// target region represents a Generic (non-SPMD) kernel.
+/// be done in device shared memory. That means we're compiling for an
+/// offloading target, the operation is neither an `omp::TargetOp` nor nested
+/// inside of one, or it is and that target region represents a Generic
+/// (non-SPMD) kernel.
///
/// This represents a necessary but not sufficient set of conditions to use
/// device shared memory in place of regular allocas. For some variables, the
@@ -1162,7 +1163,7 @@ mightAllocInDeviceSharedMemory(Operation &op,
if (!targetOp)
targetOp = op.getParentOfType<omp::TargetOp>();
- return targetOp &&
+ return !targetOp ||
targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()) ==
omp::TargetExecMode::generic;
}
@@ -1176,18 +1177,36 @@ mightAllocInDeviceSharedMemory(Operation &op,
/// operation that owns the specified block argument.
static bool mustAllocPrivateVarInDeviceSharedMemory(BlockArgument value) {
Operation *parentOp = value.getOwner()->getParentOp();
- auto targetOp = dyn_cast<omp::TargetOp>(parentOp);
- if (!targetOp)
- targetOp = parentOp->getParentOfType<omp::TargetOp>();
- assert(targetOp && "expected a parent omp.target operation");
-
+ auto moduleOp = parentOp->getParentOfType<ModuleOp>();
for (auto *user : value.getUsers()) {
if (auto parallelOp = dyn_cast<omp::ParallelOp>(user)) {
if (llvm::is_contained(parallelOp.getReductionVars(), value))
return true;
} else if (auto parallelOp = user->getParentOfType<omp::ParallelOp>()) {
- if (parentOp->isProperAncestor(parallelOp))
- return true;
+ if (parentOp->isProperAncestor(parallelOp)) {
+ // If it is used directly inside of a parallel region, skip private
+ // clause uses.
+ bool isPrivateClauseUse = false;
+ if (auto argIface = dyn_cast<omp::BlockArgOpenMPOpInterface>(user)) {
+ if (auto privateSyms = llvm::cast_or_null<ArrayAttr>(
+ user->getAttr("private_syms"))) {
+ for (auto [var, sym] :
+ llvm::zip_equal(argIface.getPrivateVars(), privateSyms)) {
+ if (var != value)
+ continue;
+
+ auto privateOp = cast<omp::PrivateClauseOp>(
+ moduleOp.lookupSymbol(cast<SymbolRefAttr>(sym)));
+ if (privateOp.getCopyRegion().empty()) {
+ isPrivateClauseUse = true;
+ break;
+ }
+ }
+ }
+ }
+ if (!isPrivateClauseUse)
+ return true;
+ }
}
}
@@ -1212,8 +1231,8 @@ allocReductionVars(T op, ArrayRef<BlockArgument> reductionArgs,
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
- bool useDeviceSharedMem =
- isa<omp::TeamsOp>(op) && mightAllocInDeviceSharedMemory(*op, *ompBuilder);
+ bool useDeviceSharedMem = isa<omp::TeamsOp>(*op) &&
+ mightAllocInDeviceSharedMemory(*op, *ompBuilder);
// delay creating stores until after all allocas
deferredStores.reserve(op.getNumReductionVars());
@@ -1344,8 +1363,8 @@ initReductionVars(OP op, ArrayRef<BlockArgument> reductionArgs,
return success();
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
- bool useDeviceSharedMem =
- isa<omp::TeamsOp>(op) && mightAllocInDeviceSharedMemory(*op, *ompBuilder);
+ bool useDeviceSharedMem = isa<omp::TeamsOp>(*op) &&
+ mightAllocInDeviceSharedMemory(*op, *ompBuilder);
llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
auto allocaIP = llvm::IRBuilderBase::InsertPoint(
@@ -1588,8 +1607,8 @@ static LogicalResult createReductionsAndCleanup(
reductionRegions, privateReductionVariables, moduleTranslation, builder,
"omp.reduction.cleanup");
- bool useDeviceSharedMem =
- isa<omp::TeamsOp>(op) && mightAllocInDeviceSharedMemory(*op, *ompBuilder);
+ bool useDeviceSharedMem = isa<omp::TeamsOp>(*op) &&
+ mightAllocInDeviceSharedMemory(*op, *ompBuilder);
if (useDeviceSharedMem) {
for (auto [var, reductionDecl] :
llvm::zip_equal(privateReductionVariables, reductionDecls))
@@ -1781,7 +1800,7 @@ allocatePrivateVars(T op, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
bool mightUseDeviceSharedMem =
- isa<omp::TeamsOp, omp::DistributeOp>(*op) &&
+ isa<omp::TargetOp, omp::TeamsOp, omp::DistributeOp>(*op) &&
mightAllocInDeviceSharedMemory(*op, *ompBuilder);
unsigned int allocaAS =
moduleTranslation.getLLVMModule()->getDataLayout().getAllocaAddrSpace();
@@ -1935,7 +1954,7 @@ cleanupPrivateVars(T op, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
bool mightUseDeviceSharedMem =
- isa<omp::TeamsOp, omp::DistributeOp>(*op) &&
+ isa<omp::TargetOp, omp::TeamsOp, omp::DistributeOp>(*op) &&
mightAllocInDeviceSharedMemory(*op, *ompBuilder);
for (auto [privDecl, llvmPrivVar, blockArg] :
llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.llvmVars,
@@ -6190,42 +6209,68 @@ handleDeclareTargetMapVar(MapInfoData &mapData,
// a store of the kernel argument into this allocated memory which
// will then be loaded from, ByCopy will use the allocated memory
// directly.
-static llvm::IRBuilderBase::InsertPoint
-createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
- llvm::Value *input, llvm::Value *&retVal,
- llvm::IRBuilderBase &builder,
- llvm::OpenMPIRBuilder &ompBuilder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::IRBuilderBase::InsertPoint allocaIP,
- llvm::IRBuilderBase::InsertPoint codeGenIP) {
+static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(
+ omp::TargetOp targetOp, MapInfoData &mapData, llvm::Argument &arg,
+ llvm::Value *input, llvm::Value *&retVal, llvm::IRBuilderBase &builder,
+ llvm::OpenMPIRBuilder &ompBuilder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::IRBuilderBase::InsertPoint allocIP,
+ llvm::IRBuilderBase::InsertPoint codeGenIP,
+ llvm::ArrayRef<llvm::IRBuilderBase::InsertPoint> deallocIPs) {
assert(ompBuilder.Config.isTargetDevice() &&
"function only supported for target device codegen");
- builder.restoreIP(allocaIP);
+ builder.restoreIP(allocIP);
omp::VariableCaptureKind capture = omp::VariableCaptureKind::ByRef;
LLVM::TypeToLLVMIRTranslator typeToLLVMIRTranslator(
ompBuilder.M.getContext());
unsigned alignmentValue = 0;
+ BlockArgument mlirArg;
// Find the associated MapInfoData entry for the current input
- for (size_t i = 0; i < mapData.MapClause.size(); ++i)
+ for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
if (mapData.OriginalValue[i] == input) {
auto mapOp = cast<omp::MapInfoOp>(mapData.MapClause[i]);
capture = mapOp.getMapCaptureType();
// Get information of alignment of mapped object
alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
mapOp.getVarType(), ompBuilder.M.getDataLayout());
+ // Get the corresponding target entry block argument
+ mlirArg =
+ cast<omp::BlockArgOpenMPOpInterface>(*targetOp).getMapBlockArgs()[i];
break;
}
+ }
unsigned int allocaAS = ompBuilder.M.getDataLayout().getAllocaAddrSpace();
unsigned int defaultAS =
ompBuilder.M.getDataLayout().getProgramAddressSpace();
- // Create the alloca for the argument the current point.
- llvm::Value *v = builder.CreateAlloca(arg.getType(), allocaAS);
+ // Create the allocation for the argument.
+ llvm::Value *v = nullptr;
+ if (mightAllocInDeviceSharedMemory(*targetOp, ompBuilder) &&
+ mustAllocPrivateVarInDeviceSharedMemory(mlirArg)) {
+ // Use the beginning of the codeGenIP rather than the usual allocation point
+ // for shared memory allocations because otherwise these would be done prior
+ // to the target initialization call. Also, the exit block (where the
+ // deallocation is placed) is only executed if the initialization call
+ // succeeds.
+ builder.SetInsertPoint(codeGenIP.getBlock()->getFirstInsertionPt());
+ v = ompBuilder.createOMPAllocShared(builder, arg.getType());
+
+ // Create deallocations in all provided deallocation points and then restore
+ // the insertion point to right after the new allocations.
+ llvm::IRBuilderBase::InsertPointGuard guard(builder);
+ for (auto deallocIP : deallocIPs) {
+ builder.SetInsertPoint(deallocIP.getBlock(), deallocIP.getPoint());
+ ompBuilder.createOMPFreeShared(builder, v, arg.getType());
+ }
+ } else {
+ // Use the current point, which was previously set to allocIP.
+ v = builder.CreateAlloca(arg.getType(), allocaAS);
- if (allocaAS != defaultAS && arg.getType()->isPointerTy())
- v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
+ if (allocaAS != defaultAS && arg.getType()->isPointerTy())
+ v = builder.CreateAddrSpaceCast(v, builder.getPtrTy(defaultAS));
+ }
builder.CreateStore(&arg, v);
@@ -6831,8 +6876,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
};
auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
- llvm::Value *&retVal, InsertPointTy allocaIP,
- InsertPointTy codeGenIP)
+ llvm::Value *&retVal, InsertPointTy allocIP,
+ InsertPointTy codeGenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs)
-> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.SetCurrentDebugLocation(llvm::DebugLoc());
@@ -6846,9 +6892,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
return codeGenIP;
}
- return createDeviceArgumentAccessor(mapData, arg, input, retVal, builder,
- *ompBuilder, moduleTranslation,
- allocaIP, codeGenIP);
+ return createDeviceArgumentAccessor(targetOp, mapData, arg, input, retVal,
+ builder, *ompBuilder, moduleTranslation,
+ allocIP, codeGenIP, deallocIPs);
};
llvm::OpenMPIRBuilder::TargetKernelRuntimeAttrs runtimeAttrs;
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index 0b1ce7fff1e04..c1016775270a6 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -55,15 +55,14 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
// CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]](
// CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) #{{[0-9]+}} {
// CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK: %[[TMP2:.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK: %[[TMP3:.*]] = addrspacecast ptr addrspace(5) %[[TMP2]] to ptr
-// CHECK: store ptr %[[TMP0]], ptr %[[TMP3]], align 8
// CHECK: %[[TMP4:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]])
// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP4]], -1
// CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
// CHECK: %[[TMP5:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr
// CHECK: %[[STRUCTARG:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
-// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP3]], align 8
+// CHECK: %[[TMP2:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+// CHECK: store ptr %[[TMP0]], ptr %[[TMP2]], align 8
+// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP2]], align 8
// CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
// CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0
// CHECK: store ptr %[[TMP6]], ptr %[[GEP_]], align 8
@@ -71,6 +70,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
// CHECK: store ptr %[[STRUCTARG]], ptr %[[TMP7]], align 8
// CHECK: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP5]], i64 1, i32 0)
// CHECK: call void @__kmpc_free_shared(ptr %[[STRUCTARG]], i64 8)
+// CHECK: call void @__kmpc_free_shared(ptr %[[TMP2]], i64 8)
// CHECK: call void @__kmpc_target_deinit()
// CHECK: define internal void @[[FUNC1]](
diff --git a/offload/test/offloading/fortran/target-generic-outlined-loops.f90 b/offload/test/offloading/fortran/target-generic-outlined-loops.f90
new file mode 100644
index 0000000000000..594809027e115
--- /dev/null
+++ b/offload/test/offloading/fortran/target-generic-outlined-loops.f90
@@ -0,0 +1,109 @@
+! Offloading test for generic target regions containing different kinds of
+! loop constructs inside, moving parallel regions into a separate subroutine.
+! REQUIRES: flang, amdgpu
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+subroutine parallel_loop(n, counter)
+ implicit none
+ integer, intent(in) :: n
+ integer, intent(inout) :: counter
+ integer :: i
+
+ !$omp parallel do reduction(+:counter)
+ do i=1, n
+ counter = counter + 1
+ end do
+end subroutine
+
+program main
+ integer :: i1, i2, n1, n2, counter
+
+ n1 = 100
+ n2 = 50
+
+ counter = 0
+ !$omp target map(tofrom:counter)
+ !$omp teams distribute reduction(+:counter)
+ do i1=1, n1
+ counter = counter + 1
+ end do
+ !$omp end target
+
+ ! CHECK: 1 100
+ print '(I2" "I0)', 1, counter
+
+ counter = 0
+ !$omp target map(tofrom:counter)
+ call parallel_loop(n1, counter)
+ call parallel_loop(n1, counter)
+ !$omp end target
+
+ ! CHECK: 2 200
+ print '(I2" "I0)', 2, counter
+
+ counter = 0
+ !$omp target map(tofrom:counter)
+ counter = counter + 1
+ call parallel_loop(n1, counter)
+ counter = counter + 1
+ call parallel_loop(n1, counter)
+ counter = counter + 1
+ !$omp end target
+
+ ! CHECK: 3 203
+ print '(I2" "I0)', 3, counter
+
+ counter = 0
+ !$omp target map(tofrom: counter)
+ counter = counter + 1
+ call parallel_loop(n1, counter)
+ counter = counter + 1
+ !$omp end target
+
+ ! CHECK: 4 102
+ print '(I2" "I0)', 4, counter
+
+
+ counter = 0
+ !$omp target teams distribute reduction(+:counter)
+ do i1=1, n1
+ call parallel_loop(n2, counter)
+ end do
+
+ ! CHECK: 5 5000
+ print '(I2" "I0)', 5, counter
+
+ counter = 0
+ !$omp target teams distribute reduction(+:counter)
+ do i1=1, n1
+ counter = counter + 1
+ call parallel_loop(n2, counter)
+ counter = counter + 1
+ end do
+
+ ! CHECK: 6 5200
+ print '(I2" "I0)', 6, counter
+
+ counter = 0
+ !$omp target teams distribute reduction(+:counter)
+ do i1=1, n1
+ call parallel_loop(n2, counter)
+ call parallel_loop(n2, counter)
+ end do
+
+ ! CHECK: 7 10000
+ print '(I2" "I0)', 7, counter
+
+ counter = 0
+ !$omp target teams distribute reduction(+:counter)
+ do i1=1, n1
+ counter = counter + 1
+ call parallel_loop(n2, counter)
+ counter = counter + 1
+ call parallel_loop(n2, counter)
+ counter = counter + 1
+ end do
+
+ ! CHECK: 8 10300
+ print '(I2" "I0)', 8, counter
+end program
>From c357bbfefa80c7a9f8d853af1243eb2c2c705114 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <Sergio.AfonsoFumero at amd.com>
Date: Wed, 4 Feb 2026 13:44:12 +0000
Subject: [PATCH 2/5] add missing check
---
.../OpenMP/target-use-device-nested.f90 | 25 +++---
.../OpenMP/threadprivate-target-device.f90 | 14 ++--
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 7 +-
.../omptarget-constant-alloca-raise.mlir | 2 +-
.../openmp-target-private-shared-mem.mlir | 76 +++++++++++++++++++
5 files changed, 101 insertions(+), 23 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir
diff --git a/flang/test/Integration/OpenMP/target-use-device-nested.f90 b/flang/test/Integration/OpenMP/target-use-device-nested.f90
index 9bb4c39842731..97644383f00ed 100644
--- a/flang/test/Integration/OpenMP/target-use-device-nested.f90
+++ b/flang/test/Integration/OpenMP/target-use-device-nested.f90
@@ -7,7 +7,7 @@
!===----------------------------------------------------------------------===!
! This tests check that target code nested inside a target data region which
-! has only use_device_ptr mapping corectly generates code on the device pass.
+! has only use_device_ptr mapping correctly generates code on the device pass.
!REQUIRES: amdgpu-registered-target
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-llvm -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s
@@ -25,22 +25,21 @@ program main
! CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading{{.*}}main_
! CHECK-NEXT: entry:
-! CHECK-NEXT: %[[VAL_3:.*]] = alloca ptr, align 8, addrspace(5)
-! CHECK-NEXT: %[[ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[VAL_3]] to ptr
-! CHECK-NEXT: store ptr %[[VAL_4:.*]], ptr %[[ASCAST]], align 8
-! CHECK-NEXT: %[[VAL_5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_{{.*}}_kernel_environment to ptr), ptr %[[VAL_6:.*]])
-! CHECK-NEXT: %[[VAL_7:.*]] = icmp eq i32 %[[VAL_5]], -1
-! CHECK-NEXT: br i1 %[[VAL_7]], label %[[VAL_8:.*]], label %[[VAL_9:.*]]
-! CHECK: user_code.entry: ; preds = %[[VAL_10:.*]]
-! CHECK-NEXT: %[[VAL_11:.*]] = load ptr, ptr %[[ASCAST]], align 8
+! CHECK-NEXT: %[[VAL_0:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_{{.*}}_kernel_environment to ptr), ptr %[[VAL_6:.*]])
+! CHECK-NEXT: %[[VAL_1:.*]] = icmp eq i32 %[[VAL_0]], -1
+! CHECK-NEXT: br i1 %[[VAL_1]], label %[[USER_ENTRY:.*]], label %[[EXIT:.*]]
+! CHECK: [[USER_ENTRY]]: ; preds = %entry
+! CHECK-NEXT: %[[VAL_2:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+! CHECK-NEXT: store ptr %[[VAL_3:.*]], ptr %[[VAL_2]], align 8
+! CHECK-NEXT: %[[VAL_4:.*]] = load ptr, ptr %[[VAL_2]], align 8
! CHECK-NEXT: br label %[[AFTER_ALLOC:.*]]
! CHECK: [[AFTER_ALLOC]]:
-! CHECK-NEXT: br label %[[VAL_12:.*]]
+! CHECK-NEXT: br label %[[VAL_5:.*]]
-! CHECK: [[VAL_12]]:
+! CHECK: [[VAL_5]]:
! CHECK-NEXT: br label %[[TARGET_REG_ENTRY:.*]]
-! CHECK: [[TARGET_REG_ENTRY]]: ; preds = %[[VAL_12]]
-! CHECK-NEXT: call void @{{.*}}foo{{.*}}(ptr %[[VAL_11]])
+! CHECK: [[TARGET_REG_ENTRY]]: ; preds = %[[VAL_5]]
+! CHECK-NEXT: call void @{{.*}}foo{{.*}}(ptr %[[VAL_4]])
! CHECK-NEXT: br label
diff --git a/flang/test/Integration/OpenMP/threadprivate-target-device.f90 b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
index 662d6c6357af0..2d5d073520abe 100644
--- a/flang/test/Integration/OpenMP/threadprivate-target-device.f90
+++ b/flang/test/Integration/OpenMP/threadprivate-target-device.f90
@@ -14,16 +14,14 @@
! target code in the same function.
! CHECK: define weak_odr protected amdgpu_kernel void @{{.*}}(ptr %{{.*}}, ptr %[[ARG1:.*]], ptr %[[ARG2:.*]]) #{{[0-9]+}} {
-! CHECK: %[[ALLOCA_X:.*]] = alloca ptr, align 8, addrspace(5)
-! CHECK: %[[ASCAST_X:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_X]] to ptr
-! CHECK: store ptr %[[ARG1]], ptr %[[ASCAST_X]], align 8
+! CHECK: %[[ALLOC_N:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+! CHECK: store ptr %[[ARG2]], ptr %[[ALLOC_N]], align 8
-! CHECK: %[[ALLOCA_N:.*]] = alloca ptr, align 8, addrspace(5)
-! CHECK: %[[ASCAST_N:.*]] = addrspacecast ptr addrspace(5) %[[ALLOCA_N]] to ptr
-! CHECK: store ptr %[[ARG2]], ptr %[[ASCAST_N]], align 8
+! CHECK: %[[ALLOC_X:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+! CHECK: store ptr %[[ARG1]], ptr %[[ALLOC_X]], align 8
-! CHECK: %[[LOAD_X:.*]] = load ptr, ptr %[[ASCAST_X]], align 8
-! CHECK: call void @bar_(ptr %[[LOAD_X]], ptr %[[ASCAST_N]])
+! CHECK: %[[LOAD_X:.*]] = load ptr, ptr %[[ALLOC_X]], align 8
+! CHECK: call void @bar_(ptr %[[LOAD_X]], ptr %[[ALLOC_N]])
module test
implicit none
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index bd6fee9bf0c13..47caec8d91c4c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1182,7 +1182,12 @@ static bool mustAllocPrivateVarInDeviceSharedMemory(BlockArgument value) {
if (auto parallelOp = dyn_cast<omp::ParallelOp>(user)) {
if (llvm::is_contained(parallelOp.getReductionVars(), value))
return true;
- } else if (auto parallelOp = user->getParentOfType<omp::ParallelOp>()) {
+ } else if (auto callOp = dyn_cast<CallOpInterface>(user)) {
+ if (llvm::is_contained(callOp.getArgOperands(), value))
+ return true;
+ }
+
+ if (auto parallelOp = user->getParentOfType<omp::ParallelOp>()) {
if (parentOp->isProperAncestor(parallelOp)) {
// If it is used directly inside of a parallel region, skip private
// clause uses.
diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
index 724e03885d146..3543a23f46d7d 100644
--- a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir
@@ -39,6 +39,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
// CHECK-NEXT: entry:
// CHECK-NEXT: %[[MOVED_ALLOCA1:.*]] = alloca { ptr }, align 8
// CHECK-NEXT: %[[MOVED_ALLOCA2:.*]] = alloca i32, i64 1, align 4
-// CHECK-NEXT: %[[MAP_ARG_ALLOCA:.*]] = alloca ptr, align 8
// CHECK: user_code.entry: ; preds = %entry
+// CHECK-NEXT: %[[MAP_ARG_ALLOC:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir
new file mode 100644
index 0000000000000..05faf582df649
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir
@@ -0,0 +1,76 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, dlti.dl_spec = #dlti.dl_spec<!llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<1> = dense<64> : vector<4xi64>, !llvm.ptr<2> = dense<32> : vector<4xi64>, !llvm.ptr<3> = dense<32> : vector<4xi64>, !llvm.ptr<4> = dense<64> : vector<4xi64>, !llvm.ptr<5> = dense<32> : vector<4xi64>, !llvm.ptr<6> = dense<32> : vector<4xi64>, !llvm.ptr<7> = dense<[160, 256, 256, 32]> : vector<4xi64>, !llvm.ptr<8> = dense<[128, 128, 128, 48]> : vector<4xi64>, !llvm.ptr<9> = dense<[192, 256, 256, 32]> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.legal_int_widths" = array<i32: 32, 64>, "dlti.stack_alignment" = 32 : i64, "dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>} {
+ omp.private {type = private} @simple_var.privatizer : i32
+ omp.declare_reduction @simple_var.reducer : i32 init {
+ ^bb0(%arg0: i32):
+ %0 = llvm.mlir.constant(0 : i32) : i32
+ omp.yield(%0 : i32)
+ } combiner {
+ ^bb0(%arg0: i32, %arg1: i32):
+ %0 = llvm.add %arg0, %arg1 : i32
+ omp.yield(%0 : i32)
+ }
+
+ // CHECK-LABEL: declare void @device_func(ptr)
+ llvm.func @device_func(!llvm.ptr) attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to)>}
+
+ // CHECK-NOT: define {{.*}} void @target_map_single_shared_mem_private
+ llvm.func @target_map_single_shared_mem_private() attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+
+ // CHECK-LABEL: define {{.*}} void @__omp_offloading_{{.*}}target_map_single_shared_mem_private{{.*}}({{.*}})
+ // CHECK: call i32 @__kmpc_target_init
+ // CHECK: %[[ALLOC0:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+ // CHECK: call void @device_func(ptr %[[ALLOC0]])
+ // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC0]], i64 4)
+ // CHECK: call void @__kmpc_target_deinit
+ omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) {
+ llvm.call @device_func(%arg0) : (!llvm.ptr) -> ()
+ omp.terminator
+ }
+
+ // CHECK-LABEL: define {{.*}} void @__omp_offloading_{{.*}}target_map_single_shared_mem_private{{.*}}({{.*}})
+ // CHECK: call i32 @__kmpc_target_init
+ // CHECK: %[[ALLOC_ARGS0:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+ // CHECK: %[[ALLOC1:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+ // CHECK: %[[GEP0:.*]] = getelementptr { ptr }, ptr %[[ALLOC_ARGS0]], i32 0, i32 0
+ // CHECK: store ptr %[[ALLOC1]], ptr %[[GEP0]], align 8
+ // CHECK: %[[GEP1:.*]] = getelementptr inbounds [1 x ptr], ptr %[[PAR_ARGS0:.*]], i64 0, i64 0
+ // CHECK: store ptr %[[ALLOC_ARGS0]], ptr %[[GEP1]], align 8
+ // CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr %[[PAR_ARGS0]], i64 1, i32 0)
+ // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC1]], i64 4)
+ // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC_ARGS0]], i64 8)
+ // CHECK: call void @__kmpc_target_deinit
+ omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) {
+ omp.parallel reduction(@simple_var.reducer %arg0 -> %arg1 : !llvm.ptr) {
+ %3 = llvm.load %arg1 : !llvm.ptr -> i32
+ omp.terminator
+ }
+ omp.terminator
+ }
+
+ // CHECK-LABEL: define {{.*}} void @__omp_offloading_{{.*}}target_map_single_shared_mem_private{{.*}}({{.*}})
+ // CHECK: call i32 @__kmpc_target_init
+ // CHECK: %[[ALLOC_ARGS1:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+ // CHECK: %[[ALLOC2:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
+ // CHECK: %[[GEP2:.*]] = getelementptr { ptr }, ptr %[[ALLOC_ARGS1]], i32 0, i32 0
+ // CHECK: store ptr %[[ALLOC2]], ptr %[[GEP2]], align 8
+ // CHECK: %[[GEP3:.*]] = getelementptr inbounds [1 x ptr], ptr %[[PAR_ARGS1:.*]], i64 0, i64 0
+ // CHECK: store ptr %[[ALLOC_ARGS1]], ptr %[[GEP3]], align 8
+ // CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr %[[PAR_ARGS1]], i64 1, i32 0)
+ // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC2]], i64 4)
+ // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC_ARGS1]], i64 8)
+ // CHECK: call void @__kmpc_target_deinit
+ omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) {
+ omp.parallel {
+ %4 = llvm.load %arg0 : !llvm.ptr -> i32
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+}
>From d32d77e9e45c5789356b5ea0eeeb2a646419308d Mon Sep 17 00:00:00 2001
From: Sergio Afonso <Sergio.AfonsoFumero at amd.com>
Date: Thu, 5 Feb 2026 12:15:14 +0000
Subject: [PATCH 3/5] support other map-like clauses
---
.../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 47caec8d91c4c..ef5d096ee9bb3 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -6231,6 +6231,9 @@ static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(
ompBuilder.M.getContext());
unsigned alignmentValue = 0;
BlockArgument mlirArg;
+ SmallVector<std::pair<Value, BlockArgument>> blockArgsPairs;
+ cast<omp::BlockArgOpenMPOpInterface>(*targetOp).getBlockArgsPairs(
+ blockArgsPairs);
// Find the associated MapInfoData entry for the current input
for (size_t i = 0; i < mapData.MapClause.size(); ++i) {
if (mapData.OriginalValue[i] == input) {
@@ -6239,9 +6242,16 @@ static llvm::IRBuilderBase::InsertPoint createDeviceArgumentAccessor(
// Get information of alignment of mapped object
alignmentValue = typeToLLVMIRTranslator.getPreferredAlignment(
mapOp.getVarType(), ompBuilder.M.getDataLayout());
- // Get the corresponding target entry block argument
- mlirArg =
- cast<omp::BlockArgOpenMPOpInterface>(*targetOp).getMapBlockArgs()[i];
+
+ // Find the corresponding entry block argument, which can be associated
+ // with a map, use_device* or has_device* clause.
+ for (auto &[val, arg] : blockArgsPairs) {
+ if (mapOp.getResult() == val) {
+ mlirArg = arg;
+ break;
+ }
+ }
+ assert(mlirArg && "expected to find entry block argument for map clause");
break;
}
}
>From f303e08651624b0b0241e277f6987b81d7ae4138 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <Sergio.AfonsoFumero at amd.com>
Date: Tue, 17 Feb 2026 17:20:35 +0000
Subject: [PATCH 4/5] update after rebase
---
mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir
index 05faf582df649..1481d8133cb0c 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir
@@ -41,8 +41,8 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
// CHECK: %[[GEP1:.*]] = getelementptr inbounds [1 x ptr], ptr %[[PAR_ARGS0:.*]], i64 0, i64 0
// CHECK: store ptr %[[ALLOC_ARGS0]], ptr %[[GEP1]], align 8
// CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr %[[PAR_ARGS0]], i64 1, i32 0)
- // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC1]], i64 4)
// CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC_ARGS0]], i64 8)
+ // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC1]], i64 4)
// CHECK: call void @__kmpc_target_deinit
omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) {
omp.parallel reduction(@simple_var.reducer %arg0 -> %arg1 : !llvm.ptr) {
@@ -61,8 +61,8 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd
// CHECK: %[[GEP3:.*]] = getelementptr inbounds [1 x ptr], ptr %[[PAR_ARGS1:.*]], i64 0, i64 0
// CHECK: store ptr %[[ALLOC_ARGS1]], ptr %[[GEP3]], align 8
// CHECK: call void @__kmpc_parallel_60(ptr {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr %[[PAR_ARGS1]], i64 1, i32 0)
- // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC2]], i64 4)
// CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC_ARGS1]], i64 8)
+ // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC2]], i64 4)
// CHECK: call void @__kmpc_target_deinit
omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) {
omp.parallel {
>From 7cfd76db241305a0f0bd1dd2727d4bf129dff0d5 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <Sergio.AfonsoFumero at amd.com>
Date: Thu, 19 Feb 2026 12:38:56 +0000
Subject: [PATCH 5/5] add internal linkage to target device functions
---
.../Optimizer/OpenMP/FunctionFiltering.cpp | 6 +++
.../OpenMP/declare-target-func-and-subr.f90 | 46 +++++++++----------
...arget-implicit-func-and-subr-cap-enter.f90 | 40 ++++++++--------
...lare-target-implicit-func-and-subr-cap.f90 | 44 +++++++++---------
.../declare-target-implicit-tarop-cap.f90 | 14 +++---
.../Lower/OpenMP/function-filtering-2.f90 | 45 ++++++++++--------
6 files changed, 104 insertions(+), 91 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
index 472d6a9f08a6e..4a132365e437e 100644
--- a/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
+++ b/flang/lib/Optimizer/OpenMP/FunctionFiltering.cpp
@@ -181,6 +181,12 @@ class FunctionFilteringPass
declareTargetOp.setDeclareTarget(
declareType, omp::DeclareTargetCaptureClause::to,
declareTargetOp.getDeclareTargetAutomap());
+ } else if (!funcOp.isExternal()) {
+ // For user-defined device functions, set internal linkage.
+ auto internalLinkage = mlir::LLVM::linkage::Linkage::Internal;
+ auto linkage =
+ mlir::LLVM::LinkageAttr::get(funcOp->getContext(), internalLinkage);
+ funcOp->setAttr("llvm.linkage", linkage);
}
return WalkResult::advance();
});
diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90
index d6175dd8730c2..83f303541e132 100644
--- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90
+++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90
@@ -6,7 +6,7 @@
! zero clause declare target
! DEVICE-LABEL: func.func @_QPfunc_t_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
FUNCTION FUNC_T_DEVICE() RESULT(I)
!$omp declare target to(FUNC_T_DEVICE) device_type(nohost)
INTEGER :: I
@@ -14,7 +14,7 @@ FUNCTION FUNC_T_DEVICE() RESULT(I)
END FUNCTION FUNC_T_DEVICE
! DEVICE-LABEL: func.func @_QPfunc_enter_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
FUNCTION FUNC_ENTER_DEVICE() RESULT(I)
!$omp declare target enter(FUNC_ENTER_DEVICE) device_type(nohost)
INTEGER :: I
@@ -22,7 +22,7 @@ FUNCTION FUNC_ENTER_DEVICE() RESULT(I)
END FUNCTION FUNC_ENTER_DEVICE
! HOST-LABEL: func.func @_QPfunc_t_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}
+! HOST-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}
FUNCTION FUNC_T_HOST() RESULT(I)
!$omp declare target to(FUNC_T_HOST) device_type(host)
INTEGER :: I
@@ -30,7 +30,7 @@ FUNCTION FUNC_T_HOST() RESULT(I)
END FUNCTION FUNC_T_HOST
! HOST-LABEL: func.func @_QPfunc_enter_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}
+! HOST-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}
FUNCTION FUNC_ENTER_HOST() RESULT(I)
!$omp declare target enter(FUNC_ENTER_HOST) device_type(host)
INTEGER :: I
@@ -38,7 +38,7 @@ FUNCTION FUNC_ENTER_HOST() RESULT(I)
END FUNCTION FUNC_ENTER_HOST
! ALL-LABEL: func.func @_QPfunc_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
FUNCTION FUNC_T_ANY() RESULT(I)
!$omp declare target to(FUNC_T_ANY) device_type(any)
INTEGER :: I
@@ -46,7 +46,7 @@ FUNCTION FUNC_T_ANY() RESULT(I)
END FUNCTION FUNC_T_ANY
! ALL-LABEL: func.func @_QPfunc_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
FUNCTION FUNC_ENTER_ANY() RESULT(I)
!$omp declare target enter(FUNC_ENTER_ANY) device_type(any)
INTEGER :: I
@@ -54,7 +54,7 @@ FUNCTION FUNC_ENTER_ANY() RESULT(I)
END FUNCTION FUNC_ENTER_ANY
! ALL-LABEL: func.func @_QPfunc_default_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I)
!$omp declare target to(FUNC_DEFAULT_T_ANY)
INTEGER :: I
@@ -62,7 +62,7 @@ FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I)
END FUNCTION FUNC_DEFAULT_T_ANY
! ALL-LABEL: func.func @_QPfunc_default_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I)
!$omp declare target enter(FUNC_DEFAULT_ENTER_ANY)
INTEGER :: I
@@ -70,7 +70,7 @@ FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I)
END FUNCTION FUNC_DEFAULT_ENTER_ANY
! ALL-LABEL: func.func @_QPfunc_default_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
FUNCTION FUNC_DEFAULT_ANY() RESULT(I)
!$omp declare target
INTEGER :: I
@@ -78,7 +78,7 @@ FUNCTION FUNC_DEFAULT_ANY() RESULT(I)
END FUNCTION FUNC_DEFAULT_ANY
! ALL-LABEL: func.func @_QPfunc_default_extendedlist()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
FUNCTION FUNC_DEFAULT_EXTENDEDLIST() RESULT(I)
!$omp declare target(FUNC_DEFAULT_EXTENDEDLIST)
INTEGER :: I
@@ -86,7 +86,7 @@ FUNCTION FUNC_DEFAULT_EXTENDEDLIST() RESULT(I)
END FUNCTION FUNC_DEFAULT_EXTENDEDLIST
! ALL-LABEL: func.func @_QPfunc_name_as_result()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
FUNCTION FUNC_NAME_AS_RESULT()
!$omp declare target(FUNC_NAME_AS_RESULT)
FUNC_NAME_AS_RESULT = 1.0
@@ -99,61 +99,61 @@ END FUNCTION FUNC_NAME_AS_RESULT
! zero clause declare target
! DEVICE-LABEL: func.func @_QPsubr_t_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
SUBROUTINE SUBR_T_DEVICE()
!$omp declare target to(SUBR_T_DEVICE) device_type(nohost)
END
! DEVICE-LABEL: func.func @_QPsubr_enter_device()
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
SUBROUTINE SUBR_ENTER_DEVICE()
!$omp declare target enter(SUBR_ENTER_DEVICE) device_type(nohost)
END
! HOST-LABEL: func.func @_QPsubr_t_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}
+! HOST-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}
SUBROUTINE SUBR_T_HOST()
!$omp declare target to(SUBR_T_HOST) device_type(host)
END
! HOST-LABEL: func.func @_QPsubr_enter_host()
-! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}
+! HOST-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}
SUBROUTINE SUBR_ENTER_HOST()
!$omp declare target enter(SUBR_ENTER_HOST) device_type(host)
END
! ALL-LABEL: func.func @_QPsubr_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
SUBROUTINE SUBR_T_ANY()
!$omp declare target to(SUBR_T_ANY) device_type(any)
END
! ALL-LABEL: func.func @_QPsubr_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
SUBROUTINE SUBR_ENTER_ANY()
!$omp declare target enter(SUBR_ENTER_ANY) device_type(any)
END
! ALL-LABEL: func.func @_QPsubr_default_t_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
SUBROUTINE SUBR_DEFAULT_T_ANY()
!$omp declare target to(SUBR_DEFAULT_T_ANY)
END
! ALL-LABEL: func.func @_QPsubr_default_enter_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}
SUBROUTINE SUBR_DEFAULT_ENTER_ANY()
!$omp declare target enter(SUBR_DEFAULT_ENTER_ANY)
END
! ALL-LABEL: func.func @_QPsubr_default_any()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
SUBROUTINE SUBR_DEFAULT_ANY()
!$omp declare target
END
! ALL-LABEL: func.func @_QPsubr_default_extendedlist()
-! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
+! ALL-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}
SUBROUTINE SUBR_DEFAULT_EXTENDEDLIST()
!$omp declare target(SUBR_DEFAULT_EXTENDEDLIST)
END
@@ -161,7 +161,7 @@ SUBROUTINE SUBR_DEFAULT_EXTENDEDLIST()
!! -----
! DEVICE-LABEL: func.func @_QPrecursive_declare_target
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}
RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K)
!$omp declare target to(RECURSIVE_DECLARE_TARGET) device_type(nohost)
INTEGER :: INCREMENT, K
@@ -173,7 +173,7 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K)
END FUNCTION RECURSIVE_DECLARE_TARGET
! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}
RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K)
!$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost)
INTEGER :: INCREMENT, K
diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90
index e8709f23c5413..2e993850b8b85 100644
--- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90
+++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90
@@ -4,7 +4,7 @@
!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE
! CHECK-LABEL: func.func @_QPimplicitly_captured_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_twice() result(k)
integer :: i
i = 10
@@ -12,7 +12,7 @@ function implicitly_captured_twice() result(k)
end function implicitly_captured_twice
! CHECK-LABEL: func.func @_QPtarget_function_twice_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
function target_function_twice_host() result(i)
!$omp declare target enter(target_function_twice_host) device_type(host)
integer :: i
@@ -20,7 +20,7 @@ function target_function_twice_host() result(i)
end function target_function_twice_host
! DEVICE-LABEL: func.func @_QPtarget_function_twice_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function target_function_twice_device() result(i)
!$omp declare target enter(target_function_twice_device) device_type(nohost)
integer :: i
@@ -30,7 +30,7 @@ end function target_function_twice_device
!! -----
! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_nest() result(k)
integer :: i
i = 10
@@ -38,13 +38,13 @@ function implicitly_captured_nest() result(k)
end function implicitly_captured_nest
! DEVICE-LABEL: func.func @_QPimplicitly_captured_one
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter){{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter){{.*}}}
function implicitly_captured_one() result(k)
k = implicitly_captured_nest()
end function implicitly_captured_one
! DEVICE-LABEL: func.func @_QPimplicitly_captured_two
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_two() result(k)
integer :: i
i = 10
@@ -52,7 +52,7 @@ function implicitly_captured_two() result(k)
end function implicitly_captured_two
! DEVICE-LABEL: func.func @_QPtarget_function_test
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function target_function_test() result(j)
!$omp declare target enter(target_function_test) device_type(nohost)
integer :: i, j
@@ -63,7 +63,7 @@ end function target_function_test
!! -----
! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_nest_twice() result(k)
integer :: i
i = 10
@@ -71,13 +71,13 @@ function implicitly_captured_nest_twice() result(k)
end function implicitly_captured_nest_twice
! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_one_twice() result(k)
k = implicitly_captured_nest_twice()
end function implicitly_captured_one_twice
! CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_two_twice() result(k)
integer :: i
i = 10
@@ -85,7 +85,7 @@ function implicitly_captured_two_twice() result(k)
end function implicitly_captured_two_twice
! DEVICE-LABEL: func.func @_QPtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function target_function_test_device() result(j)
!$omp declare target enter(target_function_test_device) device_type(nohost)
integer :: i, j
@@ -94,7 +94,7 @@ function target_function_test_device() result(j)
end function target_function_test_device
! CHECK-LABEL: func.func @_QPtarget_function_test_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
function target_function_test_host() result(j)
!$omp declare target enter(target_function_test_host) device_type(host)
integer :: i, j
@@ -105,7 +105,7 @@ end function target_function_test_host
!! -----
! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
recursive function implicitly_captured_with_dev_type_recursive(increment) result(k)
!$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host)
integer :: increment, k
@@ -117,7 +117,7 @@ recursive function implicitly_captured_with_dev_type_recursive(increment) result
end function implicitly_captured_with_dev_type_recursive
! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function target_function_with_dev_type_recurse() result(i)
!$omp declare target enter(target_function_with_dev_type_recurse) device_type(nohost)
integer :: i
@@ -129,28 +129,28 @@ end function target_function_with_dev_type_recurse
module test_module
contains
! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_nest_twice() result(i)
integer :: i
i = 10
end function implicitly_captured_nest_twice
! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_one_twice() result(k)
!$omp declare target enter(implicitly_captured_one_twice) device_type(host)
k = implicitly_captured_nest_twice()
end function implicitly_captured_one_twice
! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_two_twice() result(y)
integer :: y
y = 5
end function implicitly_captured_two_twice
! DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
function target_function_test_device() result(j)
!$omp declare target enter(target_function_test_device) device_type(nohost)
integer :: i, j
@@ -174,7 +174,7 @@ recursive subroutine implicitly_captured_recursive(increment)
end program
! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
recursive subroutine implicitly_captured_recursive(increment)
integer :: increment
if (increment == 10) then
@@ -185,7 +185,7 @@ recursive subroutine implicitly_captured_recursive(increment)
end subroutine
! DEVICE-LABEL: func.func @_QPcaller_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>{{.*}}}
subroutine caller_recursive
!$omp declare target enter(caller_recursive) device_type(nohost)
call implicitly_captured_recursive(0)
diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90
index be1e5a0d31f4b..751936d4cb307 100644
--- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90
+++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90
@@ -4,7 +4,7 @@
!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=50 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE
! CHECK-LABEL: func.func @_QPimplicitly_captured
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured(toggle) result(k)
integer :: i, j, k
logical :: toggle
@@ -19,7 +19,7 @@ end function implicitly_captured
! CHECK-LABEL: func.func @_QPtarget_function
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function target_function(toggle) result(i)
!$omp declare target
integer :: i
@@ -30,7 +30,7 @@ end function target_function
!! -----
! CHECK-LABEL: func.func @_QPimplicitly_captured_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_twice() result(k)
integer :: i
i = 10
@@ -38,7 +38,7 @@ function implicitly_captured_twice() result(k)
end function implicitly_captured_twice
! CHECK-LABEL: func.func @_QPtarget_function_twice_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}}
function target_function_twice_host() result(i)
!$omp declare target to(target_function_twice_host) device_type(host)
integer :: i
@@ -46,7 +46,7 @@ function target_function_twice_host() result(i)
end function target_function_twice_host
! DEVICE-LABEL: func.func @_QPtarget_function_twice_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function target_function_twice_device() result(i)
!$omp declare target to(target_function_twice_device) device_type(nohost)
integer :: i
@@ -56,7 +56,7 @@ end function target_function_twice_device
!! -----
! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_nest() result(k)
integer :: i
i = 10
@@ -64,13 +64,13 @@ function implicitly_captured_nest() result(k)
end function implicitly_captured_nest
! DEVICE-LABEL: func.func @_QPimplicitly_captured_one
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to){{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to){{.*}}}
function implicitly_captured_one() result(k)
k = implicitly_captured_nest()
end function implicitly_captured_one
! DEVICE-LABEL: func.func @_QPimplicitly_captured_two
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_two() result(k)
integer :: i
i = 10
@@ -78,7 +78,7 @@ function implicitly_captured_two() result(k)
end function implicitly_captured_two
! DEVICE-LABEL: func.func @_QPtarget_function_test
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function target_function_test() result(j)
!$omp declare target to(target_function_test) device_type(nohost)
integer :: i, j
@@ -89,7 +89,7 @@ end function target_function_test
!! -----
! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_nest_twice() result(k)
integer :: i
i = 10
@@ -97,13 +97,13 @@ function implicitly_captured_nest_twice() result(k)
end function implicitly_captured_nest_twice
! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_one_twice() result(k)
k = implicitly_captured_nest_twice()
end function implicitly_captured_one_twice
! CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_two_twice() result(k)
integer :: i
i = 10
@@ -111,7 +111,7 @@ function implicitly_captured_two_twice() result(k)
end function implicitly_captured_two_twice
! DEVICE-LABEL: func.func @_QPtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function target_function_test_device() result(j)
!$omp declare target to(target_function_test_device) device_type(nohost)
integer :: i, j
@@ -120,7 +120,7 @@ function target_function_test_device() result(j)
end function target_function_test_device
! CHECK-LABEL: func.func @_QPtarget_function_test_host
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to), automap = false>{{.*}}}
function target_function_test_host() result(j)
!$omp declare target to(target_function_test_host) device_type(host)
integer :: i, j
@@ -131,7 +131,7 @@ end function target_function_test_host
!! -----
! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
recursive function implicitly_captured_with_dev_type_recursive(increment) result(k)
!$omp declare target to(implicitly_captured_with_dev_type_recursive) device_type(host)
integer :: increment, k
@@ -143,7 +143,7 @@ recursive function implicitly_captured_with_dev_type_recursive(increment) result
end function implicitly_captured_with_dev_type_recursive
! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function target_function_with_dev_type_recurse() result(i)
!$omp declare target to(target_function_with_dev_type_recurse) device_type(nohost)
integer :: i
@@ -155,28 +155,28 @@ end function target_function_with_dev_type_recurse
module test_module
contains
! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_nest_twice() result(i)
integer :: i
i = 10
end function implicitly_captured_nest_twice
! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_one_twice() result(k)
!$omp declare target to(implicitly_captured_one_twice) device_type(host)
k = implicitly_captured_nest_twice()
end function implicitly_captured_one_twice
! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_two_twice() result(y)
integer :: y
y = 5
end function implicitly_captured_two_twice
! DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function target_function_test_device() result(j)
!$omp declare target to(target_function_test_device) device_type(nohost)
integer :: i, j
@@ -200,7 +200,7 @@ recursive subroutine implicitly_captured_recursive(increment)
end program
! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
recursive subroutine implicitly_captured_recursive(increment)
integer :: increment
if (increment == 10) then
@@ -211,7 +211,7 @@ recursive subroutine implicitly_captured_recursive(increment)
end subroutine
! DEVICE-LABEL: func.func @_QPcaller_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
subroutine caller_recursive
!$omp declare target to(caller_recursive) device_type(nohost)
call implicitly_captured_recursive(0)
diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90
index c1c1ea37fe471..e157710e0d557 100644
--- a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90
+++ b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90
@@ -4,7 +4,7 @@
!RUN: bbc -emit-hlfir -fopenmp -fopenmp-version=52 -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE
! DEVICE-LABEL: func.func @_QPimplicit_capture
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function implicit_capture() result(i)
implicit none
integer :: i
@@ -21,35 +21,35 @@ subroutine subr_target()
!! -----
! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_nest_twice() result(i)
integer :: i
i = 10
end function implicitly_captured_nest_twice
! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (any), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_one_twice() result(k)
!$omp declare target to(implicitly_captured_one_twice) device_type(host)
k = implicitly_captured_nest_twice()
end function implicitly_captured_one_twice
! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice_enter
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_nest_twice_enter() result(i)
integer :: i
i = 10
end function implicitly_captured_nest_twice_enter
! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice_enter
-! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
+! CHECK-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (enter), automap = false>{{.*}}}
function implicitly_captured_one_twice_enter() result(k)
!$omp declare target enter(implicitly_captured_one_twice_enter) device_type(host)
k = implicitly_captured_nest_twice_enter()
end function implicitly_captured_one_twice_enter
! DEVICE-LABEL: func.func @_QPimplicitly_captured_two_twice
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
function implicitly_captured_two_twice() result(y)
integer :: y
y = 5
@@ -67,7 +67,7 @@ end function target_function_test_device
!! -----
! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive
-! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
+! DEVICE-SAME: {{.*}}attributes {{{.*}}omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>{{.*}}}
recursive function implicitly_captured_recursive(increment) result(k)
integer :: increment, k
if (increment == 10) then
diff --git a/flang/test/Lower/OpenMP/function-filtering-2.f90 b/flang/test/Lower/OpenMP/function-filtering-2.f90
index 34d910c53d6ea..e220ea15a6482 100644
--- a/flang/test/Lower/OpenMP/function-filtering-2.f90
+++ b/flang/test/Lower/OpenMP/function-filtering-2.f90
@@ -1,40 +1,47 @@
-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-HOST %s
-! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s
-! RUN: %if amdgpu-registered-target %{ %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM,LLVM-DEVICE %s %}
-! RUN: %if amdgpu-registered-target %{ %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefix=MLIR %s %}
-! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-HOST,MLIR-ALL %s
+! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-HOST,LLVM-ALL %s
+! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-ALL %s
+! RUN: %if amdgpu-registered-target %{ %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -flang-experimental-hlfir -emit-llvm %s -o - | FileCheck --check-prefixes=LLVM-DEVICE,LLVM-ALL %s %}
+! RUN: %if amdgpu-registered-target %{ %flang_fc1 -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s %}
+! RUN: bbc -fopenmp -fopenmp-version=52 -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-ALL %s
! RUN: %if amdgpu-registered-target %{ bbc -target amdgcn-amd-amdhsa -fopenmp -fopenmp-version=52 -fopenmp-is-target-device -emit-hlfir %s -o - | FileCheck --check-prefixes=MLIR-DEVICE,MLIR-ALL %s %}
-! MLIR: func.func @{{.*}}implicit_invocation() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
-! MLIR: return
-! LLVM: define {{.*}} @{{.*}}implicit_invocation{{.*}}(
+! MLIR-ALL: func.func @{{.*}}implicit_invocation() attributes {
+! MLIR-DEVICE: llvm.linkage = #llvm.linkage<internal>
+! MLIR-ALL-SAME: omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
+! MLIR-ALL: return
+! LLVM-ALL: define {{.*}} @{{.*}}implicit_invocation{{.*}}(
subroutine implicit_invocation()
end subroutine implicit_invocation
-! MLIR: func.func @{{.*}}declaretarget() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
-! MLIR: return
-! LLVM: define {{.*}} @{{.*}}declaretarget{{.*}}(
+! MLIR-ALL: func.func @{{.*}}declaretarget() attributes {
+! MLIR-DEVICE: llvm.linkage = #llvm.linkage<internal>
+! MLIR-ALL-SAME: omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
+! MLIR-ALL: return
+! LLVM-ALL: define {{.*}} @{{.*}}declaretarget{{.*}}(
subroutine declaretarget()
!$omp declare target to(declaretarget) device_type(nohost)
call implicit_invocation()
end subroutine declaretarget
-! MLIR: func.func @{{.*}}declaretarget_enter() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>}
-! MLIR: return
-! LLVM: define {{.*}} @{{.*}}declaretarget_enter{{.*}}(
+! MLIR-ALL: func.func @{{.*}}declaretarget_enter() attributes {
+! MLIR-DEVICE: llvm.linkage = #llvm.linkage<internal>
+! MLIR-ALL-SAME: omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (enter), automap = false>}
+! MLIR-ALL: return
+! LLVM-ALL: define {{.*}} @{{.*}}declaretarget_enter{{.*}}(
subroutine declaretarget_enter()
!$omp declare target enter(declaretarget_enter) device_type(nohost)
call implicit_invocation()
end subroutine declaretarget_enter
-! MLIR: func.func @{{.*}}no_declaretarget() attributes {omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
-! MLIR: return
-! LLVM: define {{.*}} @{{.*}}no_declaretarget{{.*}}(
+! MLIR-ALL: func.func @{{.*}}no_declaretarget() attributes {
+! MLIR-DEVICE: llvm.linkage = #llvm.linkage<internal>
+! MLIR-ALL-SAME: omp.declare_target = #omp.declaretarget<device_type = (nohost), capture_clause = (to), automap = false>}
+! MLIR-ALL: return
+! LLVM-ALL: define {{.*}} @{{.*}}no_declaretarget{{.*}}(
subroutine no_declaretarget()
end subroutine no_declaretarget
-! MLIR-HOST: func.func @{{.*}}main(
-! MLIR-DEVICE-NOT: func.func @{{.*}}main(
+! MLIR-ALL: func.func @{{.*}}main(
! MLIR-ALL: return
! LLVM-HOST: define {{.*}} @{{.*}}main{{.*}}(
More information about the llvm-branch-commits
mailing list