[Mlir-commits] [llvm] [mlir] [mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for `private` clause on `target` constructs (PR #105471)
Pranav Bhandarkar
llvmlistbot at llvm.org
Mon Aug 26 05:40:54 PDT 2024
https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/105471
>From 9f28204b6ab27f32105ed79a4300a116966557dc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 14 Aug 2024 11:25:31 -0500
Subject: [PATCH 1/4] [mlir][OpenMP] - Implement lowering from MLIR to LLVMIR
for `private` clause on `target` constructs
This patch contains support to lower the `private` clause when used while offloading.
We implement this by inlining the `alloc` region of `omp.private` at the start
of the `target` region and adjusting the uses so that the values yielded from the `alloc`
region are used in the `target` region.
At this point, we do not handle the following (the first of the two below is being worked on)
- allocatables because we do not handle `dealloc` regions in this patch.
- firstprivate because there isn't support to lower firstprivate to mlir yet.
---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 7 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 132 +++++++++++++++++-
mlir/test/Dialect/OpenMP/invalid.mlir | 2 +-
.../Target/LLVMIR/openmp-target-private.mlir | 73 ++++++++++
.../offloading/fortran/target-private-map.f90 | 41 ++++++
5 files changed, 252 insertions(+), 3 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-target-private.mlir
create mode 100644 offload/test/offloading/fortran/target-private-map.f90
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..31d97313aedb7a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2494,7 +2494,12 @@ LogicalResult PrivateClauseOp::verify() {
<< "Did not expect any values to be yielded.";
}
- if (yieldedTypes.size() == 1 && yieldedTypes.front() == symType)
+ if (yieldedTypes.size() != 1)
+ return mlir::emitError(terminator->getLoc())
+ << "Expected exactly 1 yielded value, but got "
+ << yieldedTypes.size();
+
+ if (yieldedTypes.front() == symType)
return success();
auto error = mlir::emitError(yieldOp.getLoc())
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 458d05d5059db7..af48b5379140c4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -13,6 +13,7 @@
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/IRMapping.h"
@@ -1480,6 +1481,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
counter);
clone.setSymName(cloneName);
+
return {privVar, clone};
}
}
@@ -3241,12 +3243,140 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
SmallVector<Value> mapVars = targetOp.getMapVars();
llvm::Function *llvmOutlinedFn = nullptr;
-
// TODO: It can also be false if a compile-time constant `false` IF clause is
// specified.
bool isOffloadEntry =
isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+ if (!targetOp.getPrivateVars().empty()) {
+ auto privateVars = targetOp.getPrivateVars();
+ auto privateSyms = targetOp.getPrivateSyms();
+ auto &firstTargetRegion = opInst.getRegion(0);
+ auto &firstTargetBlock = firstTargetRegion.front();
+ auto *regionArgsStart = firstTargetBlock.getArguments().begin();
+ auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size();
+ auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
+ BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false);
+ struct omp::PrivateClauseOps newPrivateClauses;
+ MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd);
+ for (auto [privVar, privatizerNameAttr, blockArg] :
+ llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) {
+
+ SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
+
+ // 1. Clone the privatizer so that we can make changes to it freely
+ MLIRContext &context = moduleTranslation.getContext();
+ mlir::IRRewriter rewriter(&context);
+ omp::PrivateClauseOp privatizer =
+ SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(&opInst,
+ privSym);
+ // Handle only private for now. Also, not handling allocatables yet.
+ if (privatizer.getDataSharingType() !=
+ omp::DataSharingClauseType::Private ||
+ !privatizer.getDeallocRegion().empty()) {
+ newPrivateClauses.privateSyms.push_back(privSym);
+ newPrivateClauses.privateVars.push_back(privVar);
+ continue;
+ }
+
+ auto &allocRegion = privatizer.getAllocRegion();
+ // `alloc` region should have only 1 argument
+ assert(allocRegion.getNumArguments() == 1 &&
+ "alloc region of omp::PrivateClauseOp should have one argument");
+ auto allocRegionArg = allocRegion.getArgument(0);
+ // Assuming we have the following targetOp and Privatizer
+ //
+ // omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
+ // %1 = alloca...
+ // omp.yield(%1)
+ // }
+ // omp.target map(..) map(..) private(privVar) {
+ // ^bb0(map_arg0, ..., map_argn, priv_arg0):
+ // ...
+ // ...
+ // store %v, %priv_arg0
+ // omp.terminator
+ // }
+ // Roughly, we do the following -
+ // Split the first block (bb0) of the first region of the target into
+ // two block. Then clone the alloc region of the privatizer between the
+ // two new blocks. When cloning replace the alloc argument with privVar.
+ // We'll then have
+ //
+ // omp.target map(..) map(..) private(privVar) {
+ // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
+ // ^bb1: (cloned region) // no predecessor
+ // %1 = alloca...
+ // omp.yield(%1)
+ // ^bb2: // no predecessor
+ // ...
+ // ...
+ // store %v, %priv_arg0
+ // omp.terminator
+ // }
+ // Next, add an unconditional branch from ^bb0 to ^bb1 and change the
+ // yield in the last block of the cloned alloc region to an unconditional
+ // branch before replacing all uses of 'priv_arg0' with the yielded value
+ // to finally get the following
+ //
+ // omp.target map(..) map(..) private(privVar) {
+ // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
+ // llvm.br ^bb1
+ // ^bb1: (cloned region) // pred: ^bb0
+ // %1 = alloca...
+ // llvm.br ^bb2
+ // ^bb2: // pred: ^bb1
+ // ...
+ // ...
+ // store %v, %1 // %priv_arg0 replaced with the yield value
+ // omp.terminator
+ // }
+ Block &firstBlock = firstTargetRegion.front();
+ Block *newBlock = rewriter.splitBlock(&firstBlock, firstBlock.begin());
+ rewriter.setInsertionPointToStart(&firstBlock);
+ Location loc = targetOp.getLoc();
+ rewriter.create<LLVM::BrOp>(loc, ValueRange(), newBlock);
+
+ IRMapping cloneMap;
+ cloneMap.map(allocRegionArg, privVar);
+ auto secondBlockIter = std::next(firstTargetRegion.begin(), 1);
+ allocRegion.cloneInto(&firstTargetRegion, secondBlockIter, cloneMap);
+
+ unsigned allocRegNumBlocks = allocRegion.getBlocks().size();
+ secondBlockIter = std::next(firstTargetRegion.begin(), 1);
+ auto clonedAllocRegionEndIter =
+ std::next(secondBlockIter, (allocRegNumBlocks - 1));
+ Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter;
+
+ Operation *br = firstBlock.getTerminator();
+ LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br);
+ Operation *yield = clonedAllocRegEndBlock.getTerminator();
+ omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield);
+ auto newValue = yieldOp.getResults().front();
+ rewriter.setInsertionPointAfter(yield);
+ auto *oldSucc = brOp.getSuccessor();
+ brOp.setSuccessor(&*secondBlockIter);
+ // TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock
+ // TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc?
+ rewriter.create<LLVM::BrOp>(loc, ValueRange(), oldSucc);
+ blockArgsBV.set(blockArg.getArgNumber());
+ rewriter.replaceAllUsesWith(blockArg, newValue);
+ rewriter.eraseOp(yield);
+ }
+ // Do some fix ups now.
+ // First, remove the blockArguments that we just privatized
+ firstTargetBlock.eraseArguments(blockArgsBV);
+ // Then remove the private vars and privatizers that we have
+ // processed i.e privatized just now.
+ if (newPrivateClauses.privateSyms.empty()) {
+ targetOp.getPrivateVarsMutable().clear();
+ targetOp.removePrivateSymsAttr();
+ } else {
+ targetOp.setPrivateSymsAttr(mlir::ArrayAttr::get(
+ targetOp.getContext(), newPrivateClauses.privateSyms));
+ targetOp.getPrivateVarsMutable().assign(newPrivateClauses.privateVars);
+ }
+ }
LogicalResult bodyGenStatus = success();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP,
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 1d1d93f0977588..e3267c520490a2 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2219,7 +2219,7 @@ omp.private {type = private} @x.privatizer : i32 alloc {
omp.private {type = private} @x.privatizer : i32 alloc {
^bb0(%arg0: i32):
- // expected-error @below {{Invalid yielded value. Expected type: 'i32', got: None}}
+ // expected-error @below {{Expected exactly 1 yielded value, but got 0}}
omp.yield
}
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
new file mode 100644
index 00000000000000..38c93b3f0568f8
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -0,0 +1,73 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i64) : i64
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(2 : i32) : i32
+ llvm.store %4, %3 : i32, !llvm.ptr
+ %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %6 = llvm.mlir.constant(10 : i32) : i32
+ %7 = llvm.load %arg0 : !llvm.ptr -> i32
+ %8 = llvm.add %7, %6 : i32
+ llvm.store %8, %arg1 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10
+// CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4
+
+omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i64) : i64
+ %3 = llvm.alloca %2 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(1 : i64) : i64
+ %5 = llvm.alloca %4 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(2 : i32) : i32
+ llvm.store %6, %5 : i32, !llvm.ptr
+ %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+ %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
+ %9 = llvm.mlir.constant(10 : i32) : i32
+ %10 = llvm.load %arg0 : !llvm.ptr -> i32
+ %11 = llvm.add %10, %9 : i32
+ llvm.store %11, %arg1 : i32, !llvm.ptr
+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
+ %13 = llvm.sitofp %12 : i32 to f32
+ %14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
+ llvm.store %14, %arg2 : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-DAG: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK-DAG: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
+// CHECK-DAG: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
+// CHECK-DAG: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4
+// CHECK-DAG: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %6, align 4
+// CHECK-DAG: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
+// CHECK-DAG: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
+// CHECK-DAG: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4
+
diff --git a/offload/test/offloading/fortran/target-private-map.f90 b/offload/test/offloading/fortran/target-private-map.f90
new file mode 100644
index 00000000000000..861a1886c03844
--- /dev/null
+++ b/offload/test/offloading/fortran/target-private-map.f90
@@ -0,0 +1,41 @@
+! Test target private
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer :: a = 0
+ call target_private(a)
+ print*, "======= FORTRAN Test passed! ======="
+ print*, "foo(a) should not return 20, got " , a
+ if (a /= 20) then
+ stop 0
+ else
+ stop 1
+ end if
+
+ ! stop 0
+end program main
+subroutine target_private(r)
+ implicit none
+ integer, dimension(2) :: simple_vars
+ integer :: a, r
+ ! set a to 10
+ a = 5
+ simple_vars(1) = a
+ simple_vars(2) = a
+ !$omp target map(tofrom: simple_vars) private(a)
+ ! Without private(a), a would be firstprivate, meaning it's value would be 5
+ ! with private(a), it's value would be uninitialized, which means it'd have
+ ! a very small chance of being 5.
+ ! So, simple_vars(1|2) should be 5 + a_private
+ simple_vars(1) = simple_vars(1) + a
+ simple_vars(2) = simple_vars(2) + a
+ !$omp end target
+ r = simple_vars(1) + simple_vars(2)
+end subroutine target_private
>From 08ff51e8b852a7d08dcfbed31e0f6fb907c22b52 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Tue, 20 Aug 2024 23:50:19 -0500
Subject: [PATCH 2/4] remove a new line added accidently
---
.../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index af48b5379140c4..d2f90615eb8e0f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1481,7 +1481,6 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
counter);
clone.setSymName(cloneName);
-
return {privVar, clone};
}
}
>From 39a4db4c501ca44e0a67d88097a57ab2a2f25b99 Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 21 Aug 2024 11:54:32 -0500
Subject: [PATCH 3/4] Address review comments
---
.../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d2f90615eb8e0f..1ea8b80e15711d 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3248,15 +3248,15 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
if (!targetOp.getPrivateVars().empty()) {
- auto privateVars = targetOp.getPrivateVars();
- auto privateSyms = targetOp.getPrivateSyms();
+ OperandRange privateVars = targetOp.getPrivateVars();
+ std::optional<mlir::ArrayAttr> privateSyms = targetOp.getPrivateSyms();
auto &firstTargetRegion = opInst.getRegion(0);
auto &firstTargetBlock = firstTargetRegion.front();
auto *regionArgsStart = firstTargetBlock.getArguments().begin();
auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size();
auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false);
- struct omp::PrivateClauseOps newPrivateClauses;
+ omp::PrivateClauseOps newPrivateClauses;
MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd);
for (auto [privVar, privatizerNameAttr, blockArg] :
llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) {
@@ -3330,9 +3330,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
// store %v, %1 // %priv_arg0 replaced with the yield value
// omp.terminator
// }
- Block &firstBlock = firstTargetRegion.front();
- Block *newBlock = rewriter.splitBlock(&firstBlock, firstBlock.begin());
- rewriter.setInsertionPointToStart(&firstBlock);
+ Block *newBlock =
+ rewriter.splitBlock(&firstTargetBlock, firstTargetBlock.begin());
+ rewriter.setInsertionPointToStart(&firstTargetBlock);
Location loc = targetOp.getLoc();
rewriter.create<LLVM::BrOp>(loc, ValueRange(), newBlock);
@@ -3347,7 +3347,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
std::next(secondBlockIter, (allocRegNumBlocks - 1));
Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter;
- Operation *br = firstBlock.getTerminator();
+ Operation *br = firstTargetBlock.getTerminator();
LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br);
Operation *yield = clonedAllocRegEndBlock.getTerminator();
omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield);
>From 54ff624ddf69d3d107a4ba6cc29f49cd4a59d20f Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Mon, 26 Aug 2024 07:40:30 -0500
Subject: [PATCH 4/4] Address review comments
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 44 +++++++++----------
.../Target/LLVMIR/openmp-target-private.mlir | 7 ++-
.../offloading/fortran/target-private-map.f90 | 2 +-
3 files changed, 26 insertions(+), 27 deletions(-)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 1ea8b80e15711d..29ab6f987906fc 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3250,8 +3250,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
if (!targetOp.getPrivateVars().empty()) {
OperandRange privateVars = targetOp.getPrivateVars();
std::optional<mlir::ArrayAttr> privateSyms = targetOp.getPrivateSyms();
- auto &firstTargetRegion = opInst.getRegion(0);
- auto &firstTargetBlock = firstTargetRegion.front();
+ auto &targetRegion = opInst.getRegion(0);
+ auto &firstTargetBlock = targetRegion.front();
auto *regionArgsStart = firstTargetBlock.getArguments().begin();
auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size();
auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
@@ -3263,7 +3263,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
- // 1. Clone the privatizer so that we can make changes to it freely
+ // 1. Look up the privatizer in the symbol table
MLIRContext &context = moduleTranslation.getContext();
mlir::IRRewriter rewriter(&context);
omp::PrivateClauseOp privatizer =
@@ -3297,9 +3297,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
// omp.terminator
// }
// Roughly, we do the following -
- // Split the first block (bb0) of the first region of the target into
- // two block. Then clone the alloc region of the privatizer between the
- // two new blocks. When cloning replace the alloc argument with privVar.
+ // Split the first block (bb0) of the target region into two blocks.
+ // Then, clone the alloc region of the privatizer between the two
+ // new blocks. When cloning replace the alloc argument with privVar.
// We'll then have
//
// omp.target map(..) map(..) private(privVar) {
@@ -3315,8 +3315,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
// }
// Next, add an unconditional branch from ^bb0 to ^bb1 and change the
// yield in the last block of the cloned alloc region to an unconditional
- // branch before replacing all uses of 'priv_arg0' with the yielded value
- // to finally get the following
+ // branch to the other split block before replacing all uses of
+ // 'priv_arg0' with the yielded value to finally get the following code
+ // structure
//
// omp.target map(..) map(..) private(privVar) {
// ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
@@ -3338,29 +3339,28 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
IRMapping cloneMap;
cloneMap.map(allocRegionArg, privVar);
- auto secondBlockIter = std::next(firstTargetRegion.begin(), 1);
- allocRegion.cloneInto(&firstTargetRegion, secondBlockIter, cloneMap);
+ auto newBlockIter = std::next(targetRegion.begin(), 1);
+ allocRegion.cloneInto(&targetRegion, newBlockIter, cloneMap);
unsigned allocRegNumBlocks = allocRegion.getBlocks().size();
- secondBlockIter = std::next(firstTargetRegion.begin(), 1);
- auto clonedAllocRegionEndIter =
- std::next(secondBlockIter, (allocRegNumBlocks - 1));
- Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter;
-
- Operation *br = firstTargetBlock.getTerminator();
- LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br);
- Operation *yield = clonedAllocRegEndBlock.getTerminator();
- omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield);
+ newBlockIter = std::next(targetRegion.begin(), 1);
+ auto clonedAllocRegionBackIter =
+ std::next(newBlockIter, (allocRegNumBlocks - 1));
+ Block &clonedAllocRegionLastBlock = *clonedAllocRegionBackIter;
+
+ LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(firstTargetBlock.getTerminator());
+ omp::YieldOp yieldOp =
+ dyn_cast<omp::YieldOp>(clonedAllocRegionLastBlock.getTerminator());
auto newValue = yieldOp.getResults().front();
- rewriter.setInsertionPointAfter(yield);
+ rewriter.setInsertionPointAfter(yieldOp);
auto *oldSucc = brOp.getSuccessor();
- brOp.setSuccessor(&*secondBlockIter);
+ brOp.setSuccessor(&*newBlockIter);
// TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock
// TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc?
rewriter.create<LLVM::BrOp>(loc, ValueRange(), oldSucc);
blockArgsBV.set(blockArg.getArgNumber());
rewriter.replaceAllUsesWith(blockArg, newValue);
- rewriter.eraseOp(yield);
+ rewriter.eraseOp(yieldOp);
}
// Do some fix ups now.
// First, remove the blockArguments that we just privatized
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
index 38c93b3f0568f8..97da4c832ca8a8 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -25,6 +25,7 @@ llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarge
llvm.return
}
// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-NOT: define {{.*}}
// CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
// CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10
// CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4
@@ -38,10 +39,8 @@ omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
%0 = llvm.mlir.constant(1 : i64) : i64
%1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
- %2 = llvm.mlir.constant(1 : i64) : i64
- %3 = llvm.alloca %2 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
- %4 = llvm.mlir.constant(1 : i64) : i64
- %5 = llvm.alloca %4 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %3 = llvm.alloca %0 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
+ %5 = llvm.alloca %0 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
%6 = llvm.mlir.constant(2 : i32) : i32
llvm.store %6, %5 : i32, !llvm.ptr
%7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
diff --git a/offload/test/offloading/fortran/target-private-map.f90 b/offload/test/offloading/fortran/target-private-map.f90
index 861a1886c03844..dfe29f33aae9d0 100644
--- a/offload/test/offloading/fortran/target-private-map.f90
+++ b/offload/test/offloading/fortran/target-private-map.f90
@@ -25,7 +25,7 @@ subroutine target_private(r)
implicit none
integer, dimension(2) :: simple_vars
integer :: a, r
- ! set a to 10
+ ! set a to 5
a = 5
simple_vars(1) = a
simple_vars(2) = a
More information about the Mlir-commits
mailing list