[llvm] [mlir] [mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for `private` clause on `target` constructs (PR #105471)
Pranav Bhandarkar via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 21:49:29 PDT 2024
https://github.com/bhandarkar-pranav created https://github.com/llvm/llvm-project/pull/105471
This patch contains support to lower the `private` clause when used while offloading. We implement this by inlining the `alloc` region of `omp.private` at the start of the `target` region and adjusting the uses so that the values yielded from the `alloc` region are used in the `target` region.
At this point, we do not handle the following (the first of the two below is being worked on)
- [ ] allocatables because we do not handle `dealloc` regions in this patch.
- [ ] firstprivate because there isn't support to lower firstprivate to mlir yet.
>From 9f28204b6ab27f32105ed79a4300a116966557dc Mon Sep 17 00:00:00 2001
From: Pranav Bhandarkar <pranav.bhandarkar at amd.com>
Date: Wed, 14 Aug 2024 11:25:31 -0500
Subject: [PATCH] [mlir][OpenMP] - Implement lowering from MLIR to LLVMIR for
`private` clause on `target` constructs
This patch contains support to lower the `private` clause when used while offloading.
We implement this by inlining the `alloc` region of `omp.private` at the start
of the `target` region and adjusting the uses so that the values yielded from the `alloc`
region are used in the `target` region.
At this point, we do not handle the following (the first of the two below is being worked on)
- allocatables because we do not handle `dealloc` regions in this patch.
- firstprivate because there isn't support to lower firstprivate to mlir yet.
---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 7 +-
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 132 +++++++++++++++++-
mlir/test/Dialect/OpenMP/invalid.mlir | 2 +-
.../Target/LLVMIR/openmp-target-private.mlir | 73 ++++++++++
.../offloading/fortran/target-private-map.f90 | 41 ++++++
5 files changed, 252 insertions(+), 3 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/openmp-target-private.mlir
create mode 100644 offload/test/offloading/fortran/target-private-map.f90
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 11780f84697b15..31d97313aedb7a 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -2494,7 +2494,12 @@ LogicalResult PrivateClauseOp::verify() {
<< "Did not expect any values to be yielded.";
}
- if (yieldedTypes.size() == 1 && yieldedTypes.front() == symType)
+ if (yieldedTypes.size() != 1)
+ return mlir::emitError(terminator->getLoc())
+ << "Expected exactly 1 yielded value, but got "
+ << yieldedTypes.size();
+
+ if (yieldedTypes.front() == symType)
return success();
auto error = mlir::emitError(yieldOp.getLoc())
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 458d05d5059db7..af48b5379140c4 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -13,6 +13,7 @@
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/IRMapping.h"
@@ -1480,6 +1481,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
counter);
clone.setSymName(cloneName);
+
return {privVar, clone};
}
}
@@ -3241,12 +3243,140 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
DataLayout dl = DataLayout(opInst.getParentOfType<ModuleOp>());
SmallVector<Value> mapVars = targetOp.getMapVars();
llvm::Function *llvmOutlinedFn = nullptr;
-
// TODO: It can also be false if a compile-time constant `false` IF clause is
// specified.
bool isOffloadEntry =
isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
+ if (!targetOp.getPrivateVars().empty()) {
+ auto privateVars = targetOp.getPrivateVars();
+ auto privateSyms = targetOp.getPrivateSyms();
+ auto &firstTargetRegion = opInst.getRegion(0);
+ auto &firstTargetBlock = firstTargetRegion.front();
+ auto *regionArgsStart = firstTargetBlock.getArguments().begin();
+ auto *privArgsStart = regionArgsStart + targetOp.getMapVars().size();
+ auto *privArgsEnd = privArgsStart + targetOp.getPrivateVars().size();
+ BitVector blockArgsBV(firstTargetBlock.getNumArguments(), false);
+ struct omp::PrivateClauseOps newPrivateClauses;
+ MutableArrayRef argSubRangePrivates(privArgsStart, privArgsEnd);
+ for (auto [privVar, privatizerNameAttr, blockArg] :
+ llvm::zip_equal(privateVars, *privateSyms, argSubRangePrivates)) {
+
+ SymbolRefAttr privSym = llvm::cast<SymbolRefAttr>(privatizerNameAttr);
+
+ // 1. Clone the privatizer so that we can make changes to it freely
+ MLIRContext &context = moduleTranslation.getContext();
+ mlir::IRRewriter rewriter(&context);
+ omp::PrivateClauseOp privatizer =
+ SymbolTable::lookupNearestSymbolFrom<omp::PrivateClauseOp>(&opInst,
+ privSym);
+ // Handle only private for now. Also, not handling allocatables yet.
+ if (privatizer.getDataSharingType() !=
+ omp::DataSharingClauseType::Private ||
+ !privatizer.getDeallocRegion().empty()) {
+ newPrivateClauses.privateSyms.push_back(privSym);
+ newPrivateClauses.privateVars.push_back(privVar);
+ continue;
+ }
+
+ auto &allocRegion = privatizer.getAllocRegion();
+ // `alloc` region should have only 1 argument
+ assert(allocRegion.getNumArguments() == 1 &&
+ "alloc region of omp::PrivateClauseOp should have one argument");
+ auto allocRegionArg = allocRegion.getArgument(0);
+ // Assuming we have the following targetOp and Privatizer
+ //
+ // omp.private {type = private} @x.privatizer : !llvm.ptr alloc {
+ // %1 = alloca...
+ // omp.yield(%1)
+ // }
+ // omp.target map(..) map(..) private(privVar) {
+ // ^bb0(map_arg0, ..., map_argn, priv_arg0):
+ // ...
+ // ...
+ // store %v, %priv_arg0
+ // omp.terminator
+ // }
+ // Roughly, we do the following -
+ // Split the first block (bb0) of the first region of the target into
+ // two block. Then clone the alloc region of the privatizer between the
+ // two new blocks. When cloning replace the alloc argument with privVar.
+ // We'll then have
+ //
+ // omp.target map(..) map(..) private(privVar) {
+ // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
+ // ^bb1: (cloned region) // no predecessor
+ // %1 = alloca...
+ // omp.yield(%1)
+ // ^bb2: // no predecessor
+ // ...
+ // ...
+ // store %v, %priv_arg0
+ // omp.terminator
+ // }
+ // Next, add an unconditional branch from ^bb0 to ^bb1 and change the
+ // yield in the last block of the cloned alloc region to an unconditional
+ // branch before replacing all uses of 'priv_arg0' with the yielded value
+ // to finally get the following
+ //
+ // omp.target map(..) map(..) private(privVar) {
+ // ^bb0(map_arg0, ..., map_argn, priv_arg0, ..., priv_argn):
+ // llvm.br ^bb1
+ // ^bb1: (cloned region) // pred: ^bb0
+ // %1 = alloca...
+ // llvm.br ^bb2
+ // ^bb2: // pred: ^bb1
+ // ...
+ // ...
+ // store %v, %1 // %priv_arg0 replaced with the yield value
+ // omp.terminator
+ // }
+ Block &firstBlock = firstTargetRegion.front();
+ Block *newBlock = rewriter.splitBlock(&firstBlock, firstBlock.begin());
+ rewriter.setInsertionPointToStart(&firstBlock);
+ Location loc = targetOp.getLoc();
+ rewriter.create<LLVM::BrOp>(loc, ValueRange(), newBlock);
+
+ IRMapping cloneMap;
+ cloneMap.map(allocRegionArg, privVar);
+ auto secondBlockIter = std::next(firstTargetRegion.begin(), 1);
+ allocRegion.cloneInto(&firstTargetRegion, secondBlockIter, cloneMap);
+
+ unsigned allocRegNumBlocks = allocRegion.getBlocks().size();
+ secondBlockIter = std::next(firstTargetRegion.begin(), 1);
+ auto clonedAllocRegionEndIter =
+ std::next(secondBlockIter, (allocRegNumBlocks - 1));
+ Block &clonedAllocRegEndBlock = *clonedAllocRegionEndIter;
+
+ Operation *br = firstBlock.getTerminator();
+ LLVM::BrOp brOp = dyn_cast<LLVM::BrOp>(br);
+ Operation *yield = clonedAllocRegEndBlock.getTerminator();
+ omp::YieldOp yieldOp = dyn_cast<omp::YieldOp>(yield);
+ auto newValue = yieldOp.getResults().front();
+ rewriter.setInsertionPointAfter(yield);
+ auto *oldSucc = brOp.getSuccessor();
+ brOp.setSuccessor(&*secondBlockIter);
+ // TODO:Consider cloning brOp and adding it to clonedAllocRegEngBlock
+ // TODO: Can we not simply merge clonedAllocRegEngBlock and oldSucc?
+ rewriter.create<LLVM::BrOp>(loc, ValueRange(), oldSucc);
+ blockArgsBV.set(blockArg.getArgNumber());
+ rewriter.replaceAllUsesWith(blockArg, newValue);
+ rewriter.eraseOp(yield);
+ }
+ // Do some fix ups now.
+ // First, remove the blockArguments that we just privatized
+ firstTargetBlock.eraseArguments(blockArgsBV);
+ // Then remove the private vars and privatizers that we have
+ // processed i.e privatized just now.
+ if (newPrivateClauses.privateSyms.empty()) {
+ targetOp.getPrivateVarsMutable().clear();
+ targetOp.removePrivateSymsAttr();
+ } else {
+ targetOp.setPrivateSymsAttr(mlir::ArrayAttr::get(
+ targetOp.getContext(), newPrivateClauses.privateSyms));
+ targetOp.getPrivateVarsMutable().assign(newPrivateClauses.privateVars);
+ }
+ }
LogicalResult bodyGenStatus = success();
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
auto bodyCB = [&](InsertPointTy allocaIP,
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 1d1d93f0977588..e3267c520490a2 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -2219,7 +2219,7 @@ omp.private {type = private} @x.privatizer : i32 alloc {
omp.private {type = private} @x.privatizer : i32 alloc {
^bb0(%arg0: i32):
- // expected-error @below {{Invalid yielded value. Expected type: 'i32', got: None}}
+ // expected-error @below {{Expected exactly 1 yielded value, but got 0}}
omp.yield
}
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
new file mode 100644
index 00000000000000..38c93b3f0568f8
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir
@@ -0,0 +1,73 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+omp.private {type = private} @simple_var.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarget_map_single_private"} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i64) : i64
+ %3 = llvm.alloca %2 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(2 : i32) : i32
+ llvm.store %4, %3 : i32, !llvm.ptr
+ %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+ %6 = llvm.mlir.constant(10 : i32) : i32
+ %7 = llvm.load %arg0 : !llvm.ptr -> i32
+ %8 = llvm.add %7, %6 : i32
+ llvm.store %8, %arg1 : i32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-DAG: %[[PRIV_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK-DAG: %[[ADD:.*]] = add i32 {{.*}}, 10
+// CHECK-DAG: store i32 %[[ADD]], ptr %[[PRIV_ALLOC]], align 4
+
+omp.private {type = private} @n.privatizer : !llvm.ptr alloc {
+^bb0(%arg0: !llvm.ptr):
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x f32 {bindc_name = "n", pinned} : (i64) -> !llvm.ptr
+ omp.yield(%1 : !llvm.ptr)
+}
+llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_map_2_privates"} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "simple_var"} : (i64) -> !llvm.ptr
+ %2 = llvm.mlir.constant(1 : i64) : i64
+ %3 = llvm.alloca %2 x f32 {bindc_name = "n"} : (i64) -> !llvm.ptr
+ %4 = llvm.mlir.constant(1 : i64) : i64
+ %5 = llvm.alloca %4 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr
+ %6 = llvm.mlir.constant(2 : i32) : i32
+ llvm.store %6, %5 : i32, !llvm.ptr
+ %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr, @n.privatizer %3 -> %arg2 : !llvm.ptr) {
+ ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr):
+ %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32
+ %9 = llvm.mlir.constant(10 : i32) : i32
+ %10 = llvm.load %arg0 : !llvm.ptr -> i32
+ %11 = llvm.add %10, %9 : i32
+ llvm.store %11, %arg1 : i32, !llvm.ptr
+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
+ %13 = llvm.sitofp %12 : i32 to f32
+ %14 = llvm.fadd %13, %8 {fastmathFlags = #llvm.fastmath<contract>} : f32
+ llvm.store %14, %arg2 : f32, !llvm.ptr
+ omp.terminator
+ }
+ llvm.return
+}
+
+// CHECK: define internal void @__omp_offloading_fd00
+// CHECK-DAG: %[[PRIV_I32_ALLOC:.*]] = alloca i32, i64 1, align 4
+// CHECK-DAG: %[[PRIV_FLOAT_ALLOC:.*]] = alloca float, i64 1, align 4
+// CHECK-DAG: %[[ADD_I32:.*]] = add i32 {{.*}}, 10
+// CHECK-DAG: store i32 %[[ADD_I32]], ptr %[[PRIV_I32_ALLOC]], align 4
+// CHECK-DAG: %[[LOAD_I32_AGAIN:.*]] = load i32, ptr %6, align 4
+// CHECK-DAG: %[[CAST_TO_FLOAT:.*]] = sitofp i32 %[[LOAD_I32_AGAIN]] to float
+// CHECK-DAG: %[[ADD_FLOAT:.*]] = fadd contract float %[[CAST_TO_FLOAT]], 1.100000e+01
+// CHECK-DAG: store float %[[ADD_FLOAT]], ptr %[[PRIV_FLOAT_ALLOC]], align 4
+
diff --git a/offload/test/offloading/fortran/target-private-map.f90 b/offload/test/offloading/fortran/target-private-map.f90
new file mode 100644
index 00000000000000..861a1886c03844
--- /dev/null
+++ b/offload/test/offloading/fortran/target-private-map.f90
@@ -0,0 +1,41 @@
+! Test target private
+! REQUIRES: flang, amdgcn-amd-amdhsa
+! UNSUPPORTED: nvptx64-nvidia-cuda
+! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+! UNSUPPORTED: aarch64-unknown-linux-gnu
+! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+! UNSUPPORTED: x86_64-pc-linux-gnu
+! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+ integer :: a = 0
+ call target_private(a)
+ print*, "======= FORTRAN Test passed! ======="
+ print*, "foo(a) should not return 20, got " , a
+ if (a /= 20) then
+ stop 0
+ else
+ stop 1
+ end if
+
+ ! stop 0
+end program main
+subroutine target_private(r)
+ implicit none
+ integer, dimension(2) :: simple_vars
+ integer :: a, r
+ ! set a to 10
+ a = 5
+ simple_vars(1) = a
+ simple_vars(2) = a
+ !$omp target map(tofrom: simple_vars) private(a)
+ ! Without private(a), a would be firstprivate, meaning it's value would be 5
+ ! with private(a), it's value would be uninitialized, which means it'd have
+ ! a very small chance of being 5.
+ ! So, simple_vars(1|2) should be 5 + a_private
+ simple_vars(1) = simple_vars(1) + a
+ simple_vars(2) = simple_vars(2) + a
+ !$omp end target
+ r = simple_vars(1) + simple_vars(2)
+end subroutine target_private
More information about the llvm-commits
mailing list