[llvm-branch-commits] [flang] [flang] Lower omp.workshare to other omp constructs (PR #101446)
Tom Eccles via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 5 02:26:04 PDT 2024
================
@@ -0,0 +1,259 @@
+//===- LowerWorkshare.cpp - special cases for bufferization -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Lower omp workshare construct.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/IRMapping.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
+
+#include <variant>
+
+namespace flangomp {
+#define GEN_PASS_DEF_LOWERWORKSHARE
+#include "flang/Optimizer/OpenMP/Passes.h.inc"
+} // namespace flangomp
+
+#define DEBUG_TYPE "lower-workshare"
+
+using namespace mlir;
+
+namespace flangomp {
+bool shouldUseWorkshareLowering(Operation *op) {
+ auto workshare = dyn_cast<omp::WorkshareOp>(op->getParentOp());
+ if (!workshare)
+ return false;
+ return workshare->getParentOfType<omp::ParallelOp>();
+}
+} // namespace flangomp
+
+namespace {
+
+struct SingleRegion {
+ Block::iterator begin, end;
+};
+
+static bool isSupportedByFirAlloca(Type ty) {
+ return !isa<fir::ReferenceType>(ty);
+}
+
+static bool isSafeToParallelize(Operation *op) {
+ if (isa<fir::DeclareOp>(op))
+ return true;
+
+ llvm::SmallVector<MemoryEffects::EffectInstance> effects;
+ MemoryEffectOpInterface interface = dyn_cast<MemoryEffectOpInterface>(op);
+ if (!interface) {
+ return false;
+ }
+ interface.getEffects(effects);
+ if (effects.empty())
+ return true;
+
+ return false;
+}
+
+/// Lowers workshare to a sequence of single-thread regions and parallel loops
+///
+/// For example:
+///
+/// omp.workshare {
+/// %a = fir.allocmem
+/// omp.wsloop {}
+/// fir.call Assign %b %a
+/// fir.freemem %a
+/// }
+///
+/// becomes
+///
+/// omp.single {
+/// %a = fir.allocmem
+/// fir.store %a %tmp
+/// }
+/// %a_reloaded = fir.load %tmp
+/// omp.wsloop {}
+/// omp.single {
+/// fir.call Assign %b %a_reloaded
+/// fir.freemem %a_reloaded
+/// }
+///
+/// Note that we allocate temporary memory for values in omp.single's which need
+/// to be accessed in all threads in the closest omp.parallel
+///
+/// TODO currently we need to be able to access the encompassing omp.parallel so
+/// that we can allocate temporaries accessible by all threads outside of it.
+/// In case we do not find it, we fall back to converting the omp.workshare to
+/// omp.single.
+/// To better handle this we should probably enable yielding values out of an
+/// omp.single which will be supported by the omp runtime.
+void lowerWorkshare(mlir::omp::WorkshareOp wsOp) {
+ assert(wsOp.getRegion().getBlocks().size() == 1);
+
+ Location loc = wsOp->getLoc();
+
+ omp::ParallelOp parallelOp = wsOp->getParentOfType<omp::ParallelOp>();
+ if (!parallelOp) {
+ wsOp.emitWarning("cannot handle workshare, converting to single");
+ Operation *terminator = wsOp.getRegion().front().getTerminator();
+ wsOp->getBlock()->getOperations().splice(
+ wsOp->getIterator(), wsOp.getRegion().front().getOperations());
+ terminator->erase();
+ return;
+ }
+
+ OpBuilder allocBuilder(parallelOp);
+ OpBuilder rootBuilder(wsOp);
+ IRMapping rootMapping;
+
+ omp::SingleOp singleOp = nullptr;
+
+ auto mapReloadedValue = [&](Value v, OpBuilder singleBuilder,
+ IRMapping singleMapping) {
+ if (auto reloaded = rootMapping.lookupOrNull(v))
+ return;
+ Type llvmPtrTy = LLVM::LLVMPointerType::get(allocBuilder.getContext());
+ Type ty = v.getType();
+ Value alloc, reloaded;
+ if (isSupportedByFirAlloca(ty)) {
+ alloc = allocBuilder.create<fir::AllocaOp>(loc, ty);
+ singleBuilder.create<fir::StoreOp>(loc, singleMapping.lookup(v), alloc);
----------------
tblah wrote:
Ahh you are correct for the allocatable array case. If the fir.freemem or runtime destroy call is in a single region this will be fine.
https://github.com/llvm/llvm-project/pull/101446
More information about the llvm-branch-commits
mailing list