[llvm-branch-commits] [flang] [Flang][OMP] Replace SUM intrinsic call with SUM operations (PR #113082)
Thirumalai Shaktivel via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Oct 20 04:01:34 PDT 2024
https://github.com/Thirumalai-Shaktivel created https://github.com/llvm/llvm-project/pull/113082
Continuation from https://github.com/llvm/llvm-project/pull/104748
> From Documentation:
\
Evaluation of transformational array intrinsic functions may be freely
subdivided into any number of units of work.
The transformational array intrinsic functions are MATMUL, DOT_PRODUCT, SUM,
PRODUCT, MAXVAL, MINVAL, COUNT, ANY, ALL, SPREAD, PACK, UNPACK, RESHAPE,
TRANSPOSE, EOSHIFT, CSHIFT, MINLOC, and MAXLOC.
Using Ivan's patch: https://github.com/llvm/llvm-project/pull/104748 the intrinsic calls were enclosed within the single construct.
So, I decided to pick one and experiment to enclose them into a wsloop node
which helps us to share the operations among different threads.
Here, I picked SUM intrinsic and fixed it to generate wsloop and its operations
as a body. See: for the generated MLIR.
The same idea has to be performed to implement all the intrinsics mentioned in
the documentation.
>From 3a48a370d212f50f86eb017afeb2f4d8dbf9bd3e Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Thu, 3 Oct 2024 10:08:41 +0000
Subject: [PATCH 1/4] [CMake] Fix build failure
---
flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt | 1 +
1 file changed, 1 insertion(+)
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index fa3a59303137ff..43da1eb92d0306 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -26,6 +26,7 @@ add_flang_library(HLFIRTransforms
FIRTransforms
HLFIRDialect
MLIRIR
+ FlangOpenMPTransforms
${dialect_libs}
LINK_COMPONENTS
>From 52b71419e758a1123e429c4df259a1aff6523b0c Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Sun, 20 Oct 2024 09:16:18 +0000
Subject: [PATCH 2/4] [Flang][Pass] Move LowerWorkshare pass before
LowerHLFIRIntrinsics pass
---
flang/lib/Optimizer/Passes/Pipelines.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index c1a5902b747887..3a9166f2e0aa52 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -227,11 +227,11 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
hlfir::createOptimizedBufferization);
}
pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
+ if (enableOpenMP)
+ pm.addPass(flangomp::createLowerWorkshare());
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
pm.addPass(hlfir::createBufferizeHLFIR());
pm.addPass(hlfir::createConvertHLFIRtoFIR());
- if (enableOpenMP)
- pm.addPass(flangomp::createLowerWorkshare());
}
/// Create a pass pipeline for handling certain OpenMP transformations needed
>From 2236526ce084e30039a8ff74251ea796868ae712 Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Sun, 20 Oct 2024 09:21:50 +0000
Subject: [PATCH 3/4] [NFC][Flang][Pass] Add SUM intrinsic operations inside
the workshare construct
---
flang/lib/Optimizer/OpenMP/CMakeLists.txt | 1 +
flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp | 151 ++++++++++++++----
2 files changed, 117 insertions(+), 35 deletions(-)
diff --git a/flang/lib/Optimizer/OpenMP/CMakeLists.txt b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
index 39e92d388288d4..776798d7239117 100644
--- a/flang/lib/Optimizer/OpenMP/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenMP/CMakeLists.txt
@@ -21,6 +21,7 @@ add_flang_library(FlangOpenMPTransforms
FortranCommon
MLIRFuncDialect
MLIROpenMPDialect
+ MLIRArithDialect
HLFIRDialect
MLIRIR
MLIRPass
diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
index 225c585a02d913..9fb84f3e099c4c 100644
--- a/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
+++ b/flang/lib/Optimizer/OpenMP/LowerWorkshare.cpp
@@ -16,6 +16,7 @@
//
//===----------------------------------------------------------------------===//
+#include "flang/Optimizer/Builder/HLFIRTools.h"
#include <flang/Optimizer/Builder/FIRBuilder.h>
#include <flang/Optimizer/Dialect/FIROps.h>
#include <flang/Optimizer/Dialect/FIRType.h>
@@ -335,49 +336,129 @@ static void parallelizeRegion(Region &sourceRegion, Region &targetRegion,
for (auto [i, opOrSingle] : llvm::enumerate(regions)) {
bool isLast = i + 1 == regions.size();
if (std::holds_alternative<SingleRegion>(opOrSingle)) {
- OpBuilder singleBuilder(sourceRegion.getContext());
- Block *singleBlock = new Block();
- singleBuilder.setInsertionPointToStart(singleBlock);
-
OpBuilder allocaBuilder(sourceRegion.getContext());
Block *allocaBlock = new Block();
allocaBuilder.setInsertionPointToStart(allocaBlock);
- OpBuilder parallelBuilder(sourceRegion.getContext());
- Block *parallelBlock = new Block();
- parallelBuilder.setInsertionPointToStart(parallelBlock);
-
- auto [allParallelized, copyprivateVars] =
- moveToSingle(std::get<SingleRegion>(opOrSingle), allocaBuilder,
- singleBuilder, parallelBuilder);
- if (allParallelized) {
- // The single region was not required as all operations were safe to
- // parallelize
- assert(copyprivateVars.empty());
- assert(allocaBlock->empty());
- delete singleBlock;
+ it = block.begin();
+ while (&*it != terminator)
+ if (isa<hlfir::SumOp>(it))
+ break;
+ else
+ it++;
+
+ if (auto sumOp = dyn_cast<hlfir::SumOp>(it)) {
+ /// Implementation:
+ /// Intrinsic function `SUM` operations
+ /// --
+ /// x = sum(array)
+ ///
+ /// is converted to
+ ///
+ /// !$omp parallel do
+ /// do i = 1, size(array)
+ /// x = x + array(i)
+ /// end do
+ /// !$omp end parallel do
+
+ OpBuilder wslBuilder(sourceRegion.getContext());
+ Block *wslBlock = new Block();
+ wslBuilder.setInsertionPointToStart(wslBlock);
+
+ Value target = dyn_cast<hlfir::AssignOp>(++it).getLhs();
+ Value array = sumOp.getArray();
+ Value dim = sumOp.getDim();
+ fir::SequenceType arrayTy = dyn_cast<fir::SequenceType>(
+ hlfir::getFortranElementOrSequenceType(array.getType()));
+ llvm::ArrayRef<int64_t> arrayShape = arrayTy.getShape();
+ if (arrayShape.size() == 1 && !dim) {
+ Value itr = allocaBuilder.create<fir::AllocaOp>(
+ loc, allocaBuilder.getI64Type());
+ Value c_one = allocaBuilder.create<arith::ConstantOp>(
+ loc, allocaBuilder.getI64IntegerAttr(1));
+ Value c_arr_size = allocaBuilder.create<arith::ConstantOp>(
+ loc, allocaBuilder.getI64IntegerAttr(arrayShape[0]));
+ // Value c_zero = allocaBuilder.create<arith::ConstantOp>(loc,
+ // allocaBuilder.getZeroAttr(arrayTy.getEleTy()));
+ // allocaBuilder.create<fir::StoreOp>(loc, c_zero, target);
+
+ omp::WsloopOperands wslOps;
+ omp::WsloopOp wslOp =
+ rootBuilder.create<omp::WsloopOp>(loc, wslOps);
+
+ hlfir::LoopNest ln;
+ ln.outerOp = wslOp;
+ omp::LoopNestOperands lnOps;
+ lnOps.loopLowerBounds.push_back(c_one);
+ lnOps.loopUpperBounds.push_back(c_arr_size);
+ lnOps.loopSteps.push_back(c_one);
+ lnOps.loopInclusive = wslBuilder.getUnitAttr();
+ omp::LoopNestOp lnOp =
+ wslBuilder.create<omp::LoopNestOp>(loc, lnOps);
+ Block *lnBlock = wslBuilder.createBlock(&lnOp.getRegion());
+ lnBlock->addArgument(c_one.getType(), loc);
+ wslBuilder.create<fir::StoreOp>(
+ loc, lnOp.getRegion().getArgument(0), itr);
+ Value tarLoad = wslBuilder.create<fir::LoadOp>(loc, target);
+ Value itrLoad = wslBuilder.create<fir::LoadOp>(loc, itr);
+ hlfir::DesignateOp arrDesOp = wslBuilder.create<hlfir::DesignateOp>(
+ loc, fir::ReferenceType::get(arrayTy.getEleTy()), array,
+ itrLoad);
+ Value desLoad = wslBuilder.create<fir::LoadOp>(loc, arrDesOp);
+ Value addf =
+ wslBuilder.create<arith::AddFOp>(loc, tarLoad, desLoad);
+ wslBuilder.create<fir::StoreOp>(loc, addf, target);
+ wslBuilder.create<omp::YieldOp>(loc);
+ ln.body = lnBlock;
+ wslOp.getRegion().push_back(wslBlock);
+ targetRegion.front().getOperations().splice(
+ wslOp->getIterator(), allocaBlock->getOperations());
+ } else {
+ emitError(loc, "Only 1D array scalar assignment for sum "
+ "instrinsic is supported in workshare construct");
+ return;
+ }
} else {
- omp::SingleOperands singleOperands;
- if (isLast)
- singleOperands.nowait = rootBuilder.getUnitAttr();
- singleOperands.copyprivateVars = copyprivateVars;
- cleanupBlock(singleBlock);
- for (auto var : singleOperands.copyprivateVars) {
- mlir::func::FuncOp funcOp =
- createCopyFunc(loc, var.getType(), firCopyFuncBuilder);
- singleOperands.copyprivateSyms.push_back(
- SymbolRefAttr::get(funcOp));
+ OpBuilder singleBuilder(sourceRegion.getContext());
+ Block *singleBlock = new Block();
+ singleBuilder.setInsertionPointToStart(singleBlock);
+
+ OpBuilder parallelBuilder(sourceRegion.getContext());
+ Block *parallelBlock = new Block();
+ parallelBuilder.setInsertionPointToStart(parallelBlock);
+
+ auto [allParallelized, copyprivateVars] =
+ moveToSingle(std::get<SingleRegion>(opOrSingle), allocaBuilder,
+ singleBuilder, parallelBuilder);
+ if (allParallelized) {
+ // The single region was not required as all operations were safe to
+ // parallelize
+ assert(copyprivateVars.empty());
+ assert(allocaBlock->empty());
+ delete singleBlock;
+ } else {
+ omp::SingleOperands singleOperands;
+ if (isLast)
+ singleOperands.nowait = rootBuilder.getUnitAttr();
+ singleOperands.copyprivateVars = copyprivateVars;
+ cleanupBlock(singleBlock);
+ for (auto var : singleOperands.copyprivateVars) {
+ mlir::func::FuncOp funcOp =
+ createCopyFunc(loc, var.getType(), firCopyFuncBuilder);
+ singleOperands.copyprivateSyms.push_back(
+ SymbolRefAttr::get(funcOp));
+ }
+ omp::SingleOp singleOp =
+ rootBuilder.create<omp::SingleOp>(loc, singleOperands);
+ singleOp.getRegion().push_back(singleBlock);
+ targetRegion.front().getOperations().splice(
+ singleOp->getIterator(), allocaBlock->getOperations());
}
- omp::SingleOp singleOp =
- rootBuilder.create<omp::SingleOp>(loc, singleOperands);
- singleOp.getRegion().push_back(singleBlock);
- targetRegion.front().getOperations().splice(
- singleOp->getIterator(), allocaBlock->getOperations());
+ rootBuilder.getInsertionBlock()->getOperations().splice(
+ rootBuilder.getInsertionPoint(), parallelBlock->getOperations());
+ delete parallelBlock;
}
- rootBuilder.getInsertionBlock()->getOperations().splice(
- rootBuilder.getInsertionPoint(), parallelBlock->getOperations());
delete allocaBlock;
- delete parallelBlock;
} else {
auto op = std::get<Operation *>(opOrSingle);
if (auto wslw = dyn_cast<omp::WorkshareLoopWrapperOp>(op)) {
>From 6c9d9f5215babedfb1d45719a7a763f2726b3eaf Mon Sep 17 00:00:00 2001
From: Thirumalai-Shaktivel <thirumalaishaktivel at gmail.com>
Date: Sun, 20 Oct 2024 09:32:42 +0000
Subject: [PATCH 4/4] [Test] Add and update tests
---
flang/test/Fir/basic-program.fir | 2 +-
flang/test/Integration/OpenMP/workshare02.f90 | 36 +++++++++++++++++++
2 files changed, 37 insertions(+), 1 deletion(-)
create mode 100644 flang/test/Integration/OpenMP/workshare02.f90
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index 4b18acb7c2b430..9b9651a476e583 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -44,10 +44,10 @@ func.func @_QQmain() {
// PASSES-NEXT: 'omp.private' Pipeline
// PASSES-NEXT: OptimizedBufferization
// PASSES-NEXT: LowerHLFIROrderedAssignments
+// PASSES-NEXT: LowerWorkshare
// PASSES-NEXT: LowerHLFIRIntrinsics
// PASSES-NEXT: BufferizeHLFIR
// PASSES-NEXT: ConvertHLFIRtoFIR
-// PASSES-NEXT: LowerWorkshare
// PASSES-NEXT: CSE
// PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
// PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
diff --git a/flang/test/Integration/OpenMP/workshare02.f90 b/flang/test/Integration/OpenMP/workshare02.f90
new file mode 100644
index 00000000000000..68b810a32f247e
--- /dev/null
+++ b/flang/test/Integration/OpenMP/workshare02.f90
@@ -0,0 +1,36 @@
+!===----------------------------------------------------------------------===!
+! This directory can be used to add Integration tests involving multiple
+! stages of the compiler (for eg. from Fortran to LLVM IR). It should not
+! contain executable tests. We should only add tests here sparingly and only
+! if there is no other way to test. Repeat this message in each test that is
+! added to this directory and sub-directories.
+!===----------------------------------------------------------------------===!
+
+!RUN: %flang_fc1 -emit-mlir -fopenmp -O3 %s -o - | FileCheck %s --check-prefix MLIR
+
+program test_ws_01
+ implicit none
+ real(8) :: arr_01(10), x
+ arr_01 = [0.347,0.892,0.573,0.126,0.788,0.412,0.964,0.205,0.631,0.746]
+
+ !$omp parallel workshare
+ x = sum(arr_01)
+ !$omp end parallel workshare
+end program test_ws_01
+
+! MLIR: func.func @_QQmain
+! MLIR: omp.parallel {
+! [...]
+! MLIR: omp.wsloop {
+! MLIR: omp.loop_nest {{.*}}
+! [...]
+! MLIR: %[[SUM:.*]] = arith.addf {{.*}}
+! [...]
+! MLIR: omp.yield
+! MLIR: }
+! MLIR: }
+! MLIR: omp.barrier
+! MLIR: omp.terminator
+! MLIR: }
+! MLIR: return
+! MLIR: }
More information about the llvm-branch-commits
mailing list