[flang-commits] [flang] [mlir] [flang][openmp] Changes for invoking scan Op (PR #123254)

Thu Jan 23 17:32:44 PST 2025

https://github.com/anchuraj updated https://github.com/llvm/llvm-project/pull/123254

>From d8e27cd3db6c0181b91b186069416934b3e13610 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 16 Jan 2025 17:13:56 -0600
Subject: [PATCH 1/2] Changes for invoking scan Op

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    | 33 +++++++++++++++---
 flang/lib/Lower/OpenMP/ClauseProcessor.h      |  4 +++
 flang/lib/Lower/OpenMP/Clauses.cpp            |  8 ++---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 21 +++++++++++-
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 32 ++++++++++++++---
 flang/lib/Lower/OpenMP/ReductionProcessor.h   |  6 +++-
 .../Lower/OpenMP/Todo/reduction-inscan.f90    | 15 --------
 .../Lower/OpenMP/Todo/reduction-modifiers.f90 | 14 --------
 .../test/Lower/OpenMP/Todo/reduction-task.f90 |  2 +-
 flang/test/Lower/OpenMP/scan.f90              | 34 +++++++++++++++++++
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |  3 +-
 11 files changed, 127 insertions(+), 45 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-inscan.f90
 delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90
 create mode 100644 flang/test/Lower/OpenMP/scan.f90

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 299d9d438f1156..8bec29c74a1542 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -344,6 +344,19 @@ bool ClauseProcessor::processDistSchedule(
   return false;
 }
 
+bool ClauseProcessor::processExclusive(
+    mlir::Location currentLocation,
+    mlir::omp::ExclusiveClauseOps &result) const {
+  return findRepeatableClause<omp::clause::Exclusive>(
+      [&](const omp::clause::Exclusive &clause, const parser::CharBlock &) {
+        for (const Object &object : clause.v) {
+          const semantics::Symbol *symbol = object.sym();
+          mlir::Value symVal = converter.getSymbolAddress(*symbol);
+          result.exclusiveVars.push_back(symVal);
+        }
+      });
+}
+
 bool ClauseProcessor::processFilter(lower::StatementContext &stmtCtx,
                                     mlir::omp::FilterClauseOps &result) const {
   if (auto *clause = findUniqueClause<omp::clause::Filter>()) {
@@ -380,6 +393,19 @@ bool ClauseProcessor::processHint(mlir::omp::HintClauseOps &result) const {
   return false;
 }
 
+bool ClauseProcessor::processInclusive(
+    mlir::Location currentLocation,
+    mlir::omp::InclusiveClauseOps &result) const {
+  return findRepeatableClause<omp::clause::Inclusive>(
+      [&](const omp::clause::Inclusive &clause, const parser::CharBlock &) {
+        for (const Object &object : clause.v) {
+          const semantics::Symbol *symbol = object.sym();
+          mlir::Value symVal = converter.getSymbolAddress(*symbol);
+          result.inclusiveVars.push_back(symVal);
+        }
+      });
+}
+
 bool ClauseProcessor::processMergeable(
     mlir::omp::MergeableClauseOps &result) const {
   return markClauseOccurrence<omp::clause::Mergeable>(result.mergeable);
@@ -1135,10 +1161,9 @@ bool ClauseProcessor::processReduction(
         llvm::SmallVector<mlir::Attribute> reductionDeclSymbols;
         llvm::SmallVector<const semantics::Symbol *> reductionSyms;
         ReductionProcessor rp;
-        rp.addDeclareReduction(currentLocation, converter, clause,
-                               reductionVars, reduceVarByRef,
-                               reductionDeclSymbols, reductionSyms);
-
+        rp.addDeclareReduction(
+            currentLocation, converter, clause, reductionVars, reduceVarByRef,
+            reductionDeclSymbols, reductionSyms, &result.reductionMod);
         // Copy local lists into the output.
         llvm::copy(reductionVars, std::back_inserter(result.reductionVars));
         llvm::copy(reduceVarByRef, std::back_inserter(result.reductionByref));
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 7b047d4a7567ad..e05f66c7666844 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -64,6 +64,8 @@ class ClauseProcessor {
   bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
   bool processDistSchedule(lower::StatementContext &stmtCtx,
                            mlir::omp::DistScheduleClauseOps &result) const;
+  bool processExclusive(mlir::Location currentLocation,
+                        mlir::omp::ExclusiveClauseOps &result) const;
   bool processFilter(lower::StatementContext &stmtCtx,
                      mlir::omp::FilterClauseOps &result) const;
   bool processFinal(lower::StatementContext &stmtCtx,
@@ -72,6 +74,8 @@ class ClauseProcessor {
       mlir::omp::HasDeviceAddrClauseOps &result,
       llvm::SmallVectorImpl<const semantics::Symbol *> &isDeviceSyms) const;
   bool processHint(mlir::omp::HintClauseOps &result) const;
+  bool processInclusive(mlir::Location currentLocation,
+                        mlir::omp::InclusiveClauseOps &result) const;
   bool processMergeable(mlir::omp::MergeableClauseOps &result) const;
   bool processNowait(mlir::omp::NowaitClauseOps &result) const;
   bool processNumTeams(lower::StatementContext &stmtCtx,
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index b424e209d56da9..a26bdcdf343e13 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -728,8 +728,8 @@ Enter make(const parser::OmpClause::Enter &inp,
 
 Exclusive make(const parser::OmpClause::Exclusive &inp,
                semantics::SemanticsContext &semaCtx) {
-  // inp -> empty
-  llvm_unreachable("Empty: exclusive");
+  // inp.v -> parser::OmpObjectList
+  return Exclusive{makeObjects(/*List=*/inp.v, semaCtx)};
 }
 
 Fail make(const parser::OmpClause::Fail &inp,
@@ -838,8 +838,8 @@ If make(const parser::OmpClause::If &inp,
 
 Inclusive make(const parser::OmpClause::Inclusive &inp,
                semantics::SemanticsContext &semaCtx) {
-  // inp -> empty
-  llvm_unreachable("Empty: inclusive");
+  // inp.v -> parser::OmpObjectList
+  return Inclusive{makeObjects(/*List=*/inp.v, semaCtx)};
 }
 
 Indirect make(const parser::OmpClause::Indirect &inp,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 1434bcd6330e02..48e8e433e1f1fa 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1578,6 +1578,15 @@ static void genParallelClauses(
   cp.processReduction(loc, clauseOps, reductionSyms);
 }
 
+static void genScanClauses(lower::AbstractConverter &converter,
+                           semantics::SemanticsContext &semaCtx,
+                           const List<Clause> &clauses, mlir::Location loc,
+                           mlir::omp::ScanOperands &clauseOps) {
+  ClauseProcessor cp(converter, semaCtx, clauses);
+  cp.processInclusive(loc, clauseOps);
+  cp.processExclusive(loc, clauseOps);
+}
+
 static void genSectionsClauses(
     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
     const List<Clause> &clauses, mlir::Location loc,
@@ -1975,6 +1984,16 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   return parallelOp;
 }
 
+static mlir::omp::ScanOp
+genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
+          semantics::SemanticsContext &semaCtx, mlir::Location loc,
+          const ConstructQueue &queue, ConstructQueue::const_iterator item) {
+  mlir::omp::ScanOperands clauseOps;
+  genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps);
+  return converter.getFirOpBuilder().create<mlir::omp::ScanOp>(
+      converter.getCurrentLocation(), clauseOps);
+}
+
 /// This breaks the normal prototype of the gen*Op functions: adding the
 /// sectionBlocks argument so that the enclosed section constructs can be
 /// lowered here with correct reduction symbol remapping.
@@ -2978,7 +2997,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     genStandaloneParallel(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_scan:
-    TODO(loc, "Unhandled directive " + llvm::omp::getOpenMPDirectiveName(dir));
+    genScanOp(converter, symTable, semaCtx, loc, queue, item);
     break;
   case llvm::omp::Directive::OMPD_section:
     llvm_unreachable("genOMPDispatch: OMPD_section");
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 2cd21107a916e4..e6ca5d5073c336 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -25,6 +25,7 @@
 #include "flang/Parser/tools.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/Support/CommandLine.h"
+#include <string>
 
 static llvm::cl::opt<bool> forceByrefReduction(
     "force-byref-reduction",
@@ -514,18 +515,36 @@ static bool doReductionByRef(mlir::Value reductionVar) {
   return false;
 }
 
+mlir::omp::ReductionModifier
+translateReductionModifier(const ReductionModifier &m) {
+  switch (m) {
+  case ReductionModifier::Default:
+    return mlir::omp::ReductionModifier::defaultmod;
+  case ReductionModifier::Inscan:
+    return mlir::omp::ReductionModifier::inscan;
+  case ReductionModifier::Task:
+    return mlir::omp::ReductionModifier::task;
+  }
+  return mlir::omp::ReductionModifier::defaultmod;
+}
+
 void ReductionProcessor::addDeclareReduction(
     mlir::Location currentLocation, lower::AbstractConverter &converter,
     const omp::clause::Reduction &reduction,
     llvm::SmallVectorImpl<mlir::Value> &reductionVars,
     llvm::SmallVectorImpl<bool> &reduceVarByRef,
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
-    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols) {
+    llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+    mlir::omp::ReductionModifierAttr *reductionMod) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  if (std::get<std::optional<omp::clause::Reduction::ReductionModifier>>(
-          reduction.t))
-    TODO(currentLocation, "Reduction modifiers are not supported");
+  auto mod = std::get<std::optional<ReductionModifier>>(reduction.t);
+  if (mod.has_value() && (mod.value() != ReductionModifier::Inscan)) {
+    std::string modStr = "default";
+    if (mod.value() == ReductionModifier::Task)
+      modStr = "task";
+    TODO(currentLocation, "Reduction modifier " + modStr + " is not supported");
+  }
 
   mlir::omp::DeclareReductionOp decl;
   const auto &redOperatorList{
@@ -649,6 +668,11 @@ void ReductionProcessor::addDeclareReduction(
                                   currentLocation, isByRef);
     reductionDeclSymbols.push_back(
         mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName()));
+    auto redMod = std::get<std::optional<ReductionModifier>>(reduction.t);
+    if (redMod.has_value())
+      *reductionMod = mlir::omp::ReductionModifierAttr::get(
+          firOpBuilder.getContext(),
+          translateReductionModifier(redMod.value()));
   }
 }
 
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
index 5f4d742b62cb10..44ab67979d5db9 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.h
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -19,6 +19,7 @@
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "flang/Semantics/type.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/Types.h"
 
@@ -126,7 +127,8 @@ class ReductionProcessor {
       llvm::SmallVectorImpl<mlir::Value> &reductionVars,
       llvm::SmallVectorImpl<bool> &reduceVarByRef,
       llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
-      llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols);
+      llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+      mlir::omp::ReductionModifierAttr *reductionMod);
 };
 
 template <typename FloatOp, typename IntegerOp>
@@ -156,6 +158,8 @@ ReductionProcessor::getReductionOperation(fir::FirOpBuilder &builder,
   return builder.create<ComplexOp>(loc, op1, op2);
 }
 
+using ReductionModifier = omp::clause::Reduction::ReductionModifier;
+
 } // namespace omp
 } // namespace lower
 } // namespace Fortran
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-inscan.f90 b/flang/test/Lower/OpenMP/Todo/reduction-inscan.f90
deleted file mode 100644
index 152d91a16f80fe..00000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-inscan.f90
+++ /dev/null
@@ -1,15 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction modifiers are not supported
-subroutine reduction_inscan()
-  integer :: i,j
-  i = 0
-
-  !$omp do reduction(inscan, +:i)
-  do j=1,10
-     !$omp scan inclusive(i)
-     i = i + 1
-  end do
-  !$omp end do
-end subroutine reduction_inscan
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90 b/flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90
deleted file mode 100644
index 82625ed8c5f31c..00000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-modifiers.f90
+++ /dev/null
@@ -1,14 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction modifiers are not supported
-
-subroutine foo()
-  integer :: i, j
-  j = 0
-  !$omp do reduction (inscan, *: j)
-  do i = 1, 10
-    !$omp scan inclusive(j)
-    j = j + 1
-  end do
-end subroutine
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
index 6707f65e1a4cc3..b746872e9e7edf 100644
--- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90
+++ b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
@@ -1,7 +1,7 @@
 ! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 
-! CHECK: not yet implemented: Reduction modifiers are not supported
+! CHECK: not yet implemented: Reduction modifier task is not supported
 subroutine reduction_task()
   integer :: i
   i = 0
diff --git a/flang/test/Lower/OpenMP/scan.f90 b/flang/test/Lower/OpenMP/scan.f90
new file mode 100644
index 00000000000000..9cf2174a7f3314
--- /dev/null
+++ b/flang/test/Lower/OpenMP/scan.f90
@@ -0,0 +1,34 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+subroutine inclusive_scan
+ implicit none
+ integer, parameter :: n = 100
+ integer a(n), b(n)
+ integer x, k
+
+ !CHECK: omp.wsloop reduction(mod: inscan, {{.*}}) {
+ !$omp parallel do reduction(inscan, +: x)
+ do k = 1, n
+   x = x + a(k)
+   !CHECK: omp.scan inclusive({{.*}})
+   !$omp scan inclusive(x)
+   b(k) = x
+ end do
+end subroutine inclusive_scan
+
+
+subroutine exclusive_scan
+ implicit none
+ integer, parameter :: n = 100
+ integer a(n), b(n)
+ integer x, k
+
+ !CHECK: omp.wsloop reduction(mod: inscan, {{.*}}) {
+ !$omp parallel do reduction(inscan, +: x)
+ do k = 1, n
+   x = x + a(k)
+   !CHECK: omp.scan exclusive({{.*}})
+   !$omp scan exclusive(x)
+   b(k) = x
+ end do
+end subroutine exclusive_scan
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 5d0003911bca87..89444118a9d04d 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -226,7 +226,7 @@ void mlir::configureOpenMPToLLVMConversionLegality(
   target.addDynamicallyLegalOp<
       omp::AtomicReadOp, omp::AtomicWriteOp, omp::CancellationPointOp,
       omp::CancelOp, omp::CriticalDeclareOp, omp::FlushOp, omp::MapBoundsOp,
-      omp::MapInfoOp, omp::OrderedOp, omp::TargetEnterDataOp,
+      omp::MapInfoOp, omp::OrderedOp, omp::ScanOp, omp::TargetEnterDataOp,
       omp::TargetExitDataOp, omp::TargetUpdateOp, omp::ThreadprivateOp,
       omp::YieldOp>([&](Operation *op) {
     return typeConverter.isLegal(op->getOperandTypes()) &&
@@ -264,6 +264,7 @@ void mlir::populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
       RegionLessOpConversion<omp::CancelOp>,
       RegionLessOpConversion<omp::CriticalDeclareOp>,
       RegionLessOpConversion<omp::OrderedOp>,
+      RegionLessOpConversion<omp::ScanOp>,
       RegionLessOpConversion<omp::TargetEnterDataOp>,
       RegionLessOpConversion<omp::TargetExitDataOp>,
       RegionLessOpConversion<omp::TargetUpdateOp>,

>From bb6a40c724bf61f5927a39f046ff338958cf1f95 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 23 Jan 2025 15:26:10 -0600
Subject: [PATCH 2/2] R2: Addressing Review Comments

---
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |  36 +++---
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  24 ++--
 flang/lib/Lower/OpenMP/ReductionProcessor.h   |   4 +-
 .../test/Lower/OpenMP/Todo/reduction-task.f90 |   2 +-
 flang/test/Lower/OpenMP/scan.f90              | 120 ++++++++++++++++--
 5 files changed, 138 insertions(+), 48 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 8bec29c74a1542..d61fd7ba1f8810 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -347,14 +347,15 @@ bool ClauseProcessor::processDistSchedule(
 bool ClauseProcessor::processExclusive(
     mlir::Location currentLocation,
     mlir::omp::ExclusiveClauseOps &result) const {
-  return findRepeatableClause<omp::clause::Exclusive>(
-      [&](const omp::clause::Exclusive &clause, const parser::CharBlock &) {
-        for (const Object &object : clause.v) {
-          const semantics::Symbol *symbol = object.sym();
-          mlir::Value symVal = converter.getSymbolAddress(*symbol);
-          result.exclusiveVars.push_back(symVal);
-        }
-      });
+  if (auto *clause = findUniqueClause<omp::clause::Exclusive>()) {
+    for (const Object &object : clause->v) {
+      const semantics::Symbol *symbol = object.sym();
+      mlir::Value symVal = converter.getSymbolAddress(*symbol);
+      result.exclusiveVars.push_back(symVal);
+    }
+    return true;
+  }
+  return false;
 }
 
 bool ClauseProcessor::processFilter(lower::StatementContext &stmtCtx,
@@ -396,14 +397,15 @@ bool ClauseProcessor::processHint(mlir::omp::HintClauseOps &result) const {
 bool ClauseProcessor::processInclusive(
     mlir::Location currentLocation,
     mlir::omp::InclusiveClauseOps &result) const {
-  return findRepeatableClause<omp::clause::Inclusive>(
-      [&](const omp::clause::Inclusive &clause, const parser::CharBlock &) {
-        for (const Object &object : clause.v) {
-          const semantics::Symbol *symbol = object.sym();
-          mlir::Value symVal = converter.getSymbolAddress(*symbol);
-          result.inclusiveVars.push_back(symVal);
-        }
-      });
+  if (auto *clause = findUniqueClause<omp::clause::Inclusive>()) {
+    for (const Object &object : clause->v) {
+      const semantics::Symbol *symbol = object.sym();
+      mlir::Value symVal = converter.getSymbolAddress(*symbol);
+      result.inclusiveVars.push_back(symVal);
+    }
+    return true;
+  }
+  return false;
 }
 
 bool ClauseProcessor::processMergeable(
@@ -1163,7 +1165,7 @@ bool ClauseProcessor::processReduction(
         ReductionProcessor rp;
         rp.addDeclareReduction(
             currentLocation, converter, clause, reductionVars, reduceVarByRef,
-            reductionDeclSymbols, reductionSyms, &result.reductionMod);
+            reductionDeclSymbols, reductionSyms, result.reductionMod);
         // Copy local lists into the output.
         llvm::copy(reductionVars, std::back_inserter(result.reductionVars));
         llvm::copy(reduceVarByRef, std::back_inserter(result.reductionByref));
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index e6ca5d5073c336..3d67048a9f7ee0 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -25,7 +25,6 @@
 #include "flang/Parser/tools.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "llvm/Support/CommandLine.h"
-#include <string>
 
 static llvm::cl::opt<bool> forceByrefReduction(
     "force-byref-reduction",
@@ -36,6 +35,8 @@ namespace Fortran {
 namespace lower {
 namespace omp {
 
+using ReductionModifier = omp::clause::Reduction::ReductionModifier;
+
 ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType(
     const omp::clause::ProcedureDesignator &pd) {
   auto redType = llvm::StringSwitch<std::optional<ReductionIdentifier>>(
@@ -515,9 +516,8 @@ static bool doReductionByRef(mlir::Value reductionVar) {
   return false;
 }
 
-mlir::omp::ReductionModifier
-translateReductionModifier(const ReductionModifier &m) {
-  switch (m) {
+mlir::omp::ReductionModifier translateReductionModifier(ReductionModifier mod) {
+  switch (mod) {
   case ReductionModifier::Default:
     return mlir::omp::ReductionModifier::defaultmod;
   case ReductionModifier::Inscan:
@@ -535,15 +535,16 @@ void ReductionProcessor::addDeclareReduction(
     llvm::SmallVectorImpl<bool> &reduceVarByRef,
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
     llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
-    mlir::omp::ReductionModifierAttr *reductionMod) {
+    mlir::omp::ReductionModifierAttr &reductionMod) {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
   auto mod = std::get<std::optional<ReductionModifier>>(reduction.t);
-  if (mod.has_value() && (mod.value() != ReductionModifier::Inscan)) {
-    std::string modStr = "default";
+  if (mod.has_value()) {
     if (mod.value() == ReductionModifier::Task)
-      modStr = "task";
-    TODO(currentLocation, "Reduction modifier " + modStr + " is not supported");
+      TODO(currentLocation, "Reduction modifier `task` is not supported");
+    else
+      reductionMod = mlir::omp::ReductionModifierAttr::get(
+          firOpBuilder.getContext(), translateReductionModifier(mod.value()));
   }
 
   mlir::omp::DeclareReductionOp decl;
@@ -668,11 +669,6 @@ void ReductionProcessor::addDeclareReduction(
                                   currentLocation, isByRef);
     reductionDeclSymbols.push_back(
         mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName()));
-    auto redMod = std::get<std::optional<ReductionModifier>>(reduction.t);
-    if (redMod.has_value())
-      *reductionMod = mlir::omp::ReductionModifierAttr::get(
-          firOpBuilder.getContext(),
-          translateReductionModifier(redMod.value()));
   }
 }
 
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h
index 44ab67979d5db9..621d61aad69b36 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.h
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h
@@ -128,7 +128,7 @@ class ReductionProcessor {
       llvm::SmallVectorImpl<bool> &reduceVarByRef,
       llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
       llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
-      mlir::omp::ReductionModifierAttr *reductionMod);
+      mlir::omp::ReductionModifierAttr &reductionMod);
 };
 
 template <typename FloatOp, typename IntegerOp>
@@ -158,8 +158,6 @@ ReductionProcessor::getReductionOperation(fir::FirOpBuilder &builder,
   return builder.create<ComplexOp>(loc, op1, op2);
 }
 
-using ReductionModifier = omp::clause::Reduction::ReductionModifier;
-
 } // namespace omp
 } // namespace lower
 } // namespace Fortran
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
index b746872e9e7edf..b8bfc37d1758f9 100644
--- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90
+++ b/flang/test/Lower/OpenMP/Todo/reduction-task.f90
@@ -1,7 +1,7 @@
 ! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 ! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
 
-! CHECK: not yet implemented: Reduction modifier task is not supported
+! CHECK: not yet implemented: Reduction modifier `task` is not supported
 subroutine reduction_task()
   integer :: i
   i = 0
diff --git a/flang/test/Lower/OpenMP/scan.f90 b/flang/test/Lower/OpenMP/scan.f90
index 9cf2174a7f3314..4a316d74c81e41 100644
--- a/flang/test/Lower/OpenMP/scan.f90
+++ b/flang/test/Lower/OpenMP/scan.f90
@@ -1,33 +1,127 @@
-!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+! RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
 
-subroutine inclusive_scan
+! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
+
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_i32 : i32 init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: i32):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 0 : i32
+! CHECK:           omp.yield(%[[VAL_1]] : i32)
+!
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32):
+! CHECK:           %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_1]] : i32
+! CHECK:           omp.yield(%[[VAL_2]] : i32)
+! CHECK:         }
+!
+! CHECK-LABEL:   func.func @_QPinclusive_scan(
+! CHECK-SAME:                                 %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                                 %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"},
+! CHECK-SAME:                                 %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK:           %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFinclusive_scanEa"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_3]] {uniq_name = "_QFinclusive_scanEb"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFinclusive_scanEk"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFinclusive_scanEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {uniq_name = "_QFinclusive_scanEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFinclusive_scanEx"}
+! CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFinclusive_scanEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFinclusive_scanEk"}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFinclusive_scanEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_14:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop reduction(mod: inscan, @add_reduction_i32 %[[VAL_10]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>) {
+! CHECK:               omp.loop_nest (%[[VAL_17:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFinclusive_scanEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_17]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_18]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_21]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_24:.*]] = arith.addi %[[VAL_19]], %[[VAL_23]] : i32
+! CHECK:                 hlfir.assign %[[VAL_24]] to %[[VAL_18]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.scan inclusive(%[[VAL_18]]#1 : !fir.ref<i32>)
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_18]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> i64
+! CHECK:                 %[[VAL_28:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_27]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 hlfir.assign %[[VAL_25]] to %[[VAL_28]] : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+!
+! CHECK-LABEL:   func.func @_QPexclusive_scan(
+! CHECK-SAME:                                 %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+! CHECK-SAME:                                 %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"},
+! CHECK-SAME:                                 %[[VAL_2:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK:           %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_3]] {uniq_name = "_QFexclusive_scanEa"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_3]] {uniq_name = "_QFexclusive_scanEb"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_6:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFexclusive_scanEk"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFexclusive_scanEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {uniq_name = "_QFexclusive_scanEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFexclusive_scanEx"}
+! CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFexclusive_scanEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_11:.*]] = fir.alloca i32 {bindc_name = "k", pinned, uniq_name = "_QFexclusive_scanEk"}
+! CHECK:             %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QFexclusive_scanEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_14:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_15:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop reduction(mod: inscan, @add_reduction_i32 %[[VAL_10]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>) {
+! CHECK:               omp.loop_nest (%[[VAL_17:.*]]) : i32 = (%[[VAL_13]]) to (%[[VAL_14]]) inclusive step (%[[VAL_15]]) {
+! CHECK:                 %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFexclusive_scanEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:                 fir.store %[[VAL_17]] to %[[VAL_12]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_18]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_20:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i32) -> i64
+! CHECK:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_21]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+! CHECK:                 %[[VAL_24:.*]] = arith.addi %[[VAL_19]], %[[VAL_23]] : i32
+! CHECK:                 hlfir.assign %[[VAL_24]] to %[[VAL_18]]#0 : i32, !fir.ref<i32>
+! CHECK:                 omp.scan exclusive(%[[VAL_18]]#1 : !fir.ref<i32>)
+! CHECK:                 %[[VAL_25:.*]] = fir.load %[[VAL_18]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_26:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<i32>
+! CHECK:                 %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> i64
+! CHECK:                 %[[VAL_28:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_27]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK:                 hlfir.assign %[[VAL_25]] to %[[VAL_28]] : i32, !fir.ref<i32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
+
+subroutine inclusive_scan(a, b, n)
  implicit none
- integer, parameter :: n = 100
- integer a(n), b(n)
- integer x, k
+ integer a(:), b(:)
+ integer x, k, n
 
- !CHECK: omp.wsloop reduction(mod: inscan, {{.*}}) {
  !$omp parallel do reduction(inscan, +: x)
  do k = 1, n
    x = x + a(k)
-   !CHECK: omp.scan inclusive({{.*}})
    !$omp scan inclusive(x)
    b(k) = x
  end do
 end subroutine inclusive_scan
 
 
-subroutine exclusive_scan
+subroutine exclusive_scan(a, b, n)
  implicit none
- integer, parameter :: n = 100
- integer a(n), b(n)
- integer x, k
+ integer a(:), b(:)
+ integer x, k, n
 
- !CHECK: omp.wsloop reduction(mod: inscan, {{.*}}) {
  !$omp parallel do reduction(inscan, +: x)
  do k = 1, n
    x = x + a(k)
-   !CHECK: omp.scan exclusive({{.*}})
    !$omp scan exclusive(x)
    b(k) = x
  end do