[flang-commits] [flang] [Flang][OpenMP] Correct ArrayElements in Reduction Clause (PR #196094)

Jack Styles via flang-commits flang-commits at lists.llvm.org
Tue May 26 06:36:55 PDT 2026


https://github.com/Stylie777 updated https://github.com/llvm/llvm-project/pull/196094

>From 30d2c6e78836bb0989c77522b45b9eed3598f451 Mon Sep 17 00:00:00 2001
From: Jack Styles <jack.styles at arm.com>
Date: Thu, 26 Feb 2026 09:58:06 +0000
Subject: [PATCH] [Flang][OpenMP] Reduce ArrayElements in Reduction Clause

Currently, when an ArrayElement is used within a Reduction clause,
it will be lowered with the reduction referencing the box containing
the array, not just the element.

To address this, adjust Flang lowering to track expressions alongside
symbols to ensure that the Array Element context is not lost and is
considered when lowering a reduction with an Array Element. This ensures
that, when represented in HLFIR, it will be just the element's type,
rather than the full array.

The Data Sharing Processor has also been adjusted to understand when
an Expression is used within a Reduction Clause, and in cases where
the only reference to the original Symbol is the array element used
in the Reduction clause, the privatisation is ensured to just be the
array element, rather than the full array.

Currently this excludes DO CONCURRENT, as it excludes Array Elements,
and is limited to Array Elements, but there are options to expand this
to Array Sections in the future.

Assisted-by: Codex
---
 .../flang/Lower/Support/ReductionProcessor.h  |  10 +
 flang/lib/Lower/Bridge.cpp                    |   3 +-
 flang/lib/Lower/ConvertExprToHLFIR.cpp        |  40 ++-
 flang/lib/Lower/OpenMP/ClauseProcessor.cpp    |   8 +-
 .../lib/Lower/OpenMP/DataSharingProcessor.cpp |  44 ++-
 flang/lib/Lower/OpenMP/DataSharingProcessor.h |  13 +-
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 292 +++++++++++++++++-
 .../lib/Lower/Support/ReductionProcessor.cpp  | 147 +++++----
 .../Lower/OpenMP/reduction-array-element.f90  | 143 +++++++++
 9 files changed, 618 insertions(+), 82 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/reduction-array-element.f90

diff --git a/flang/include/flang/Lower/Support/ReductionProcessor.h b/flang/include/flang/Lower/Support/ReductionProcessor.h
index 0b4a692827a79..7c9d4bae9b7ad 100644
--- a/flang/include/flang/Lower/Support/ReductionProcessor.h
+++ b/flang/include/flang/Lower/Support/ReductionProcessor.h
@@ -13,6 +13,7 @@
 #ifndef FORTRAN_LOWER_REDUCTIONPROCESSOR_H
 #define FORTRAN_LOWER_REDUCTIONPROCESSOR_H
 
+#include "flang/Lower/AbstractConverter.h"
 #include "flang/Lower/OpenMP/Clauses.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
@@ -21,6 +22,7 @@
 #include "flang/Semantics/type.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/Types.h"
+#include "llvm/ADT/ArrayRef.h"
 
 namespace mlir {
 namespace omp {
@@ -158,8 +160,16 @@ class ReductionProcessor {
       llvm::SmallVectorImpl<bool> &reduceVarByRef,
       llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
       const llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+      llvm::ArrayRef<Object> reductionObjects, lower::SymMap &symMap,
       llvm::DenseMap<const semantics::Symbol *, mlir::Value>
           *reductionVarCache = nullptr);
+
+  /// Check if an expression is lowered as a Reduction object. This ensures
+  /// reductions such as Array Elements are properly represented, rather than
+  /// reducing the full array.
+  // TODO support more types of objects
+  // to avoid Reduction clauses being represented in FIR as full arrays.
+  static bool isExpressionLoweredAsReductionObject(const Object *object);
 };
 
 template <typename FloatOp, typename IntegerOp>
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 1dfcb9980f18f..84c2fa8222dd0 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2401,7 +2401,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     Fortran::lower::omp::ReductionProcessor rp;
     bool result = rp.processReductionArguments<fir::DeclareReductionOp>(
         toLocation(), *this, info.reduceOperatorList, reduceVars,
-        reduceVarByRef, reductionDeclSymbols, info.reduceSymList);
+        reduceVarByRef, reductionDeclSymbols, info.reduceSymList,
+        /*reductionObjects=*/{}, getSymbolMap());
     if (!result)
       TODO(toLocation(), "Lowering unrecognised reduction type");
 
diff --git a/flang/lib/Lower/ConvertExprToHLFIR.cpp b/flang/lib/Lower/ConvertExprToHLFIR.cpp
index ad680269dea5c..ab18864068040 100644
--- a/flang/lib/Lower/ConvertExprToHLFIR.cpp
+++ b/flang/lib/Lower/ConvertExprToHLFIR.cpp
@@ -1550,6 +1550,38 @@ static bool hasDeferredCharacterLength(const Fortran::semantics::Symbol &sym) {
          type->characterTypeSpec().length().isDeferred();
 }
 
+static mlir::Value
+findOverriddenExprValue(const Fortran::lower::ExprToValueMap &map,
+                        const Fortran::lower::SomeExpr &expr) {
+  if (auto match = map.find(&expr); match != map.end())
+    return match->second;
+
+  // The map uses pointer identity, but the same expression
+  // (e.g. a(2)) may appear at multiple AST nodes with different addresses.
+  // Fall back to structural comparison via Fortran::lower::isEqual.
+  std::optional<Fortran::evaluate::DataRef> exprDataRef =
+      Fortran::evaluate::ExtractDataRef(expr);
+  if (!exprDataRef)
+    return {};
+  const Fortran::evaluate::ArrayRef *exprArrayRef =
+      std::get_if<Fortran::evaluate::ArrayRef>(&exprDataRef->u);
+  if (!exprArrayRef)
+    return {};
+
+  for (auto [key, value] : map) {
+    std::optional<Fortran::evaluate::DataRef> keyDataRef =
+        Fortran::evaluate::ExtractDataRef(*key);
+    if (!keyDataRef)
+      continue;
+    const Fortran::evaluate::ArrayRef *keyArrayRef =
+        std::get_if<Fortran::evaluate::ArrayRef>(&keyDataRef->u);
+    if (keyArrayRef && Fortran::lower::isEqual(keyArrayRef, exprArrayRef))
+      return value;
+  }
+
+  return {};
+}
+
 /// Lower Expr to HLFIR.
 class HlfirBuilder {
 public:
@@ -1563,12 +1595,12 @@ class HlfirBuilder {
     if (const Fortran::lower::ExprToValueMap *map =
             getConverter().getExprOverrides()) {
       if constexpr (std::is_same_v<T, Fortran::evaluate::SomeType>) {
-        if (auto match = map->find(&expr); match != map->end())
-          return hlfir::EntityWithAttributes{match->second};
+        if (mlir::Value value = findOverriddenExprValue(*map, expr))
+          return hlfir::EntityWithAttributes{value};
       } else {
         Fortran::lower::SomeExpr someExpr = toEvExpr(expr);
-        if (auto match = map->find(&someExpr); match != map->end())
-          return hlfir::EntityWithAttributes{match->second};
+        if (mlir::Value value = findOverriddenExprValue(*map, someExpr))
+          return hlfir::EntityWithAttributes{value};
       }
     }
     return Fortran::common::visit([&](const auto &x) { return gen(x); },
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 213b5f783430e..2e45c0a836291 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -1576,7 +1576,7 @@ bool ClauseProcessor::processInReduction(
                 currentLocation, converter,
                 std::get<typename omp::clause::ReductionOperatorList>(clause.t),
                 inReductionVars, inReduceVarByRef, inReductionDeclSymbols,
-                inReductionSyms))
+                inReductionSyms, inReductionObjects, converter.getSymbolMap()))
           TODO(currentLocation, "Lowering unrecognised reduction type");
 
         // Copy local lists into the output.
@@ -2131,7 +2131,8 @@ bool ClauseProcessor::processReduction(
                 currentLocation, converter,
                 std::get<typename omp::clause::ReductionOperatorList>(clause.t),
                 reductionVars, reduceVarByRef, reductionDeclSymbols,
-                reductionSyms, reductionVarCache))
+                reductionSyms, reductionObjects, converter.getSymbolMap(),
+                reductionVarCache))
           TODO(currentLocation, "Lowering unrecognised reduction type");
         // Copy local lists into the output.
         llvm::copy(reductionVars, std::back_inserter(result.reductionVars));
@@ -2160,7 +2161,8 @@ bool ClauseProcessor::processTaskReduction(
                 currentLocation, converter,
                 std::get<typename omp::clause::ReductionOperatorList>(clause.t),
                 taskReductionVars, taskReduceVarByRef, taskReductionDeclSymbols,
-                taskReductionSyms))
+                taskReductionSyms, taskReductionObjects,
+                converter.getSymbolMap()))
           TODO(currentLocation, "Lowering unrecognised reduction type");
         // Copy local lists into the output.
         llvm::copy(taskReductionVars,
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index e392497d30de7..b4c699daafd1b 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -68,27 +68,30 @@ DataSharingProcessor::DataSharingProcessor(
     lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
     const List<Clause> &clauses, lower::pft::Evaluation &eval,
     bool shouldCollectPreDeterminedSymbols, bool useDelayedPrivatization,
-    lower::SymMap &symTable, bool isTargetPrivatization)
+    lower::SymMap &symTable, bool isTargetPrivatization,
+    llvm::ArrayRef<const semantics::Symbol *> symbolsCoveredByReductionElements)
     : converter(converter), semaCtx(semaCtx),
       firOpBuilder(converter.getFirOpBuilder()), clauses(clauses), eval(eval),
       shouldCollectPreDeterminedSymbols(shouldCollectPreDeterminedSymbols),
       useDelayedPrivatization(useDelayedPrivatization), symTable(symTable),
       isTargetPrivatization(isTargetPrivatization), visitor(semaCtx) {
+  this->symbolsCoveredByReductionElements.insert(
+      symbolsCoveredByReductionElements.begin(),
+      symbolsCoveredByReductionElements.end());
   eval.visit([&](const auto &functionParserNode) {
     parser::Walk(functionParserNode, visitor);
   });
 }
 
-DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter,
-                                           semantics::SemanticsContext &semaCtx,
-                                           lower::pft::Evaluation &eval,
-                                           bool useDelayedPrivatization,
-                                           lower::SymMap &symTable,
-                                           bool isTargetPrivatization)
-    : DataSharingProcessor(converter, semaCtx, {}, eval,
-                           /*shouldCollectPreDeterminedSymols=*/false,
-                           useDelayedPrivatization, symTable,
-                           isTargetPrivatization) {}
+DataSharingProcessor::DataSharingProcessor(
+    lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx,
+    lower::pft::Evaluation &eval, bool useDelayedPrivatization,
+    lower::SymMap &symTable, bool isTargetPrivatization,
+    llvm::ArrayRef<const semantics::Symbol *> symbolsCoveredByReductionElements)
+    : DataSharingProcessor(
+          converter, semaCtx, {}, eval,
+          /*shouldCollectPreDeterminedSymbols=*/false, useDelayedPrivatization,
+          symTable, isTargetPrivatization, symbolsCoveredByReductionElements) {}
 
 void DataSharingProcessor::processStep1(
     mlir::omp::PrivateClauseOps *clauseOps,
@@ -293,6 +296,20 @@ void DataSharingProcessor::collectSymbolsForPrivatization() {
       allPrivatizedSymbols.insert(sym);
 }
 
+bool DataSharingProcessor::isCoveredByReductionElement(
+    const semantics::Symbol *sym) const {
+  if (symbolsCoveredByReductionElements.contains(sym) ||
+      symbolsCoveredByReductionElements.contains(&sym->GetUltimate()))
+    return true;
+
+  if (const auto *hostAssoc = sym->detailsIf<semantics::HostAssocDetails>())
+    return symbolsCoveredByReductionElements.contains(&hostAssoc->symbol()) ||
+           symbolsCoveredByReductionElements.contains(
+               &hostAssoc->symbol().GetUltimate());
+
+  return false;
+}
+
 bool DataSharingProcessor::needBarrier() {
   // Emit implicit barrier to synchronize threads and avoid data races on
   // initialization of firstprivate variables and post-update of lastprivate
@@ -498,6 +515,11 @@ void DataSharingProcessor::collectPrivatizedSymbols(
 
   auto shouldCollectSymbol = [&](const semantics::Symbol *sym) {
     if (collectImplicit) {
+      // If all uses of a privatised variable are covered by an expr in a
+      // reduction clause, these should be ignored.
+      if (isCoveredByReductionElement(sym))
+        return false;
+
       // If we're a combined construct with a target region, implicit
       // firstprivate captures, should only belong to the target region
       // and not be added/captured by later directives. Parallel regions
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 5dd564d4bbb61..299f478f17b3a 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -19,6 +19,8 @@
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/symbol.h"
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include <variant>
 
 namespace mlir {
@@ -106,6 +108,8 @@ class DataSharingProcessor {
   bool shouldCollectPreDeterminedSymbols;
   bool useDelayedPrivatization;
   llvm::SmallPtrSet<const semantics::Symbol *, 16> mightHaveReadHostSym;
+  llvm::SmallPtrSet<const semantics::Symbol *, 4>
+      symbolsCoveredByReductionElements;
   lower::SymMap &symTable;
   bool isTargetPrivatization;
   OMPConstructSymbolVisitor visitor;
@@ -126,6 +130,7 @@ class DataSharingProcessor {
       const omp::ObjectList &objects,
       llvm::SetVector<const semantics::Symbol *> &symbolSet);
   void collectSymbolsForPrivatization();
+  bool isCoveredByReductionElement(const semantics::Symbol *sym) const;
   void insertBarrier(mlir::omp::PrivateClauseOps *clauseOps);
   void collectDefaultSymbols();
   void collectImplicitSymbols();
@@ -154,13 +159,17 @@ class DataSharingProcessor {
                        lower::pft::Evaluation &eval,
                        bool shouldCollectPreDeterminedSymbols,
                        bool useDelayedPrivatization, lower::SymMap &symTable,
-                       bool isTargetPrivatization = false);
+                       bool isTargetPrivatization = false,
+                       llvm::ArrayRef<const semantics::Symbol *>
+                           symbolsCoveredByReductionElements = {});
 
   DataSharingProcessor(lower::AbstractConverter &converter,
                        semantics::SemanticsContext &semaCtx,
                        lower::pft::Evaluation &eval,
                        bool useDelayedPrivatization, lower::SymMap &symTable,
-                       bool isTargetPrivatization = false);
+                       bool isTargetPrivatization = false,
+                       llvm::ArrayRef<const semantics::Symbol *>
+                           symbolsCoveredByReductionElements = {});
 
   // Privatisation is split into two steps.
   // Step1 performs cloning of all privatisation clauses and copying for
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 7cb7e379eb503..2bca9ce840a90 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -412,18 +412,31 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
                              llvm::ArrayRef<mlir::Value> vars,
                              llvm::ArrayRef<mlir::BlockArgument> args) {
     llvm::SmallVector<const semantics::Symbol *> processedSyms;
+    llvm::SmallVector<const Object *> processedObjects;
     for (const Object &object : objects) {
       const semantics::Symbol *sym = object.sym();
       if (const auto *commonDet =
               sym->detailsIf<semantics::CommonBlockDetails>()) {
-        llvm::transform(commonDet->objects(), std::back_inserter(processedSyms),
-                        [&](const auto &mem) { return &*mem; });
+        for (auto &mem : commonDet->objects()) {
+          processedSyms.push_back(&*mem);
+          processedObjects.push_back(&object);
+        }
       } else {
         processedSyms.push_back(sym);
+        processedObjects.push_back(&object);
       }
     }
 
-    for (auto [sym, var, arg] : llvm::zip_equal(processedSyms, vars, args))
+    assert(processedSyms.size() == processedObjects.size());
+    for (auto [sym, var, arg, object] :
+         llvm::zip_equal(processedSyms, vars, args, processedObjects)) {
+      bool skipBind =
+          ReductionProcessor::isExpressionLoweredAsReductionObject(object) ||
+          (object && sym->Rank() > 0 &&
+           !fir::unwrapUntilSeqType(arg.getType()));
+      if (skipBind)
+        continue;
+
       converter.bindSymbol(
           *sym,
           hlfir::translateToExtendedValue(
@@ -431,6 +444,7 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
               /*contiguousHint=*/
               evaluate::IsSimplyContiguous(*sym, converter.getFoldingContext()))
               .first);
+    }
   };
 
   // Process in clause name alphabetical order to match block arguments order.
@@ -980,13 +994,44 @@ genLoopVars(mlir::Operation *op, lower::AbstractConverter &converter,
   // next one would result in 'hlfir.declare' operations being introduced inside
   // of a wrapper, which is illegal.
   mlir::IRMapping mapper;
+  llvm::SmallVector<std::pair<Object, mlir::Value>> mappedReductionObjects;
+  auto mapEquivalentReductionObjects =
+      [&](const ObjectEntryBlockArgsEntry &entry) {
+        for (auto [object, var] : llvm::zip(entry.objects, entry.vars)) {
+          for (auto [mappedObject, mappedValue] :
+               llvm::reverse(mappedReductionObjects)) {
+            if (object.id() == mappedObject.id()) {
+              mapper.map(var, mappedValue);
+              break;
+            }
+          }
+        }
+      };
+  auto rememberReductionObjects =
+      [&](const ObjectEntryBlockArgsEntry &entry,
+          llvm::ArrayRef<mlir::BlockArgument> args) {
+        for (auto [object, arg] : llvm::zip(entry.objects, args))
+          mappedReductionObjects.emplace_back(object, arg);
+      };
+
   for (auto [argGeneratingOp, blockArgs] : wrapperArgs) {
+    mapEquivalentReductionObjects(blockArgs.inReduction);
+    mapEquivalentReductionObjects(blockArgs.reduction);
+    mapEquivalentReductionObjects(blockArgs.taskReduction);
+
     for (mlir::OpOperand &operand : argGeneratingOp->getOpOperands())
       operand.set(mapper.lookupOrDefault(operand.get()));
 
     for (const auto [arg, var] : llvm::zip_equal(
              argGeneratingOp->getRegion(0).getArguments(), blockArgs.getVars()))
       mapper.map(var, arg);
+
+    rememberReductionObjects(blockArgs.inReduction,
+                             argGeneratingOp.getInReductionBlockArgs());
+    rememberReductionObjects(blockArgs.reduction,
+                             argGeneratingOp.getReductionBlockArgs());
+    rememberReductionObjects(blockArgs.taskReduction,
+                             argGeneratingOp.getTaskReductionBlockArgs());
   }
 
   // Bind the entry block arguments of parent wrappers to the corresponding
@@ -1259,6 +1304,186 @@ struct OpWithBodyGenInfo {
   bool privatize = true;
 };
 
+static mlir::Value getReductionOverrideValue(fir::FirOpBuilder &builder,
+                                             mlir::Location loc,
+                                             const Object *object,
+                                             mlir::BlockArgument arg) {
+  if (hlfir::isFortranEntityWithAttributes(arg))
+    return arg;
+
+  fir::FortranVariableFlagsAttr attributes;
+  llvm::SmallVector<mlir::Value> typeParams;
+  auto declareOp = hlfir::DeclareOp::create(
+      builder, loc, arg, "omp.reduction.element", nullptr, typeParams, nullptr,
+      nullptr, 0, attributes);
+  return declareOp.getBase();
+}
+
+static void
+addReductionObjectOverrides(fir::FirOpBuilder &builder, mlir::Location loc,
+                            lower::ExprToValueMap &overrides,
+                            const ObjectEntryBlockArgsEntry &entry,
+                            llvm::ArrayRef<mlir::BlockArgument> blockArgs) {
+  if (entry.objects.empty())
+    return;
+
+  assert(entry.objects.size() == blockArgs.size() &&
+         "reduction object list must match block arguments");
+  for (auto [object, arg] : llvm::zip_equal(entry.objects, blockArgs)) {
+    if (!ReductionProcessor::isExpressionLoweredAsReductionObject(&object))
+      continue;
+    const SomeExpr *expr = &object.ref().value();
+
+    // Evict any outer-scope entry for the same array element so the
+    // innermost scope always wins regardless of DenseMap iteration order.
+    llvm::SmallVector<const SomeExpr *> toEvict;
+    for (auto [key, value] : overrides) {
+      if (Fortran::lower::isEqual(key, expr)) {
+        toEvict.push_back(key);
+      }
+    }
+    for (const SomeExpr *key : toEvict) {
+      overrides.erase(key);
+    }
+
+    overrides[expr] = getReductionOverrideValue(builder, loc, &object, arg);
+  }
+}
+
+static const semantics::Symbol *getArrayElementSymbol(const SomeExpr &expr) {
+  std::optional<Fortran::evaluate::DataRef> dataRef =
+      Fortran::evaluate::ExtractDataRef(expr);
+  if (!dataRef)
+    return nullptr;
+
+  if (const auto *arrayRef =
+          std::get_if<Fortran::evaluate::ArrayRef>(&dataRef->u))
+    return &arrayRef->GetLastSymbol();
+
+  return nullptr;
+}
+
+static void
+addSymbolAliases(llvm::SmallVectorImpl<const semantics::Symbol *> &aliases,
+                 const semantics::Symbol *symbol) {
+  aliases.push_back(symbol);
+  aliases.push_back(&symbol->GetUltimate());
+  if (const auto *hostAssoc =
+          symbol->detailsIf<semantics::HostAssocDetails>()) {
+    aliases.push_back(&hostAssoc->symbol());
+    aliases.push_back(&hostAssoc->symbol().GetUltimate());
+  }
+}
+
+struct ArrayElementReductionUseCollector {
+  explicit ArrayElementReductionUseCollector(
+      const llvm::DenseMap<const semantics::Symbol *, const semantics::Symbol *>
+          &aliasToReductionSymbol,
+      llvm::DenseMap<const semantics::Symbol *,
+                     llvm::SmallVector<const SomeExpr *>>
+          &reductionElementExprs)
+      : aliasToReductionSymbol(aliasToReductionSymbol),
+        reductionElementExprs(reductionElementExprs) {}
+
+  const llvm::DenseMap<const semantics::Symbol *, const semantics::Symbol *>
+      &aliasToReductionSymbol;
+  llvm::DenseMap<const semantics::Symbol *, llvm::SmallVector<const SomeExpr *>>
+      &reductionElementExprs;
+  llvm::SmallPtrSet<const semantics::Symbol *, 16> seen;
+  llvm::SmallPtrSet<const semantics::Symbol *, 16> uncovered;
+
+  void classifyReductionElementUses(const SomeExpr &expr) {
+    llvm::SmallPtrSet<const semantics::Symbol *, 4> exprCandidates;
+    auto getReductionSymbol = [this](const semantics::Symbol &symbol) {
+      auto it = aliasToReductionSymbol.find(&symbol);
+      return it == aliasToReductionSymbol.end() ? nullptr : it->second;
+    };
+    for (const semantics::Symbol &symbol :
+         Fortran::evaluate::CollectSymbols(expr))
+      if (const semantics::Symbol *reductionSymbol = getReductionSymbol(symbol))
+        exprCandidates.insert(reductionSymbol);
+    if (exprCandidates.empty())
+      return;
+
+    auto isCoveredReductionUse =
+        [this](const semantics::Symbol *reductionSymbol, const SomeExpr &expr) {
+          auto it = reductionElementExprs.find(reductionSymbol);
+          return it != reductionElementExprs.end() &&
+                 llvm::any_of(it->second, [&](const SomeExpr *reductionExpr) {
+                   return Fortran::lower::isEqual(&expr, reductionExpr);
+                 });
+        };
+    llvm::SmallPtrSet<const semantics::Symbol *, 4> seenInExpr;
+    for (const SomeExpr &designator :
+         semantics::omp::GetTopLevelDesignators(expr)) {
+      const semantics::Symbol *symbol = getArrayElementSymbol(designator);
+      const semantics::Symbol *reductionSymbol =
+          symbol ? getReductionSymbol(*symbol) : nullptr;
+      if (!reductionSymbol)
+        continue;
+
+      if (isCoveredReductionUse(reductionSymbol, designator)) {
+        seen.insert(reductionSymbol);
+        seenInExpr.insert(reductionSymbol);
+      } else {
+        uncovered.insert(reductionSymbol);
+      }
+    }
+
+    for (const semantics::Symbol *symbol : exprCandidates)
+      if (!seenInExpr.contains(symbol))
+        uncovered.insert(symbol);
+  }
+
+  template <typename T>
+  bool Pre(const T &node) {
+    if constexpr (parser::HasTypedExpr<T>::value) {
+      if (const SomeExpr *expr = semantics::GetExpr(nullptr, node)) {
+        classifyReductionElementUses(*expr);
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool Pre(const parser::Name &name) { return false; }
+
+  template <typename T>
+  void Post(const T &) {}
+};
+
+static llvm::SmallVector<const semantics::Symbol *>
+getSymbolsCoveredByReductionElements(lower::pft::Evaluation &eval,
+                                     llvm::ArrayRef<Object> reductionObjects) {
+  llvm::DenseMap<const semantics::Symbol *, const semantics::Symbol *>
+      aliasToReductionSymbol;
+  llvm::DenseMap<const semantics::Symbol *, llvm::SmallVector<const SomeExpr *>>
+      reductionElementExprs;
+  for (const Object &object : reductionObjects) {
+    if (!ReductionProcessor::isExpressionLoweredAsReductionObject(&object))
+      continue;
+    llvm::SmallVector<const semantics::Symbol *> aliases;
+    addSymbolAliases(aliases, object.sym());
+    for (const semantics::Symbol *alias : aliases)
+      aliasToReductionSymbol[alias] = object.sym();
+    reductionElementExprs[object.sym()].push_back(&*object.ref());
+  }
+
+  if (reductionElementExprs.empty())
+    return {};
+
+  ArrayElementReductionUseCollector collector(aliasToReductionSymbol,
+                                              reductionElementExprs);
+  eval.visit([&](const auto &node) { parser::Walk(node, collector); });
+
+  llvm::SmallVector<const semantics::Symbol *> suppressList;
+  for (auto &[symbol, exprs] : reductionElementExprs)
+    if (collector.seen.contains(symbol) &&
+        !collector.uncovered.contains(symbol))
+      suppressList.push_back(symbol);
+  return suppressList;
+}
+
 /// Create the body (block) for an OpenMP Operation.
 ///
 /// \param [in]   op  - the operation the body belongs to.
@@ -1339,6 +1564,27 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
   }
 
   if (!info.genSkeletonOnly) {
+    lower::ExprToValueMap local;
+    if (auto *old = info.converter.getExprOverrides())
+      local.insert(old->begin(), old->end());
+    if (info.blockArgs) {
+      if (auto ompBlockArgOp =
+              mlir::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(op)) {
+        addReductionObjectOverrides(firOpBuilder, info.loc, local,
+                                    info.blockArgs->inReduction,
+                                    ompBlockArgOp.getInReductionBlockArgs());
+        addReductionObjectOverrides(firOpBuilder, info.loc, local,
+                                    info.blockArgs->reduction,
+                                    ompBlockArgOp.getReductionBlockArgs());
+        addReductionObjectOverrides(firOpBuilder, info.loc, local,
+                                    info.blockArgs->taskReduction,
+                                    ompBlockArgOp.getTaskReductionBlockArgs());
+      }
+    }
+
+    auto *old = info.converter.getExprOverrides();
+    info.converter.overrideExprValues(local.empty() ? old : &local);
+
     if (ConstructQueue::const_iterator next = std::next(item);
         next != queue.end()) {
       genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval,
@@ -1354,6 +1600,8 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
       genNestedEvaluations(info.converter, info.eval);
       temp->erase();
     }
+
+    info.converter.overrideExprValues(old);
   }
 
   // Get or create a unique exiting block from the given region, or
@@ -2171,21 +2419,40 @@ genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
                                        const ObjectEntryBlockArgs &>>
                   wrapperArgs,
               llvm::omp::Directive directive, DataSharingProcessor &dsp) {
+  const lower::ExprToValueMap *oldOverrides = converter.getExprOverrides();
+  lower::ExprToValueMap loopNestOverrides;
   auto ivCallback = [&](mlir::Operation *op) {
     genLoopVars(op, converter, loc, iv, wrapperArgs);
+    if (oldOverrides)
+      loopNestOverrides.insert(oldOverrides->begin(), oldOverrides->end());
+    for (auto [argGeneratingOp, blockArgs] : wrapperArgs) {
+      addReductionObjectOverrides(converter.getFirOpBuilder(), loc,
+                                  loopNestOverrides, blockArgs.inReduction,
+                                  argGeneratingOp.getInReductionBlockArgs());
+      addReductionObjectOverrides(converter.getFirOpBuilder(), loc,
+                                  loopNestOverrides, blockArgs.reduction,
+                                  argGeneratingOp.getReductionBlockArgs());
+      addReductionObjectOverrides(converter.getFirOpBuilder(), loc,
+                                  loopNestOverrides, blockArgs.taskReduction,
+                                  argGeneratingOp.getTaskReductionBlockArgs());
+    }
+    converter.overrideExprValues(
+        loopNestOverrides.empty() ? oldOverrides : &loopNestOverrides);
     return llvm::SmallVector<const semantics::Symbol *>(iv);
   };
 
   uint64_t nestValue = getCollapseValue(item->clauses);
   nestValue = nestValue < iv.size() ? iv.size() : nestValue;
   auto *nestedEval = getCollapsedLoopEval(eval, nestValue);
-  return genOpWithBody<mlir::omp::LoopNestOp>(
+  auto loopNestOp = genOpWithBody<mlir::omp::LoopNestOp>(
       OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval,
                         directive)
           .setClauses(&item->clauses)
           .setDataSharingProcessor(&dsp)
           .setGenRegionEntryCb(ivCallback),
       queue, item, clauseOps);
+  converter.overrideExprValues(oldOverrides);
+  return loopNestOp;
 }
 
 static mlir::omp::LoopOp
@@ -3209,9 +3476,14 @@ genTaskOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
             .setClauses(&item->clauses),
         queue, item, clauseOps);
 
+  llvm::SmallVector<const semantics::Symbol *>
+      symbolsCoveredByReductionElements =
+          getSymbolsCoveredByReductionElements(eval, inReductionObjects);
   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                            lower::omp::isLastItemInQueue(item, queue),
-                           /*useDelayedPrivatization=*/true, symTable);
+                           /*useDelayedPrivatization=*/true, symTable,
+                           /*isTargetPrivatization=*/false,
+                           symbolsCoveredByReductionElements);
   dsp.processStep1(&clauseOps);
 
   ObjectEntryBlockArgs taskArgs;
@@ -3471,9 +3743,17 @@ static mlir::omp::TaskloopContextOp genStandaloneTaskloop(
 
   genTaskloopClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
                      taskloopClauseOps, reductionObjects, inReductionObjects);
+  llvm::SmallVector<Object> allReductionObjects;
+  llvm::append_range(allReductionObjects, reductionObjects);
+  llvm::append_range(allReductionObjects, inReductionObjects);
+  llvm::SmallVector<const semantics::Symbol *>
+      symbolsCoveredByReductionElements =
+          getSymbolsCoveredByReductionElements(eval, allReductionObjects);
   DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
                            /*shouldCollectPreDeterminedSymbols=*/true,
-                           enableDelayedPrivatization, symTable);
+                           enableDelayedPrivatization, symTable,
+                           /*isTargetPrivatization=*/false,
+                           symbolsCoveredByReductionElements);
   dsp.processStep1(&taskloopClauseOps);
 
   mlir::omp::LoopNestOperands loopNestClauseOps;
diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp
index b3a27736d1616..0e37683179a6f 100644
--- a/flang/lib/Lower/Support/ReductionProcessor.cpp
+++ b/flang/lib/Lower/Support/ReductionProcessor.cpp
@@ -13,6 +13,7 @@
 #include "flang/Lower/Support/ReductionProcessor.h"
 
 #include "flang/Lower/AbstractConverter.h"
+#include "flang/Lower/ConvertExprToHLFIR.h"
 #include "flang/Lower/ConvertType.h"
 #include "flang/Lower/OpenMP/Clauses.h"
 #include "flang/Lower/Support/PrivateReductionUtils.h"
@@ -47,6 +48,7 @@ template bool ReductionProcessor::processReductionArguments<
     llvm::SmallVectorImpl<bool> &reduceVarByRef,
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
     const llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+    llvm::ArrayRef<Object> reductionObjects, lower::SymMap &symMap,
     llvm::DenseMap<const semantics::Symbol *, mlir::Value> *reductionVarCache);
 
 template bool ReductionProcessor::processReductionArguments<
@@ -57,6 +59,7 @@ template bool ReductionProcessor::processReductionArguments<
     llvm::SmallVectorImpl<bool> &reduceVarByRef,
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
     const llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+    llvm::ArrayRef<Object> reductionObjects, lower::SymMap &symMap,
     llvm::DenseMap<const semantics::Symbol *, mlir::Value> *reductionVarCache);
 
 template mlir::omp::DeclareReductionOp
@@ -368,6 +371,14 @@ mlir::Value ReductionProcessor::createScalarCombiner(
   return reductionOp;
 }
 
+bool ReductionProcessor::isExpressionLoweredAsReductionObject(
+    const Object *object) {
+  // A null object, or one without an expression, is never lowered this way.
+  const bool hasExpression = object && object->ref();
+  // Array elements are the only expressions lowered as reduction objects.
+  return hasExpression && evaluate::IsArrayElement(*object->ref());
+}
+
 template <typename ParentDeclOpType>
 static void genYield(fir::FirOpBuilder &builder, mlir::Location loc,
                      mlir::Value yieldedValue) {
@@ -675,6 +686,7 @@ bool ReductionProcessor::processReductionArguments(
     llvm::SmallVectorImpl<bool> &reduceVarByRef,
     llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
     const llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
+    llvm::ArrayRef<Object> reductionObjects, lower::SymMap &symMap,
     llvm::DenseMap<const semantics::Symbol *, mlir::Value> *reductionVarCache) {
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
 
@@ -717,79 +729,104 @@ bool ReductionProcessor::processReductionArguments(
         builder.getRegion().getParentOfType<fir::DoConcurrentOp>());
   }
 
-  for (const semantics::Symbol *symbol : reductionSymbols) {
+  assert((reductionObjects.empty() ||
+          reductionSymbols.size() == reductionObjects.size()) &&
+         "mismatched reduction symbol and object lists");
+
+  for (unsigned i = 0; i < reductionSymbols.size(); ++i) {
+    const Object *object =
+        reductionObjects.empty() ? nullptr : &reductionObjects[i];
+    const semantics::Symbol *symbol =
+        object ? object->sym() : reductionSymbols[i];
+    const SomeExpr *expr = object && object->ref() ? &*object->ref() : nullptr;
+    const bool isObjectExpr =
+        ReductionProcessor::isExpressionLoweredAsReductionObject(object);
+
     // If a cached reduction variable exists for this symbol, reuse it.
     // This ensures that composite constructs (e.g. DO SIMD) where both
     // the outer wrapper (wsloop) and inner wrapper (simd) process the same
     // reduction clause share the same SSA value, enabling genLoopVars()'s
     // IRMapping to correctly remap inner wrapper operands to outer wrapper
-    // block arguments.
-    if (reductionVarCache) {
-      auto it = reductionVarCache->find(symbol);
-      if (it != reductionVarCache->end()) {
+    // block arguments. Array element reductions are intentionally not cached:
+    // block-argument object tracking maps their scoped uses.
+    if (reductionVarCache && !isObjectExpr) {
+      if (auto it = reductionVarCache->find(symbol);
+          it != reductionVarCache->end()) {
         reductionVars.push_back(it->second);
         reduceVarByRef.push_back(doReductionByRef(it->second));
         continue;
       }
     }
 
-    mlir::Value symVal = converter.getSymbolAddress(*symbol);
-
-    if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
-      symVal = declOp.getBase();
-
-    mlir::Type eleType;
-    auto refType = mlir::dyn_cast_or_null<fir::ReferenceType>(symVal.getType());
-    if (refType)
-      eleType = refType.getEleTy();
-    else
-      eleType = symVal.getType();
-
-    // all arrays must be boxed so that we have convenient access to all the
-    // information needed to iterate over the array
-    if (mlir::isa<fir::SequenceType>(eleType)) {
-      // For Host associated symbols, use `SymbolBox` instead
-      lower::SymbolBox symBox = converter.lookupOneLevelUpSymbol(*symbol);
-      hlfir::Entity entity{symBox.getAddr()};
-      entity = genVariableBox(currentLocation, builder, entity);
-      mlir::Value box = entity.getBase();
-
-      // Always pass the box by reference so that the OpenMP dialect
-      // verifiers don't need to know anything about fir.box
-      auto alloca =
-          fir::AllocaOp::create(builder, currentLocation, box.getType());
-      fir::StoreOp::create(builder, currentLocation, box, alloca);
-
-      symVal = alloca;
-    } else if (mlir::isa<fir::BaseBoxType>(symVal.getType())) {
-      // boxed arrays are passed as values not by reference. Unfortunately,
-      // we can't pass a box by value to omp.redution_declare, so turn it
-      // into a reference
-      auto oldIP = builder.saveInsertionPoint();
-      builder.setInsertionPointToStart(builder.getAllocaBlock());
-      auto alloca =
-          fir::AllocaOp::create(builder, currentLocation, symVal.getType());
-      builder.restoreInsertionPoint(oldIP);
-      fir::StoreOp::create(builder, currentLocation, symVal, alloca);
-      symVal = alloca;
-    }
+    mlir::Value reductionVal;
+    mlir::Type refTy;
 
-    // this isn't the same as the by-val and by-ref passing later in the
-    // pipeline. Both styles assume that the variable is a reference at
-    // this point
-    assert(fir::isa_ref_type(symVal.getType()) &&
-           "reduction input var is passed by reference");
-    mlir::Type elementType = fir::dyn_cast_ptrEleTy(symVal.getType());
-    const bool symIsVolatile = fir::isa_volatile_type(symVal.getType());
-    mlir::Type refTy = fir::ReferenceType::get(elementType, symIsVolatile);
+    if (isObjectExpr) {
+      StatementContext stmtCtx;
+      hlfir::EntityWithAttributes entity = convertExprToHLFIR(
+          converter.getCurrentLocation(), converter, *expr, symMap, stmtCtx);
+      reductionVal = entity.getBase();
+      refTy = reductionVal.getType();
+    } else {
+      mlir::Value symVal = converter.getSymbolAddress(*symbol);
+
+      if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+        symVal = declOp.getBase();
+
+      mlir::Type eleType;
+      auto refType =
+          mlir::dyn_cast_or_null<fir::ReferenceType>(symVal.getType());
+      if (refType)
+        eleType = refType.getEleTy();
+      else
+        eleType = symVal.getType();
+
+      // all arrays must be boxed so that we have convenient access to all the
+      // information needed to iterate over the array
+      if (mlir::isa<fir::SequenceType>(eleType)) {
+        // For Host associated symbols, use `SymbolBox` instead
+        lower::SymbolBox symBox = converter.lookupOneLevelUpSymbol(*symbol);
+        hlfir::Entity entity{symBox.getAddr()};
+        entity = genVariableBox(currentLocation, builder, entity);
+        mlir::Value box = entity.getBase();
+
+        // Always pass the box by reference so that the OpenMP dialect
+        // verifiers don't need to know anything about fir.box
+        auto alloca =
+            fir::AllocaOp::create(builder, currentLocation, box.getType());
+        fir::StoreOp::create(builder, currentLocation, box, alloca);
+
+        symVal = alloca;
+      } else if (mlir::isa<fir::BaseBoxType>(symVal.getType())) {
+        // boxed arrays are passed as values not by reference. Unfortunately,
+        // we can't pass a box by value to omp.reduction_declare, so turn it
+        // into a reference
+        auto oldIP = builder.saveInsertionPoint();
+        builder.setInsertionPointToStart(builder.getAllocaBlock());
+        auto alloca =
+            fir::AllocaOp::create(builder, currentLocation, symVal.getType());
+        builder.restoreInsertionPoint(oldIP);
+        fir::StoreOp::create(builder, currentLocation, symVal, alloca);
+        symVal = alloca;
+      }
 
+      // this isn't the same as the by-val and by-ref passing later in the
+      // pipeline. Both styles assume that the variable is a reference at
+      // this point
+      assert(fir::isa_ref_type(symVal.getType()) &&
+             "reduction input var is passed by reference");
+      mlir::Type elementType = fir::dyn_cast_ptrEleTy(symVal.getType());
+      const bool symIsVolatile = fir::isa_volatile_type(symVal.getType());
+      refTy = fir::ReferenceType::get(elementType, symIsVolatile);
+      reductionVal = symVal;
+    }
     reductionVars.push_back(
-        builder.createConvert(currentLocation, refTy, symVal));
+        builder.createConvert(currentLocation, refTy, reductionVal));
     reduceVarByRef.push_back(doReductionByRef(reductionVars.back()));
 
     // Cache the final SSA value for this symbol so that subsequent calls
     // (e.g. for the inner wrapper in a composite construct) reuse it.
-    if (reductionVarCache)
+    if (reductionVarCache && !isObjectExpr)
       reductionVarCache->try_emplace(symbol, reductionVars.back());
   }
 
diff --git a/flang/test/Lower/OpenMP/reduction-array-element.f90 b/flang/test/Lower/OpenMP/reduction-array-element.f90
new file mode 100644
index 0000000000000..1aabc7b54401e
--- /dev/null
+++ b/flang/test/Lower/OpenMP/reduction-array-element.f90
@@ -0,0 +1,143 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s --implicit-check-not=add_reduction_byref_box
+
+subroutine reduction_literal(a, n)
+  integer :: a(4), n
+!$omp parallel do reduction(+: a(2))
+  do i = 1, n
+    a(2) = a(2) + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPreduction_literal
+! CHECK: omp.wsloop {{.*}} reduction(@add_reduction_i32 {{.*}} : !fir.ref<i32>) {
+! CHECK: hlfir.declare %arg{{[0-9]+}} {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.load %{{[0-9]+}}#0 : !fir.ref<i32>
+! CHECK: hlfir.assign {{.*}} to %{{[0-9]+}}#0 : i32, !fir.ref<i32>
+
+subroutine reduction_multiple(a, n)
+  integer :: a(4), n
+!$omp parallel do reduction(+: a(2), a(3))
+  do i = 1, n
+    a(2) = a(2) + i
+    a(3) = a(3) + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPreduction_multiple
+! CHECK: omp.wsloop {{.*}} reduction(@add_reduction_i32 {{.*}}, @add_reduction_i32 {{.*}} : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK: hlfir.declare %arg{{[0-9]+}} {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.declare %arg{{[0-9]+}} {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.assign {{.*}} to %{{[0-9]+}}#0 : i32, !fir.ref<i32>
+! CHECK: hlfir.assign {{.*}} to %{{[0-9]+}}#0 : i32, !fir.ref<i32>
+
+subroutine reduction_arrays(a, b, n)
+  integer :: a(4), b(4), n
+!$omp parallel do reduction(+: a(2), b(2))
+  do i = 1, n
+    a(2) = a(2) + b(2) + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPreduction_arrays
+! CHECK: omp.wsloop {{.*}} reduction(@add_reduction_i32 {{.*}}, @add_reduction_i32 {{.*}} : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK: hlfir.declare %arg{{[0-9]+}} {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.declare %arg{{[0-9]+}} {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+subroutine reduction_variable(a, n, j)
+  integer :: a(4), n, j
+!$omp parallel do reduction(+: a(j))
+  do i = 1, n
+    a(j) = a(j) + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPreduction_variable
+! CHECK: omp.wsloop {{.*}} reduction(@add_reduction_i32 {{.*}} : !fir.ref<i32>) {
+! CHECK: hlfir.declare %arg{{[0-9]+}} {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.assign {{.*}} to %{{[0-9]+}}#0 : i32, !fir.ref<i32>
+
+subroutine reduction_do_simd(a, n)
+  integer :: a(4), n
+!$omp parallel do simd reduction(+: a(2))
+  do i = 1, n
+    a(2) = a(2) + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPreduction_do_simd
+! CHECK: omp.wsloop reduction(@add_reduction_i32 {{.*}} -> [[WSARG:%arg[0-9]+]] : !fir.ref<i32>) {
+! CHECK: omp.simd {{.*}} reduction(@add_reduction_i32 [[WSARG]] -> [[SIMDARG:%arg[0-9]+]] : !fir.ref<i32>) {
+! CHECK: hlfir.declare [[SIMDARG]] {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.assign {{.*}} to %{{[0-9]+}}#0 : i32, !fir.ref<i32>
+
+subroutine task_reduction_element(a)
+  integer :: a(4)
+!$omp taskgroup task_reduction(+: a(2))
+!$omp task in_reduction(+: a(2))
+  a(2) = a(2) + 1
+!$omp end task
+!$omp end taskgroup
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtask_reduction_element
+! CHECK-NOT: _QFtask_reduction_elementEa_firstprivate_box_4xi32
+! CHECK: omp.taskgroup task_reduction(@add_reduction_i32 {{.*}} -> [[TGARG:%arg[0-9]+]] : !fir.ref<i32>) {
+! CHECK: [[TGDECL:%[0-9]+]]:2 = hlfir.declare [[TGARG]] {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: omp.task in_reduction(@add_reduction_i32 [[TGDECL]]#0 -> [[TASKARG:%arg[0-9]+]] : !fir.ref<i32>)
+! CHECK: [[TASKDECL:%[0-9]+]]:2 = hlfir.declare [[TASKARG]] {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.load [[TASKDECL]]#0 : !fir.ref<i32>
+! CHECK: hlfir.assign {{.*}} to [[TASKDECL]]#0 : i32, !fir.ref<i32>
+
+subroutine taskloop_in_reduction_element(a, n)
+  integer :: a(4), n
+!$omp taskloop in_reduction(+: a(2))
+  do i = 1, n
+    a(2) = a(2) + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtaskloop_in_reduction_element
+! CHECK-NOT: _QFtaskloop_in_reduction_elementEa_firstprivate_box_4xi32
+! CHECK: omp.taskloop.context in_reduction(@add_reduction_i32 {{.*}} -> [[TLARG:%arg[0-9]+]] : !fir.ref<i32>)
+! CHECK: hlfir.declare [[TLARG]] {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.assign {{.*}} to %{{[0-9]+}}#0 : i32, !fir.ref<i32>
+
+subroutine taskloop_reduction_element(a, n)
+  integer :: a(4), n
+!$omp taskloop reduction(+: a(2))
+  do i = 1, n
+    a(2) = a(2) + i
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtaskloop_reduction_element
+! CHECK-NOT: _QFtaskloop_reduction_elementEa_firstprivate_box_4xi32
+! CHECK: omp.taskloop.context {{.*}} reduction(@add_reduction_i32 {{.*}} -> [[TLRARG:%arg[0-9]+]] : !fir.ref<i32>)
+! CHECK: hlfir.declare [[TLRARG]] {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: hlfir.assign {{.*}} to %{{[0-9]+}}#0 : i32, !fir.ref<i32>
+
+subroutine taskloop_reduction_mixed_use(a, n)
+  integer :: a(4), n
+!$omp taskloop reduction(+: a(2))
+  do i = 1, n
+    a(2) = a(2) + i
+    a(1) = a(1) + 1
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtaskloop_reduction_mixed_use
+! CHECK: omp.taskloop.context private({{.*}}@_QFtaskloop_reduction_mixed_useEa_firstprivate_box_4xi32{{.*}}) reduction(@add_reduction_i32 {{.*}} -> [[TLMARG:%arg[0-9]+]] : !fir.ref<i32>)
+! CHECK: hlfir.declare [[TLMARG]] {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+
+subroutine taskloop_reduction_nested_index_use(a, b, n)
+  integer :: a(4), b(4), n
+!$omp taskloop reduction(+: a(2))
+  do i = 1, n
+    a(2) = a(2) + i
+    b(a(1)) = b(a(1)) + 1
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtaskloop_reduction_nested_index_use
+! CHECK: omp.taskloop.context private({{.*}}@_QFtaskloop_reduction_nested_index_useEa_firstprivate_box_4xi32{{.*}}) reduction(@add_reduction_i32 {{.*}} -> [[TLNARG:%arg[0-9]+]] : !fir.ref<i32>)
+! CHECK: hlfir.declare [[TLNARG]] {uniq_name = "omp.reduction.element"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)



More information about the flang-commits mailing list