[flang-commits] [flang] 5d0c5c5 - [flang][hlfir] Lower hlfir.where when there are no conflicts

Jean Perier via flang-commits flang-commits at lists.llvm.org
Mon May 22 03:51:08 PDT 2023


Author: Jean Perier
Date: 2023-05-22T12:50:54+02:00
New Revision: 5d0c5c5928409306c860c8d0506de81b9ade854c

URL: https://github.com/llvm/llvm-project/commit/5d0c5c5928409306c860c8d0506de81b9ade854c
DIFF: https://github.com/llvm/llvm-project/commit/5d0c5c5928409306c860c8d0506de81b9ade854c.diff

LOG: [flang][hlfir] Lower hlfir.where when there are no conflicts

Lower hlfir.where when the scheduling analysis determined that no
temporary storage is needed.

Differential Revision: https://reviews.llvm.org/D150881

Added: 
    flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir

Modified: 
    flang/include/flang/Optimizer/Builder/HLFIRTools.h
    flang/include/flang/Optimizer/HLFIR/HLFIROps.td
    flang/lib/Lower/ConvertCall.cpp
    flang/lib/Optimizer/Builder/HLFIRTools.cpp
    flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
    flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 0b1e36590f10b..06bd4d8ccd2d3 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -23,6 +23,10 @@ namespace fir {
 class FirOpBuilder;
 }
 
+namespace mlir {
+class IRMapping;
+}
+
 namespace hlfir {
 
 class AssociateOp;
@@ -359,13 +363,18 @@ hlfir::ElementalOp genElementalOp(mlir::Location loc,
                                   mlir::ValueRange typeParams,
                                   const ElementalKernelGenerator &genKernel);
 
+/// Structure to describe a loop nest.
+struct LoopNest {
+  fir::DoLoopOp outerLoop;
+  fir::DoLoopOp innerLoop;
+  llvm::SmallVector<mlir::Value> oneBasedIndices;
+};
+
 /// Generate a fir.do_loop nest looping from 1 to extents[i].
-/// Return the inner fir.do_loop and the indices of the loops.
-std::pair<fir::DoLoopOp, llvm::SmallVector<mlir::Value>>
-genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-            mlir::ValueRange extents);
-inline std::pair<fir::DoLoopOp, llvm::SmallVector<mlir::Value>>
-genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value shape) {
+LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
+                     mlir::ValueRange extents);
+inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
+                            mlir::Value shape) {
   return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape));
 }
 
@@ -379,6 +388,20 @@ hlfir::YieldElementOp inlineElementalOp(mlir::Location loc,
                                         hlfir::ElementalOp elemental,
                                         mlir::ValueRange oneBasedIndices);
 
+/// Inline the body of an hlfir.elemental without cloning the resulting
+/// hlfir.yield_element, and return the cloned operand of the
+/// hlfir.yield_element. The mapper must be provided to cover complex cases
+/// where the inlined elemental is not defined in the current context and uses
+/// values that have been cloned already.
+/// A callback is provided to indicate if an hlfir.apply inside the
+/// hlfir.elemental must be immediately replaced by the inlining of the
+/// applied hlfir.elemental.
+mlir::Value inlineElementalOp(
+    mlir::Location loc, fir::FirOpBuilder &builder,
+    hlfir::ElementalOp elemental, mlir::ValueRange oneBasedIndices,
+    mlir::IRMapping &mapper,
+    const std::function<bool(hlfir::ElementalOp)> &mustRecursivelyInline);
+
 std::pair<fir::ExtendedValue, std::optional<hlfir::CleanupFunction>>
 convertToValue(mlir::Location loc, fir::FirOpBuilder &builder,
                const hlfir::Entity &entity);

diff  --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index 572faf06d14c2..15b92385a7720 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -644,10 +644,14 @@ def hlfir_ElementalOp : hlfir_Op<"elemental", [RecursiveMemoryEffects]> {
   let extraClassDeclaration = [{
       mlir::Block *getBody() { return &getRegion().front(); }
 
-      // Get the indices iterating over the shape.
+      /// Get the indices iterating over the shape.
       mlir::Block::BlockArgListType getIndices() {
        return getBody()->getArguments();
       }
+
+      /// Must this elemental be evaluated in order?
+      /// TODO: add attribute and set it in lowering.
+      bool isOrdered() {return true;}
   }];
 
   let skipDefaultBuilders = 1;

diff  --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index 17fbdc437aa82..ad7e177020522 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -1517,11 +1517,10 @@ class ElementalCallBuilder {
     // iterations are cleaned up inside the iterations.
     if (!callContext.resultType) {
       // Subroutine case. Generate call inside loop nest.
-      auto [innerLoop, oneBasedIndicesVector] =
-          hlfir::genLoopNest(loc, builder, shape);
-      mlir::ValueRange oneBasedIndices = oneBasedIndicesVector;
+      hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, shape);
+      mlir::ValueRange oneBasedIndices = loopNest.oneBasedIndices;
       auto insPt = builder.saveInsertionPoint();
-      builder.setInsertionPointToStart(innerLoop.getBody());
+      builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
       callContext.stmtCtx.pushScope();
       for (auto &preparedActual : loweredActuals)
         if (preparedActual)

diff  --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 30bae873a5eaa..7fd41a214ac45 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -764,26 +764,62 @@ hlfir::inlineElementalOp(mlir::Location loc, fir::FirOpBuilder &builder,
   return yield;
 }
 
-std::pair<fir::DoLoopOp, llvm::SmallVector<mlir::Value>>
-hlfir::genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
-                   mlir::ValueRange extents) {
+mlir::Value hlfir::inlineElementalOp(
+    mlir::Location loc, fir::FirOpBuilder &builder,
+    hlfir::ElementalOp elemental, mlir::ValueRange oneBasedIndices,
+    mlir::IRMapping &mapper,
+    const std::function<bool(hlfir::ElementalOp)> &mustRecursivelyInline) {
+  mlir::Region &region = elemental.getRegion();
+  // hlfir.elemental region is a SizedRegion<1>.
+  assert(region.hasOneBlock() && "elemental region must have one block");
+  mapper.map(elemental.getIndices(), oneBasedIndices);
+  mlir::Block::OpListType &ops = region.back().getOperations();
+  assert(!ops.empty() && "elemental block cannot be empty");
+  auto end = ops.end();
+  for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt) {
+    if (auto apply = mlir::dyn_cast<hlfir::ApplyOp>(*opIt))
+      if (auto appliedElemental =
+              apply.getExpr().getDefiningOp<hlfir::ElementalOp>())
+        if (mustRecursivelyInline(appliedElemental)) {
+          llvm::SmallVector<mlir::Value> clonedApplyIndices;
+          for (auto indice : apply.getIndices())
+            clonedApplyIndices.push_back(mapper.lookupOrDefault(indice));
+          mlir::Value inlined = inlineElementalOp(
+              loc, builder, appliedElemental, clonedApplyIndices, mapper,
+              mustRecursivelyInline);
+          mapper.map(apply.getResult(), inlined);
+          continue;
+        }
+    (void)builder.clone(*opIt, mapper);
+  }
+  auto oldYield = mlir::dyn_cast_or_null<hlfir::YieldElementOp>(
+      region.back().getOperations().back());
+  assert(oldYield && "must terminate with yieldElementalOp");
+  return mapper.lookupOrDefault(oldYield.getElementValue());
+}
+
+hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
+                                   fir::FirOpBuilder &builder,
+                                   mlir::ValueRange extents) {
+  hlfir::LoopNest loopNest;
   assert(!extents.empty() && "must have at least one extent");
   auto insPt = builder.saveInsertionPoint();
-  llvm::SmallVector<mlir::Value> indices(extents.size());
+  loopNest.oneBasedIndices.assign(extents.size(), mlir::Value{});
   // Build loop nest from column to row.
   auto one = builder.create<mlir::arith::ConstantIndexOp>(loc, 1);
   mlir::Type indexType = builder.getIndexType();
   unsigned dim = extents.size() - 1;
-  fir::DoLoopOp innerLoop;
   for (auto extent : llvm::reverse(extents)) {
     auto ub = builder.createConvert(loc, indexType, extent);
-    innerLoop = builder.create<fir::DoLoopOp>(loc, one, ub, one);
-    builder.setInsertionPointToStart(innerLoop.getBody());
+    loopNest.innerLoop = builder.create<fir::DoLoopOp>(loc, one, ub, one);
+    builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
     // Reverse the indices so they are in column-major order.
-    indices[dim--] = innerLoop.getInductionVar();
+    loopNest.oneBasedIndices[dim--] = loopNest.innerLoop.getInductionVar();
+    if (!loopNest.outerLoop)
+      loopNest.outerLoop = loopNest.innerLoop;
   }
   builder.restoreInsertionPoint(insPt);
-  return {innerLoop, indices};
+  return loopNest;
 }
 
 static fir::ExtendedValue

diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
index 7b12fad984f4e..12cc236384111 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/BufferizeHLFIR.cpp
@@ -552,12 +552,11 @@ struct ElementalOpConversion
                         adaptor.getTypeparams());
     // Generate a loop nest looping around the fir.elemental shape and clone
     // fir.elemental region inside the inner loop.
-    auto [innerLoop, oneBasedLoopIndices] =
-        hlfir::genLoopNest(loc, builder, extents);
+    hlfir::LoopNest loopNest = hlfir::genLoopNest(loc, builder, extents);
     auto insPt = builder.saveInsertionPoint();
-    builder.setInsertionPointToStart(innerLoop.getBody());
-    auto yield =
-        hlfir::inlineElementalOp(loc, builder, elemental, oneBasedLoopIndices);
+    builder.setInsertionPointToStart(loopNest.innerLoop.getBody());
+    auto yield = hlfir::inlineElementalOp(loc, builder, elemental,
+                                          loopNest.oneBasedIndices);
     hlfir::Entity elementValue(yield.getElementValue());
     // Skip final AsExpr if any. It would create an element temporary,
     // which is no needed since the element will be assigned right away in
@@ -572,7 +571,7 @@ struct ElementalOpConversion
     rewriter.eraseOp(yield);
     // Assign the element value to the temp element for this iteration.
     auto tempElement =
-        hlfir::getElementAt(loc, builder, temp, oneBasedLoopIndices);
+        hlfir::getElementAt(loc, builder, temp, loopNest.oneBasedIndices);
     builder.create<hlfir::AssignOp>(loc, elementValue, tempElement);
     // hlfir.yield_element implicitly marks the end-of-life its operand if
     // it is an expression created in the hlfir.elemental (since it is its

diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index 5cea4d743841b..d49bc1e6bdff3 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -19,11 +19,13 @@
 
 #include "ScheduleOrderedAssignments.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
 #include "mlir/IR/IRMapping.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 
@@ -42,6 +44,52 @@ static llvm::cl::opt<bool> dbgScheduleOnly(
     llvm::cl::desc("Only run ordered assignment scheduling with no codegen"),
     llvm::cl::init(false));
 
+namespace {
+
+/// Structure that represents a masked expression being lowered. Masked
+/// expressions are any expressions inside an hlfir.where. As described in
+/// Fortran 2018 section 10.2.3.2, the evaluation of the elemental parts of such
+/// expressions must be masked, while the evaluation of none elemental parts
+/// must not be masked. This structure analyzes the region evaluating the
+/// expression and allows splitting the generation of the none elemental part
+/// from the elemental part.
+struct MaskedArrayExpr {
+  MaskedArrayExpr(mlir::Location loc, mlir::Region &region);
+
+  /// Generate the none elemental part. Must be called outside of the
+  /// loops created for the WHERE construct.
+  void generateNoneElementalPart(fir::FirOpBuilder &builder,
+                                 mlir::IRMapping &mapper);
+
+  /// Methods below can only be called once generateNoneElementalPart has been
+  /// called.
+
+  /// Return the shape of the expression.
+  mlir::Value generateShape(fir::FirOpBuilder &builder,
+                            mlir::IRMapping &mapper);
+  /// Return the value of an element value for this expression given the current
+  /// where loop indices.
+  mlir::Value generateElementalParts(fir::FirOpBuilder &builder,
+                                     mlir::ValueRange oneBasedIndices,
+                                     mlir::IRMapping &mapper);
+  /// Generate the cleanup for the none elemental parts, if any. This must be
+  /// called after the loops created for the WHERE construct.
+  void generateNoneElementalCleanupIfAny(fir::FirOpBuilder &builder,
+                                         mlir::IRMapping &mapper);
+
+  mlir::Location loc;
+  mlir::Region &region;
+  /// Was generateNoneElementalPart called?
+  bool noneElementalPartWasGenerated = false;
+  /// Set of operations that form the elemental parts of the
+  /// expression evaluation. These are the hlfir.elemental and
+  /// hlfir.elemental_addr that form the elemental tree producing
+  /// the expression value. hlfir.elemental that produce values
+  /// used inside transformational operations are not part of this set.
+  llvm::SmallSet<mlir::Operation *, 4> elementalParts{};
+};
+} // namespace
+
 namespace {
 /// Structure that visits an ordered assignment tree and generates code for
 /// it according to a schedule.
@@ -76,6 +124,8 @@ class OrderedAssignmentRewriter {
   /// Generate code when leaving a given ordered assignment node.
   void post(hlfir::ForallOp);
   void post(hlfir::ForallMaskOp);
+  void post(hlfir::WhereOp);
+  void post(hlfir::ElseWhereOp);
 
   /// Is this an assignment to a vector subscripted entity?
   static bool hasVectorSubscriptedLhs(hlfir::RegionAssignOp regionAssignOp);
@@ -105,9 +155,23 @@ class OrderedAssignmentRewriter {
   /// at the current insertion point (by cloning).
   void generateCleanupIfAny(std::optional<hlfir::YieldOp> maybeYield);
 
+  /// Generate a masked entity. This can only be called when whereLoopNest was
+  /// set (When an hlfir.where is being visited).
+  /// This method returns the scalar element (that may have been previously
+  /// saved) for the current indices inside the where loop.
+  mlir::Value generateMaskedEntity(mlir::Location loc, mlir::Region &region) {
+    MaskedArrayExpr maskedExpr(loc, region);
+    return generateMaskedEntity(maskedExpr);
+  }
+  mlir::Value generateMaskedEntity(MaskedArrayExpr &maskedExpr);
+
+  /// Create a fir.if at the current position inside the where loop nest
+  /// given a mask expression.
+  void generateMaskIfOp(MaskedArrayExpr &mask);
+
   fir::FirOpBuilder &builder;
 
-  /// Map containg the mapping between the original order assignment tree
+  /// Map containing the mapping between the original order assignment tree
   /// operations and the operations that have been cloned in the current run.
   /// It is reset between two runs.
   mlir::IRMapping mapper;
@@ -115,6 +179,9 @@ class OrderedAssignmentRewriter {
   /// point correctly when leaving a node that requires a fir.do_loop or fir.if
   /// operation.
   llvm::SmallVector<mlir::Operation *> constructStack;
+  /// Current where loop nest, if any.
+  std::optional<hlfir::LoopNest> whereLoopNest;
+
   /// Root of the order assignment tree being lowered.
   hlfir::OrderedAssignmentTreeOpInterface root;
   /// Pointer to the current run of the schedule being lowered.
@@ -139,8 +206,8 @@ void OrderedAssignmentRewriter::walk(
                 mlir::dyn_cast<hlfir::OrderedAssignmentTreeOpInterface>(op))
           walk(subNode);
       llvm::TypeSwitch<mlir::Operation *, void>(node.getOperation())
-          .Case<hlfir::ForallOp, hlfir::ForallMaskOp>(
-              [&](auto concreteOp) { post(concreteOp); })
+          .Case<hlfir::ForallOp, hlfir::ForallMaskOp, hlfir::WhereOp,
+                hlfir::ElseWhereOp>([&](auto concreteOp) { post(concreteOp); })
           .Default([](auto) {});
     }
   }
@@ -218,19 +285,78 @@ void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
   generateCleanupIfAny(oldLhsYield);
 }
 
+void OrderedAssignmentRewriter::generateMaskIfOp(MaskedArrayExpr &mask) {
+  assert(whereLoopNest.has_value() && "must be inside a WHERE");
+  mlir::Location loc = mask.loc;
+  hlfir::Entity maskVal{generateMaskedEntity(mask)};
+  maskVal = hlfir::loadTrivialScalar(loc, builder, maskVal);
+  mlir::Value cdt = builder.createConvert(loc, builder.getI1Type(), maskVal);
+  // Else region is added when visiting nested hlfir.elseWhereOp, if any.
+  auto ifOp = builder.create<fir::IfOp>(loc, std::nullopt, cdt,
+                                        /*withElseRegion=*/false);
+  constructStack.push_back(ifOp.getOperation());
+  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+}
+
 void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
   mlir::Location loc = whereOp.getLoc();
-  TODO(loc, "WHERE in HLFIR");
+  MaskedArrayExpr mask(loc, whereOp.getMaskRegion());
+  if (!whereLoopNest) {
+    // Start a loop nest iterating on the shape of the where mask.
+    mask.generateNoneElementalPart(builder, mapper);
+    mlir::Value shape = mask.generateShape(builder, mapper);
+    whereLoopNest = hlfir::genLoopNest(loc, builder, shape);
+    constructStack.push_back(whereLoopNest->outerLoop.getOperation());
+    builder.setInsertionPointToStart(whereLoopNest->innerLoop.getBody());
+  }
+  // Generate a fir.if with the value of the current element of the mask
+  // inside the loops.
+  generateMaskIfOp(mask);
+}
+
+void OrderedAssignmentRewriter::post(hlfir::WhereOp whereOp) {
+  assert(!constructStack.empty() && "must contain a fir.if");
+  builder.setInsertionPointAfter(constructStack.pop_back_val());
+  // If all where/elsewhere fir.if have been popped, this is the outer whereOp,
+  // and the where loop must be exited.
+  assert(!constructStack.empty() && "must contain a  fir.do_loop or fir.if");
+  if (mlir::isa<fir::DoLoopOp>(constructStack.back())) {
+    builder.setInsertionPointAfter(constructStack.pop_back_val());
+    whereLoopNest.reset();
+  }
 }
 
 void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) {
+  assert(!constructStack.empty() && "cannot be empty inside a where");
   mlir::Location loc = elseWhereOp.getLoc();
-  TODO(loc, "ELSEWHERE in HLFIR");
+  // Create an "else" region for the current where/elsewhere fir.if.
+  auto ifOp = mlir::dyn_cast<fir::IfOp>(constructStack.back());
+  assert(ifOp && ifOp.getElseRegion().empty() && "must be an if without else");
+  builder.createBlock(&ifOp.getElseRegion());
+  auto end = builder.create<fir::ResultOp>(loc);
+  builder.setInsertionPoint(end);
+  if (elseWhereOp.getMaskRegion().empty())
+    return;
+  // Create new nested fir.if with elsewhere mask if any.
+  MaskedArrayExpr mask(loc, elseWhereOp.getMaskRegion());
+  generateMaskIfOp(mask);
+}
+
+void OrderedAssignmentRewriter::post(hlfir::ElseWhereOp elseWhereOp) {
+  // Exit ifOp that was created for the elseWhereOp mask, if any.
+  if (elseWhereOp.getMaskRegion().empty())
+    return;
+  assert(!constructStack.empty() && "must contain a fir.if");
+  builder.setInsertionPointAfter(constructStack.pop_back_val());
 }
 
 std::pair<mlir::Value, std::optional<hlfir::YieldOp>>
 OrderedAssignmentRewriter::generateYieldedEntity(mlir::Region &region) {
   // TODO: if the region was saved, use that instead of generating code again.
+  if (whereLoopNest.has_value()) {
+    mlir::Location loc = region.getParentOp()->getLoc();
+    return {generateMaskedEntity(loc, region), std::nullopt};
+  }
   assert(region.hasOneBlock() && "region must contain one block");
   // Clone all operations except the final hlfir.yield.
   mlir::Block::OpListType &ops = region.back().getOperations();
@@ -258,6 +384,27 @@ OrderedAssignmentRewriter::generateYieldedScalarValue(mlir::Region &region) {
   return value;
 }
 
+mlir::Value
+OrderedAssignmentRewriter::generateMaskedEntity(MaskedArrayExpr &maskedExpr) {
+  assert(whereLoopNest.has_value() && "must be inside WHERE loop nest");
+  auto insertionPoint = builder.saveInsertionPoint();
+  if (!maskedExpr.noneElementalPartWasGenerated) {
+    // Generate none elemental part before the where loops (but inside the
+    // current forall loops if any).
+    builder.setInsertionPoint(whereLoopNest->outerLoop);
+    maskedExpr.generateNoneElementalPart(builder, mapper);
+  }
+  // Generate the none elemental part cleanup after the where loops.
+  builder.setInsertionPointAfter(whereLoopNest->outerLoop);
+  maskedExpr.generateNoneElementalCleanupIfAny(builder, mapper);
+  // Generate the value of the current element for the masked expression
+  // at the current insertion point (inside the where loops, and any fir.if
+  // generated for previous masks).
+  builder.restoreInsertionPoint(insertionPoint);
+  return maskedExpr.generateElementalParts(
+      builder, whereLoopNest->oneBasedIndices, mapper);
+}
+
 void OrderedAssignmentRewriter::generateCleanupIfAny(
     std::optional<hlfir::YieldOp> maybeYield) {
   if (maybeYield.has_value())
@@ -310,6 +457,127 @@ bool OrderedAssignmentRewriter::isRequiredInCurrentRun(
   return false;
 }
 
+/// Is the apply using all the elemental indices in order?
+static bool isInOrderApply(hlfir::ApplyOp apply, hlfir::ElementalOp elemental) {
+  if (elemental.getIndices().size() != apply.getIndices().size())
+    return false;
+  for (auto [elementalIdx, applyIdx] :
+       llvm::zip(elemental.getIndices(), apply.getIndices()))
+    if (elementalIdx != applyIdx)
+      return false;
+  return true;
+}
+
+/// Gather the chain of hlfir::ElementalOp, if any, that produced \p value.
+static void
+gatherElementalTree(mlir::Value value,
+                    llvm::SmallPtrSetImpl<mlir::Operation *> &elementalOps,
+                    bool isOutOfOrder) {
+  if (auto elemental = value.getDefiningOp<hlfir::ElementalOp>()) {
+    // Only inline an applied elemental that must be executed in order if the
+    // applying indices are in order. An hlfir::Elemental may have been created
+    // for a transformational like transpose, and Fortran 2018 standard
+    // section 10.2.3.2, point 10 imply that impure elemental sub-expression
+    // evaluations should not be masked if they are the arguments of
+    // transformational expressions.
+    if (isOutOfOrder && elemental.isOrdered())
+      return;
+    elementalOps.insert(elemental.getOperation());
+    for (mlir::Operation &op : elemental.getBody()->getOperations())
+      if (auto apply = mlir::dyn_cast<hlfir::ApplyOp>(op)) {
+        bool isUnorderedApply =
+            isOutOfOrder || !isInOrderApply(apply, elemental);
+        gatherElementalTree(apply.getExpr(), elementalOps, isUnorderedApply);
+      }
+  }
+}
+
+MaskedArrayExpr::MaskedArrayExpr(mlir::Location loc, mlir::Region &region)
+    : loc{loc}, region{region} {
+  mlir::Operation &terminator = region.back().back();
+  // TODO: clarify if vector subscripts must be inlined or not here.
+  // In case of x(elemental(A), :), this could lead to more elemental(A)
+  // evaluation than needed, which is not OK if "elemental" is impure.
+  // The standard is not very clear here.
+  if (mlir::isa<hlfir::ElementalAddrOp>(terminator))
+    TODO(loc, "vector subscripted assignments inside WHERE");
+  mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
+  gatherElementalTree(entity, elementalParts, /*isOutOfOrder=*/false);
+}
+
+void MaskedArrayExpr::generateNoneElementalPart(fir::FirOpBuilder &builder,
+                                                mlir::IRMapping &mapper) {
+  assert(!noneElementalPartWasGenerated &&
+         "none elemental parts already generated");
+  // Clone all operations, except the elemental and the final yield.
+  mlir::Block::OpListType &ops = region.back().getOperations();
+  assert(!ops.empty() && "yield block cannot be empty");
+  auto end = ops.end();
+  for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt)
+    if (!elementalParts.contains(&*opIt))
+      (void)builder.clone(*opIt, mapper);
+  noneElementalPartWasGenerated = true;
+}
+
+mlir::Value MaskedArrayExpr::generateShape(fir::FirOpBuilder &builder,
+                                           mlir::IRMapping &mapper) {
+  assert(noneElementalPartWasGenerated &&
+         "non elemental part must have been generated");
+  mlir::Operation &terminator = region.back().back();
+  // If the operation that produced the yielded entity is elemental, it was not
+  // cloned, but it holds a shape argument that was cloned. Return the cloned
+  // shape.
+  if (auto elementalAddrOp = mlir::dyn_cast<hlfir::ElementalAddrOp>(terminator))
+    return mapper.lookupOrDefault(elementalAddrOp.getShape());
+  mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
+  if (auto elemental = entity.getDefiningOp<hlfir::ElementalOp>())
+    return mapper.lookupOrDefault(elemental.getShape());
+  // Otherwise, the whole entity was cloned, and the shape can be generated
+  // from it.
+  hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)};
+  return hlfir::genShape(loc, builder, hlfir::Entity{clonedEntity});
+}
+
+mlir::Value
+MaskedArrayExpr::generateElementalParts(fir::FirOpBuilder &builder,
+                                        mlir::ValueRange oneBasedIndices,
+                                        mlir::IRMapping &mapper) {
+  assert(noneElementalPartWasGenerated &&
+         "non elemental part must have been generated");
+  mlir::Operation &terminator = region.back().back();
+  if (mlir::isa<hlfir::ElementalAddrOp>(terminator))
+    TODO(loc, "vector subscripted assignments inside WHERE");
+  mlir::Value entity = mlir::cast<hlfir::YieldOp>(terminator).getEntity();
+  auto elemental = entity.getDefiningOp<hlfir::ElementalOp>();
+  if (!elemental) {
+    hlfir::Entity clonedEntity{mapper.lookupOrDefault(entity)};
+    return hlfir::getElementAt(loc, builder, clonedEntity, oneBasedIndices);
+  }
+  auto mustRecursivelyInline =
+      [&](hlfir::ElementalOp appliedElemental) -> bool {
+    return elementalParts.contains(appliedElemental.getOperation());
+  };
+  return inlineElementalOp(loc, builder, elemental, oneBasedIndices, mapper,
+                           mustRecursivelyInline);
+}
+
+void MaskedArrayExpr::generateNoneElementalCleanupIfAny(
+    fir::FirOpBuilder &builder, mlir::IRMapping &mapper) {
+  mlir::Operation &terminator = region.back().back();
+  if (mlir::isa<hlfir::ElementalAddrOp>(terminator))
+    TODO(loc, "vector subscripted assignments inside WHERE");
+  auto yieldOp = mlir::cast<hlfir::YieldOp>(terminator);
+  if (yieldOp.getCleanup().empty())
+    return;
+  for (mlir::Operation &op : yieldOp.getCleanup().getOps()) {
+    if (auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(op))
+      if (elementalParts.contains(destroy.getExpr().getDefiningOp()))
+        continue;
+    if (!mlir::isa<fir::FirEndOp>(op))
+      (void)builder.clone(op, mapper);
+  }
+}
+
 /// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given
 /// a schedule.
 static void lower(hlfir::OrderedAssignmentTreeOpInterface root,

diff  --git a/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir b/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir
new file mode 100644
index 0000000000000..ac93e6828096a
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/where-codegen-no-conflict.fir
@@ -0,0 +1,309 @@
+// Test code generation of hlfir.where, and hlfir.elsewhere when there
+// is no need to create temporary storage.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+func.func @test_simple(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+  %cst = arith.constant 4.200000e+01 : f32
+  %0:2 = hlfir.declare %arg1 {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+  %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  hlfir.where {
+    hlfir.yield %0#0 : !fir.box<!fir.array<?x!fir.logical<4>>>
+  } do {
+    hlfir.region_assign {
+      hlfir.yield %cst : f32
+    } to {
+      hlfir.yield %1#0 : !fir.box<!fir.array<?xf32>>
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @test_simple(
+// CHECK-SAME:                           %[[VAL_0:.*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME:                           %[[VAL_1:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 4.200000e+01 : f32
+// CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+// CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_3]]#0, %[[VAL_5]] : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_9:.*]] = %[[VAL_8]] to %[[VAL_6]]#1 step %[[VAL_8]] {
+// CHECK:             %[[VAL_10:.*]] = hlfir.designate %[[VAL_3]]#0 (%[[VAL_9]])  : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_10]] : !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1
+// CHECK:             fir.if %[[VAL_12]] {
+// CHECK:               %[[VAL_13:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_9]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               hlfir.assign %[[VAL_2]] to %[[VAL_13]] : f32, !fir.ref<f32>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
+
+
+func.func @test_elsewhere(%arg0: !fir.ref<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>, %arg2: !fir.ref<!fir.array<100xf32>>, %arg3: !fir.ref<!fir.array<100x!fir.logical<4>>>, %arg4: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "mask2"}) {
+  %c100 = arith.constant 100 : index
+  %0 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %1:2 = hlfir.declare %arg3(%0) {uniq_name = "mask"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+  %2:2 = hlfir.declare %arg4(%0) {uniq_name = "mask2"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+  %3:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+  %4:2 = hlfir.declare %arg1(%0) {uniq_name = "y"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+  %5:2 = hlfir.declare %arg2(%0) {uniq_name = "z"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+  hlfir.where {
+    hlfir.yield %1#0 : !fir.ref<!fir.array<100x!fir.logical<4>>>
+  } do {
+    hlfir.region_assign {
+      hlfir.yield %4#0 : !fir.ref<!fir.array<100xf32>>
+    } to {
+      hlfir.yield %3#0 : !fir.ref<!fir.array<100xf32>>
+    }
+    hlfir.elsewhere mask {
+      hlfir.yield %2#0 : !fir.ref<!fir.array<100x!fir.logical<4>>>
+    } do {
+      hlfir.region_assign {
+        hlfir.yield %3#0 : !fir.ref<!fir.array<100xf32>>
+      } to {
+        hlfir.yield %4#0 : !fir.ref<!fir.array<100xf32>>
+      }
+      hlfir.elsewhere do {
+        hlfir.region_assign {
+          hlfir.yield %4#0 : !fir.ref<!fir.array<100xf32>>
+        } to {
+          hlfir.yield %5#0 : !fir.ref<!fir.array<100xf32>>
+        }
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @test_elsewhere(
+// CHECK-SAME:                              %[[VAL_0:[^:]*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME:                              %[[VAL_1:[^:]*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME:                              %[[VAL_2:[^:]*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME:                              %[[VAL_3:[^:]*]]: !fir.ref<!fir.array<100x!fir.logical<4>>>,
+// CHECK-SAME:                              %[[VAL_4:[^:]*]]: !fir.ref<!fir.array<100x!fir.logical<4>>> {fir.bindc_name = "mask2"}) {
+// CHECK:           %[[VAL_5:.*]] = arith.constant 100 : index
+// CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_3]](%[[VAL_6]]) {uniq_name = "mask"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_6]]) {uniq_name = "mask2"} : (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100x!fir.logical<4>>>, !fir.ref<!fir.array<100x!fir.logical<4>>>)
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_6]]) {uniq_name = "x"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK:           %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_6]]) {uniq_name = "y"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_6]]) {uniq_name = "z"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+// CHECK:           %[[VAL_12:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_13:.*]] = %[[VAL_12]] to %[[VAL_5]] step %[[VAL_12]] {
+// CHECK:             %[[VAL_14:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             fir.if %[[VAL_16]] {
+// CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_18:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK:               hlfir.assign %[[VAL_17]] to %[[VAL_18]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[VAL_19:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_20:.*]] = %[[VAL_19]] to %[[VAL_5]] step %[[VAL_19]] {
+// CHECK:             %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+// CHECK:             fir.if %[[VAL_23]] {
+// CHECK:             } else {
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+// CHECK:               fir.if %[[VAL_26]] {
+// CHECK:                 %[[VAL_27:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_28:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_27]] to %[[VAL_28]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[VAL_29:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_30:.*]] = %[[VAL_29]] to %[[VAL_5]] step %[[VAL_29]] {
+// CHECK:             %[[VAL_31:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_30]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_32:.*]] = fir.load %[[VAL_31]] : !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (!fir.logical<4>) -> i1
+// CHECK:             fir.if %[[VAL_33]] {
+// CHECK:             } else {
+// CHECK:               %[[VAL_34:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_30]])  : (!fir.ref<!fir.array<100x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_35:.*]] = fir.load %[[VAL_34]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.logical<4>) -> i1
+// CHECK:               fir.if %[[VAL_36]] {
+// CHECK:               } else {
+// CHECK:                 %[[VAL_37:.*]] = hlfir.designate %[[VAL_10]]#0 (%[[VAL_30]])  : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_38:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_30]])  : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_37]] to %[[VAL_38]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
+
+func.func @expr_tree(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?xf32>>, %arg2: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+  %cst = arith.constant 0.000000e+00 : f32
+  %c-1 = arith.constant -1 : index
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  %0:2 = hlfir.declare %arg2 {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+  %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "y"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  hlfir.where {
+    %3 = fir.shape %c10 : (index) -> !fir.shape<1>
+    %4 = hlfir.designate %2#0 (%c10:%c1:%c-1)  shape %3 : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
+    %5 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10xf32> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.designate %4 (%arg3)  : (!fir.box<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+      %10 = fir.load %9 : !fir.ref<f32>
+      %11 = math.absf %10 fastmath<contract> : f32
+      hlfir.yield_element %11 : f32
+    }
+    %6 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.apply %5, %arg3 : (!hlfir.expr<10xf32>, index) -> f32
+      %10 = arith.cmpf ogt, %9, %cst : f32
+      %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %11 : !fir.logical<4>
+    }
+    %7 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.apply %6, %arg3 : (!hlfir.expr<10x!fir.logical<4>>, index) -> !fir.logical<4>
+      %10 = hlfir.no_reassoc %9 : !fir.logical<4>
+      hlfir.yield_element %10 : !fir.logical<4>
+    }
+    %8 = hlfir.elemental %3 : (!fir.shape<1>) -> !hlfir.expr<10x!fir.logical<4>> {
+    ^bb0(%arg3: index):
+      %9 = hlfir.apply %7, %arg3 : (!hlfir.expr<10x!fir.logical<4>>, index) -> !fir.logical<4>
+      %10 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+      %11 = fir.load %10 : !fir.ref<!fir.logical<4>>
+      %12 = fir.convert %9 : (!fir.logical<4>) -> i1
+      %13 = fir.convert %11 : (!fir.logical<4>) -> i1
+      %14 = arith.andi %12, %13 : i1
+      %15 = fir.convert %14 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %15 : !fir.logical<4>
+    }
+    hlfir.yield %8 : !hlfir.expr<10x!fir.logical<4>> cleanup {
+      hlfir.destroy %8 : !hlfir.expr<10x!fir.logical<4>>
+      hlfir.destroy %7 : !hlfir.expr<10x!fir.logical<4>>
+      hlfir.destroy %6 : !hlfir.expr<10x!fir.logical<4>>
+      hlfir.destroy %5 : !hlfir.expr<10xf32>
+    }
+  } do {
+    hlfir.region_assign {
+      hlfir.yield %2#0 : !fir.box<!fir.array<?xf32>>
+    } to {
+      hlfir.yield %1#0 : !fir.box<!fir.array<?xf32>>
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @expr_tree(
+// CHECK-SAME:                         %[[VAL_0:[^:]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME:                         %[[VAL_1:[^:]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME:                         %[[VAL_2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[VAL_4:.*]] = arith.constant -1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "y"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_11:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_6]]:%[[VAL_5]]:%[[VAL_4]])  shape %[[VAL_10]] : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<10xf32>>
+// CHECK:           %[[VAL_12:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_13:.*]] = %[[VAL_12]] to %[[VAL_6]] step %[[VAL_12]] {
+// CHECK:             %[[VAL_14:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_13]])  : (!fir.box<!fir.array<10xf32>>, index) -> !fir.ref<f32>
+// CHECK:             %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<f32>
+// CHECK:             %[[VAL_16:.*]] = math.absf %[[VAL_15]] fastmath<contract> : f32
+// CHECK:             %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_3]] : f32
+// CHECK:             %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> !fir.logical<4>
+// CHECK:             %[[VAL_19:.*]] = hlfir.no_reassoc %[[VAL_18]] : !fir.logical<4>
+// CHECK:             %[[VAL_20:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?x!fir.logical<4>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_22:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_23:.*]] = fir.convert %[[VAL_21]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_24:.*]] = arith.andi %[[VAL_22]], %[[VAL_23]] : i1
+// CHECK:             %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (i1) -> !fir.logical<4>
+// CHECK:             %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<4>) -> i1
+// CHECK:             fir.if %[[VAL_26]] {
+// CHECK:               %[[VAL_27:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_28:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               hlfir.assign %[[VAL_27]] to %[[VAL_28]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
+
+func.func @inside_forall(%arg0: !fir.ref<!fir.array<10x20xf32>>, %arg1: !fir.ref<!fir.array<20xf32>>) {
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 0.000000e+00 : f32
+  %c10_i32 = arith.constant 10 : i32
+  %c1_i32 = arith.constant 1 : i32
+  %c10 = arith.constant 10 : index
+  %c20 = arith.constant 20 : index
+  %0 = fir.shape %c10, %c20 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "x"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>)
+  %2 = fir.shape %c20 : (index) -> !fir.shape<1>
+  %3:2 = hlfir.declare %arg1(%2) {uniq_name = "y"} : (!fir.ref<!fir.array<20xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<20xf32>>, !fir.ref<!fir.array<20xf32>>)
+  hlfir.forall lb {
+    hlfir.yield %c1_i32 : i32
+  } ub {
+    hlfir.yield %c10_i32 : i32
+  }  (%arg2: i32) {
+    hlfir.where {
+      %4 = hlfir.elemental %2 : (!fir.shape<1>) -> !hlfir.expr<20x!fir.logical<4>> {
+      ^bb0(%arg3: index):
+        %5 = hlfir.designate %3#0 (%arg3)  : (!fir.ref<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+        %6 = fir.load %5 : !fir.ref<f32>
+        %7 = arith.cmpf ogt, %6, %cst : f32
+        %8 = fir.convert %7 : (i1) -> !fir.logical<4>
+        hlfir.yield_element %8 : !fir.logical<4>
+      }
+      hlfir.yield %4 : !hlfir.expr<20x!fir.logical<4>> cleanup {
+        hlfir.destroy %4 : !hlfir.expr<20x!fir.logical<4>>
+      }
+    } do {
+      hlfir.region_assign {
+        hlfir.yield %3#0 : !fir.ref<!fir.array<20xf32>>
+      } to {
+        %4 = fir.convert %arg2 : (i32) -> i64
+        %5 = hlfir.designate %1#0 (%4, %c1:%c20:%c1)  shape %2 : (!fir.ref<!fir.array<10x20xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<20xf32>>
+        hlfir.yield %5 : !fir.box<!fir.array<20xf32>>
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @inside_forall(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.ref<!fir.array<10x20xf32>>,
+// CHECK-SAME:                             %[[VAL_1:.*]]: !fir.ref<!fir.array<20xf32>>) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 10 : i32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_7:.*]] = arith.constant 20 : index
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_6]], %[[VAL_7]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_8]]) {uniq_name = "x"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>)
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_10]]) {uniq_name = "y"} : (!fir.ref<!fir.array<20xf32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<20xf32>>, !fir.ref<!fir.array<20xf32>>)
+// CHECK:           %[[VAL_12:.*]] = fir.convert %[[VAL_5]] : (i32) -> index
+// CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
+// CHECK:           %[[VAL_14:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_14]] {
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:             %[[VAL_17:.*]] = arith.constant 1 : index
+// CHECK:             %[[VAL_18:.*]] = fir.convert %[[VAL_16]] : (i32) -> i64
+// CHECK:             %[[VAL_19:.*]] = hlfir.designate %[[VAL_9]]#0 (%[[VAL_18]], %[[VAL_2]]:%[[VAL_7]]:%[[VAL_2]])  shape %[[VAL_10]] : (!fir.ref<!fir.array<10x20xf32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<20xf32>>
+// CHECK:             fir.do_loop %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_7]] step %[[VAL_17]] {
+// CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<f32>
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf ogt, %[[VAL_22]], %[[VAL_3]] : f32
+// CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_23]] : (i1) -> !fir.logical<4>
+// CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_24]] : (!fir.logical<4>) -> i1
+// CHECK:               fir.if %[[VAL_25]] {
+// CHECK:                 %[[VAL_26:.*]] = hlfir.designate %[[VAL_11]]#0 (%[[VAL_20]])  : (!fir.ref<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_27:.*]] = hlfir.designate %[[VAL_19]] (%[[VAL_20]])  : (!fir.box<!fir.array<20xf32>>, index) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_26]] to %[[VAL_27]] : !fir.ref<f32>, !fir.ref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }


        


More information about the flang-commits mailing list