[flang-commits] [flang] 88ae0d6 - [flang] Lower general forall statement

Valentin Clement via flang-commits flang-commits at lists.llvm.org
Thu Mar 10 10:44:04 PST 2022


Author: Valentin Clement
Date: 2022-03-10T19:43:50+01:00
New Revision: 88ae0d61c31674bd75144c246ae25b55ecc5bff9

URL: https://github.com/llvm/llvm-project/commit/88ae0d61c31674bd75144c246ae25b55ecc5bff9
DIFF: https://github.com/llvm/llvm-project/commit/88ae0d61c31674bd75144c246ae25b55ecc5bff9.diff

LOG: [flang] Lower general forall statement

This patch lowers general forall statements. The forall
are lowered to nested loops.

This patch is part of the upstreaming effort from fir-dev branch.

Depends on D121385

Reviewed By: PeteSteinfeld, schweitz

Differential Revision: https://reviews.llvm.org/D121386

Co-authored-by: V Donaldson <vdonaldson at nvidia.com>
Co-authored-by: Jean Perier <jperier at nvidia.com>
Co-authored-by: Eric Schweitz <eschweitz at nvidia.com>

Added: 
    flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h
    flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp
    flang/test/Lower/forall/forall-construct.f90

Modified: 
    flang/include/flang/Lower/Allocatable.h
    flang/include/flang/Lower/ConvertExpr.h
    flang/include/flang/Optimizer/Builder/BoxValue.h
    flang/lib/Lower/Allocatable.cpp
    flang/lib/Lower/Bridge.cpp
    flang/lib/Lower/ConvertExpr.cpp
    flang/lib/Lower/IntrinsicCall.cpp
    flang/lib/Optimizer/Builder/BoxValue.cpp
    flang/lib/Optimizer/Builder/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Lower/Allocatable.h b/flang/include/flang/Lower/Allocatable.h
index 24eafeb92a97e..8fb95a506e348 100644
--- a/flang/include/flang/Lower/Allocatable.h
+++ b/flang/include/flang/Lower/Allocatable.h
@@ -24,15 +24,22 @@ class Location;
 
 namespace fir {
 class MutableBoxValue;
-} // namespace fir
+}
 
 namespace Fortran::parser {
 struct AllocateStmt;
 struct DeallocateStmt;
 } // namespace Fortran::parser
 
+namespace Fortran::evaluate {
+template <typename T>
+class Expr;
+struct SomeType;
+} // namespace Fortran::evaluate
+
 namespace Fortran::lower {
 class AbstractConverter;
+class StatementContext;
 
 namespace pft {
 struct Variable;
@@ -48,13 +55,23 @@ void genDeallocateStmt(Fortran::lower::AbstractConverter &,
 
 /// Create a MutableBoxValue for an allocatable or pointer entity.
 /// If the variables is a local variable that is not a dummy, it will be
-/// initialized to unallocated/disassociated status.
+/// initialized to unallocated/diassociated status.
 fir::MutableBoxValue createMutableBox(Fortran::lower::AbstractConverter &,
                                       mlir::Location,
                                       const Fortran::lower::pft::Variable &var,
                                       mlir::Value boxAddr,
                                       mlir::ValueRange nonDeferredParams);
 
+/// Update a MutableBoxValue to describe the entity designated by the expression
+/// \p source. This version takes care of \p source lowering.
+/// If \lbounds is not empty, it is used to defined the MutableBoxValue
+/// lower bounds, otherwise, the lower bounds from \p source are used.
+void associateMutableBox(
+    Fortran::lower::AbstractConverter &, mlir::Location,
+    const fir::MutableBoxValue &,
+    const Fortran::evaluate::Expr<Fortran::evaluate::SomeType> &source,
+    mlir::ValueRange lbounds, Fortran::lower::StatementContext &);
+
 } // namespace Fortran::lower
 
 #endif // FORTRAN_LOWER_ALLOCATABLE_H

diff  --git a/flang/include/flang/Lower/ConvertExpr.h b/flang/include/flang/Lower/ConvertExpr.h
index 966007696e150..dd246ab3b2e36 100644
--- a/flang/include/flang/Lower/ConvertExpr.h
+++ b/flang/include/flang/Lower/ConvertExpr.h
@@ -100,7 +100,10 @@ fir::ExtendedValue createSomeArrayBox(AbstractConverter &converter,
 /// The returned value is null otherwise.
 mlir::Value createSubroutineCall(AbstractConverter &converter,
                                  const evaluate::ProcedureRef &call,
-                                 SymMap &symMap, StatementContext &stmtCtx);
+                                 ExplicitIterSpace &explicitIterSpace,
+                                 ImplicitIterSpace &implicitIterSpace,
+                                 SymMap &symMap, StatementContext &stmtCtx,
+                                 bool isUserDefAssignment);
 
 /// Create the address of the box.
 /// \p expr must be the designator of an allocatable/pointer entity.

diff  --git a/flang/include/flang/Optimizer/Builder/BoxValue.h b/flang/include/flang/Optimizer/Builder/BoxValue.h
index 134b177515dbd..b2bb80eea29ee 100644
--- a/flang/include/flang/Optimizer/Builder/BoxValue.h
+++ b/flang/include/flang/Optimizer/Builder/BoxValue.h
@@ -24,6 +24,8 @@
 #include <utility>
 
 namespace fir {
+class FirOpBuilder;
+
 class CharBoxValue;
 class ArrayBoxValue;
 class CharArrayBoxValue;
@@ -402,6 +404,12 @@ bool isArray(const ExtendedValue &exv);
 /// Get the type parameters for `exv`.
 llvm::SmallVector<mlir::Value> getTypeParams(const ExtendedValue &exv);
 
+/// Get exactly one extent for any array-like extended value, \p exv. If \p exv
+/// is not an array or has rank less then \p dim, the result will be a nullptr.
+mlir::Value getExtentAtDimension(const ExtendedValue &exv,
+                                 FirOpBuilder &builder, mlir::Location loc,
+                                 unsigned dim);
+
 /// An extended value is a box of values pertaining to a discrete entity. It is
 /// used in lowering to track all the runtime values related to an entity. For
 /// example, an entity may have an address in memory that contains its value(s)

diff  --git a/flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h b/flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h
new file mode 100644
index 0000000000000..7e07cc9663340
--- /dev/null
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Inquiry.h
@@ -0,0 +1,46 @@
+//===-- Inquiry.h - generate inquiry runtime API calls ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_BUILDER_RUNTIME_INQUIRY_H
+#define FORTRAN_OPTIMIZER_BUILDER_RUNTIME_INQUIRY_H
+
+namespace mlir {
+class Value;
+class Location;
+} // namespace mlir
+
+namespace fir {
+class FirOpBuilder;
+}
+
+namespace fir::runtime {
+
+/// Generate call to general `LboundDim` runtime routine.  Calls to LBOUND
+/// without a DIM argument get transformed into descriptor inquiries so they're
+/// not handled in the runtime.
+mlir::Value genLboundDim(fir::FirOpBuilder &builder, mlir::Location loc,
+                         mlir::Value array, mlir::Value dim);
+
+/// Generate call to general `Ubound` runtime routine.  Calls to UBOUND
+/// with a DIM argument get transformed into an expression equivalent to
+/// SIZE() + LBOUND() - 1, so they don't have an intrinsic in the runtime.
+void genUbound(fir::FirOpBuilder &builder, mlir::Location loc,
+               mlir::Value resultBox, mlir::Value array, mlir::Value kind);
+
+/// Generate call to `Size` runtime routine. This routine is a specialized
+/// version when the DIM argument is not specified by the user.
+mlir::Value genSize(fir::FirOpBuilder &builder, mlir::Location loc,
+                    mlir::Value array);
+
+/// Generate call to general `SizeDim` runtime routine.  This version is for
+/// when the user specifies a DIM argument.
+mlir::Value genSizeDim(fir::FirOpBuilder &builder, mlir::Location loc,
+                       mlir::Value array, mlir::Value dim);
+
+} // namespace fir::runtime
+#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_INQUIRY_H

diff  --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 862d29e74056c..a2ab4a6a576fc 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -666,3 +666,33 @@ fir::MutableBoxValue Fortran::lower::createMutableBox(
     fir::factory::disassociateMutableBox(builder, loc, box);
   return box;
 }
+
+//===----------------------------------------------------------------------===//
+// MutableBoxValue reading interface implementation
+//===----------------------------------------------------------------------===//
+
+static bool
+isArraySectionWithoutVectorSubscript(const Fortran::lower::SomeExpr &expr) {
+  return expr.Rank() > 0 && Fortran::evaluate::IsVariable(expr) &&
+         !Fortran::evaluate::UnwrapWholeSymbolDataRef(expr) &&
+         !Fortran::evaluate::HasVectorSubscript(expr);
+}
+
+void Fortran::lower::associateMutableBox(
+    Fortran::lower::AbstractConverter &converter, mlir::Location loc,
+    const fir::MutableBoxValue &box, const Fortran::lower::SomeExpr &source,
+    mlir::ValueRange lbounds, Fortran::lower::StatementContext &stmtCtx) {
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+  if (Fortran::evaluate::UnwrapExpr<Fortran::evaluate::NullPointer>(source)) {
+    fir::factory::disassociateMutableBox(builder, loc, box);
+    return;
+  }
+  // The right hand side must not be evaluated in a temp.
+  // Array sections can be described by fir.box without making a temp.
+  // Otherwise, do not generate a fir.box to avoid having to later use a
+  // fir.rebox to implement the pointer association.
+  fir::ExtendedValue rhs = isArraySectionWithoutVectorSubscript(source)
+                               ? converter.genExprBox(source, stmtCtx, loc)
+                               : converter.genExprAddr(source, stmtCtx);
+  fir::factory::associateMutableBox(builder, loc, box, rhs, lbounds);
+}

diff  --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 6bfe8ccb34c36..204fecad4901d 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -29,6 +29,7 @@
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/Character.h"
 #include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Builder/Runtime/Ragged.h"
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Support/FIRContext.h"
 #include "flang/Optimizer/Support/InternalNames.h"
@@ -849,9 +850,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return sym && Fortran::semantics::IsAllocatable(*sym);
   }
 
+  /// Shared for both assignments and pointer assignments.
   void genAssignment(const Fortran::evaluate::Assignment &assign) {
     Fortran::lower::StatementContext stmtCtx;
     mlir::Location loc = toLocation();
+    if (explicitIterationSpace()) {
+      Fortran::lower::createArrayLoads(*this, explicitIterSpace, localSymbols);
+      explicitIterSpace.genLoopNest();
+    }
     std::visit(
         Fortran::common::visitors{
             // [1] Plain old assignment.
@@ -875,7 +881,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
               // on a pointer returns the target address and not the address of
               // the pointer variable.
 
-              if (assign.lhs.Rank() > 0) {
+              if (assign.lhs.Rank() > 0 || explicitIterationSpace()) {
                 // Array assignment
                 // See Fortran 2018 10.2.1.3 p5, p6, and p7
                 genArrayAssignment(assign, stmtCtx);
@@ -934,7 +940,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
                 fir::factory::CharacterExprHelper{*builder, loc}.createAssign(
                     lhs, rhs);
               } else if (isDerivedCategory(lhsType->category())) {
-                TODO(toLocation(), "Derived type assignment");
+                // Fortran 2018 10.2.1.3 p13 and p14
+                // Recursively gen an assignment on each element pair.
+                fir::factory::genRecordAssignment(*builder, loc, lhs, rhs);
               } else {
                 llvm_unreachable("unknown category");
               }
@@ -948,36 +956,132 @@ class FirConverter : public Fortran::lower::AbstractConverter {
             // [2] User defined assignment. If the context is a scalar
             // expression then call the procedure.
             [&](const Fortran::evaluate::ProcedureRef &procRef) {
-              TODO(toLocation(), "User defined assignment");
+              Fortran::lower::StatementContext &ctx =
+                  explicitIterationSpace() ? explicitIterSpace.stmtContext()
+                                           : stmtCtx;
+              Fortran::lower::createSubroutineCall(
+                  *this, procRef, explicitIterSpace, implicitIterSpace,
+                  localSymbols, ctx, /*isUserDefAssignment=*/true);
             },
 
             // [3] Pointer assignment with possibly empty bounds-spec. R1035: a
             // bounds-spec is a lower bound value.
             [&](const Fortran::evaluate::Assignment::BoundsSpec &lbExprs) {
-              TODO(toLocation(),
-                   "Pointer assignment with possibly empty bounds-spec");
+              if (IsProcedure(assign.rhs))
+                TODO(loc, "procedure pointer assignment");
+              std::optional<Fortran::evaluate::DynamicType> lhsType =
+                  assign.lhs.GetType();
+              std::optional<Fortran::evaluate::DynamicType> rhsType =
+                  assign.rhs.GetType();
+              // Polymorphic lhs/rhs may need more care. See F2018 10.2.2.3.
+              if ((lhsType && lhsType->IsPolymorphic()) ||
+                  (rhsType && rhsType->IsPolymorphic()))
+                TODO(loc, "pointer assignment involving polymorphic entity");
+
+              // FIXME: in the explicit space context, we want to use
+              // ScalarArrayExprLowering here.
+              fir::MutableBoxValue lhs = genExprMutableBox(loc, assign.lhs);
+              llvm::SmallVector<mlir::Value> lbounds;
+              for (const Fortran::evaluate::ExtentExpr &lbExpr : lbExprs)
+                lbounds.push_back(
+                    fir::getBase(genExprValue(toEvExpr(lbExpr), stmtCtx)));
+              Fortran::lower::associateMutableBox(*this, loc, lhs, assign.rhs,
+                                                  lbounds, stmtCtx);
+              if (explicitIterationSpace()) {
+                mlir::ValueRange inners = explicitIterSpace.getInnerArgs();
+                if (!inners.empty()) {
+                  // TODO: should force a copy-in/copy-out here.
+                  // e.g., obj%ptr(i+1) => obj%ptr(i)
+                  builder->create<fir::ResultOp>(loc, inners);
+                }
+              }
             },
 
             // [4] Pointer assignment with bounds-remapping. R1036: a
             // bounds-remapping is a pair, lower bound and upper bound.
             [&](const Fortran::evaluate::Assignment::BoundsRemapping
                     &boundExprs) {
-              TODO(toLocation(), "Pointer assignment with bounds-remapping");
+              std::optional<Fortran::evaluate::DynamicType> lhsType =
+                  assign.lhs.GetType();
+              std::optional<Fortran::evaluate::DynamicType> rhsType =
+                  assign.rhs.GetType();
+              // Polymorphic lhs/rhs may need more care. See F2018 10.2.2.3.
+              if ((lhsType && lhsType->IsPolymorphic()) ||
+                  (rhsType && rhsType->IsPolymorphic()))
+                TODO(loc, "pointer assignment involving polymorphic entity");
+
+              // FIXME: in the explicit space context, we want to use
+              // ScalarArrayExprLowering here.
+              fir::MutableBoxValue lhs = genExprMutableBox(loc, assign.lhs);
+              if (Fortran::evaluate::UnwrapExpr<Fortran::evaluate::NullPointer>(
+                      assign.rhs)) {
+                fir::factory::disassociateMutableBox(*builder, loc, lhs);
+                return;
+              }
+              llvm::SmallVector<mlir::Value> lbounds;
+              llvm::SmallVector<mlir::Value> ubounds;
+              for (const std::pair<Fortran::evaluate::ExtentExpr,
+                                   Fortran::evaluate::ExtentExpr> &pair :
+                   boundExprs) {
+                const Fortran::evaluate::ExtentExpr &lbExpr = pair.first;
+                const Fortran::evaluate::ExtentExpr &ubExpr = pair.second;
+                lbounds.push_back(
+                    fir::getBase(genExprValue(toEvExpr(lbExpr), stmtCtx)));
+                ubounds.push_back(
+                    fir::getBase(genExprValue(toEvExpr(ubExpr), stmtCtx)));
+              }
+              // Do not generate a temp in case rhs is an array section.
+              fir::ExtendedValue rhs =
+                  isArraySectionWithoutVectorSubscript(assign.rhs)
+                      ? Fortran::lower::createSomeArrayBox(
+                            *this, assign.rhs, localSymbols, stmtCtx)
+                      : genExprAddr(assign.rhs, stmtCtx);
+              fir::factory::associateMutableBoxWithRemap(*builder, loc, lhs,
+                                                         rhs, lbounds, ubounds);
+              if (explicitIterationSpace()) {
+                mlir::ValueRange inners = explicitIterSpace.getInnerArgs();
+                if (!inners.empty()) {
+                  // TODO: should force a copy-in/copy-out here.
+                  // e.g., obj%ptr(i+1) => obj%ptr(i)
+                  builder->create<fir::ResultOp>(loc, inners);
+                }
+              }
             },
         },
         assign.u);
+    if (explicitIterationSpace())
+      Fortran::lower::createArrayMergeStores(*this, explicitIterSpace);
   }
 
   /// Lowering of CALL statement
   void genFIR(const Fortran::parser::CallStmt &stmt) {
     Fortran::lower::StatementContext stmtCtx;
+    Fortran::lower::pft::Evaluation &eval = getEval();
     setCurrentPosition(stmt.v.source);
     assert(stmt.typedCall && "Call was not analyzed");
     // Call statement lowering shares code with function call lowering.
     mlir::Value res = Fortran::lower::createSubroutineCall(
-        *this, *stmt.typedCall, localSymbols, stmtCtx);
+        *this, *stmt.typedCall, explicitIterSpace, implicitIterSpace,
+        localSymbols, stmtCtx, /*isUserDefAssignment=*/false);
     if (!res)
       return; // "Normal" subroutine call.
+    // Call with alternate return specifiers.
+    // The call returns an index that selects an alternate return branch target.
+    llvm::SmallVector<int64_t> indexList;
+    llvm::SmallVector<mlir::Block *> blockList;
+    int64_t index = 0;
+    for (const Fortran::parser::ActualArgSpec &arg :
+         std::get<std::list<Fortran::parser::ActualArgSpec>>(stmt.v.t)) {
+      const auto &actual = std::get<Fortran::parser::ActualArg>(arg.t);
+      if (const auto *altReturn =
+              std::get_if<Fortran::parser::AltReturnSpec>(&actual.u)) {
+        indexList.push_back(++index);
+        blockList.push_back(blockOfLabel(eval, altReturn->v));
+      }
+    }
+    blockList.push_back(eval.nonNopSuccessor().block); // default = fallthrough
+    stmtCtx.finalize();
+    builder->create<fir::SelectOp>(toLocation(), res, indexList, blockList);
   }
 
   void genFIR(const Fortran::parser::ComputedGotoStmt &stmt) {
@@ -1171,28 +1275,199 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     genFIR(stmt.statement);
   }
 
+  /// Force the binding of an explicit symbol. This is used to bind and re-bind
+  /// a concurrent control symbol to its value.
+  void forceControlVariableBinding(const Fortran::semantics::Symbol *sym,
+                                   mlir::Value inducVar) {
+    mlir::Location loc = toLocation();
+    assert(sym && "There must be a symbol to bind");
+    mlir::Type toTy = genType(*sym);
+    // FIXME: this should be a "per iteration" temporary.
+    mlir::Value tmp = builder->createTemporary(
+        loc, toTy, toStringRef(sym->name()),
+        llvm::ArrayRef<mlir::NamedAttribute>{
+            Fortran::lower::getAdaptToByRefAttr(*builder)});
+    mlir::Value cast = builder->createConvert(loc, toTy, inducVar);
+    builder->create<fir::StoreOp>(loc, cast, tmp);
+    localSymbols.addSymbol(*sym, tmp, /*force=*/true);
+  }
+
+  /// Process a concurrent header for a FORALL. (Concurrent headers for DO
+  /// CONCURRENT loops are lowered elsewhere.)
   void genFIR(const Fortran::parser::ConcurrentHeader &header) {
-    TODO(toLocation(), "ConcurrentHeader lowering");
+    llvm::SmallVector<mlir::Value> lows;
+    llvm::SmallVector<mlir::Value> highs;
+    llvm::SmallVector<mlir::Value> steps;
+    if (explicitIterSpace.isOutermostForall()) {
+      // For the outermost forall, we evaluate the bounds expressions once.
+      // Contrastingly, if this forall is nested, the bounds expressions are
+      // assumed to be pure, possibly dependent on outer concurrent control
+      // variables, possibly variant with respect to arguments, and will be
+      // re-evaluated.
+      mlir::Location loc = toLocation();
+      mlir::Type idxTy = builder->getIndexType();
+      Fortran::lower::StatementContext &stmtCtx =
+          explicitIterSpace.stmtContext();
+      auto lowerExpr = [&](auto &e) {
+        return fir::getBase(genExprValue(e, stmtCtx));
+      };
+      for (const Fortran::parser::ConcurrentControl &ctrl :
+           std::get<std::list<Fortran::parser::ConcurrentControl>>(header.t)) {
+        const Fortran::lower::SomeExpr *lo =
+            Fortran::semantics::GetExpr(std::get<1>(ctrl.t));
+        const Fortran::lower::SomeExpr *hi =
+            Fortran::semantics::GetExpr(std::get<2>(ctrl.t));
+        auto &optStep =
+            std::get<std::optional<Fortran::parser::ScalarIntExpr>>(ctrl.t);
+        lows.push_back(builder->createConvert(loc, idxTy, lowerExpr(*lo)));
+        highs.push_back(builder->createConvert(loc, idxTy, lowerExpr(*hi)));
+        steps.push_back(
+            optStep.has_value()
+                ? builder->createConvert(
+                      loc, idxTy,
+                      lowerExpr(*Fortran::semantics::GetExpr(*optStep)))
+                : builder->createIntegerConstant(loc, idxTy, 1));
+      }
+    }
+    auto lambda = [&, lows, highs, steps]() {
+      // Create our iteration space from the header spec.
+      mlir::Location loc = toLocation();
+      mlir::Type idxTy = builder->getIndexType();
+      llvm::SmallVector<fir::DoLoopOp> loops;
+      Fortran::lower::StatementContext &stmtCtx =
+          explicitIterSpace.stmtContext();
+      auto lowerExpr = [&](auto &e) {
+        return fir::getBase(genExprValue(e, stmtCtx));
+      };
+      const bool outermost = !lows.empty();
+      std::size_t headerIndex = 0;
+      for (const Fortran::parser::ConcurrentControl &ctrl :
+           std::get<std::list<Fortran::parser::ConcurrentControl>>(header.t)) {
+        const Fortran::semantics::Symbol *ctrlVar =
+            std::get<Fortran::parser::Name>(ctrl.t).symbol;
+        mlir::Value lb;
+        mlir::Value ub;
+        mlir::Value by;
+        if (outermost) {
+          assert(headerIndex < lows.size());
+          if (headerIndex == 0)
+            explicitIterSpace.resetInnerArgs();
+          lb = lows[headerIndex];
+          ub = highs[headerIndex];
+          by = steps[headerIndex++];
+        } else {
+          const Fortran::lower::SomeExpr *lo =
+              Fortran::semantics::GetExpr(std::get<1>(ctrl.t));
+          const Fortran::lower::SomeExpr *hi =
+              Fortran::semantics::GetExpr(std::get<2>(ctrl.t));
+          auto &optStep =
+              std::get<std::optional<Fortran::parser::ScalarIntExpr>>(ctrl.t);
+          lb = builder->createConvert(loc, idxTy, lowerExpr(*lo));
+          ub = builder->createConvert(loc, idxTy, lowerExpr(*hi));
+          by = optStep.has_value()
+                   ? builder->createConvert(
+                         loc, idxTy,
+                         lowerExpr(*Fortran::semantics::GetExpr(*optStep)))
+                   : builder->createIntegerConstant(loc, idxTy, 1);
+        }
+        auto lp = builder->create<fir::DoLoopOp>(
+            loc, lb, ub, by, /*unordered=*/true,
+            /*finalCount=*/false, explicitIterSpace.getInnerArgs());
+        if (!loops.empty() || !outermost)
+          builder->create<fir::ResultOp>(loc, lp.getResults());
+        explicitIterSpace.setInnerArgs(lp.getRegionIterArgs());
+        builder->setInsertionPointToStart(lp.getBody());
+        forceControlVariableBinding(ctrlVar, lp.getInductionVar());
+        loops.push_back(lp);
+      }
+      if (outermost)
+        explicitIterSpace.setOuterLoop(loops[0]);
+      explicitIterSpace.appendLoops(loops);
+      if (const auto &mask =
+              std::get<std::optional<Fortran::parser::ScalarLogicalExpr>>(
+                  header.t);
+          mask.has_value()) {
+        mlir::Type i1Ty = builder->getI1Type();
+        fir::ExtendedValue maskExv =
+            genExprValue(*Fortran::semantics::GetExpr(mask.value()), stmtCtx);
+        mlir::Value cond =
+            builder->createConvert(loc, i1Ty, fir::getBase(maskExv));
+        auto ifOp = builder->create<fir::IfOp>(
+            loc, explicitIterSpace.innerArgTypes(), cond,
+            /*withElseRegion=*/true);
+        builder->create<fir::ResultOp>(loc, ifOp.getResults());
+        builder->setInsertionPointToStart(&ifOp.getElseRegion().front());
+        builder->create<fir::ResultOp>(loc, explicitIterSpace.getInnerArgs());
+        builder->setInsertionPointToStart(&ifOp.getThenRegion().front());
+      }
+    };
+    // Push the lambda to gen the loop nest context.
+    explicitIterSpace.pushLoopNest(lambda);
   }
 
   void genFIR(const Fortran::parser::ForallAssignmentStmt &stmt) {
-    TODO(toLocation(), "ForallAssignmentStmt lowering");
+    std::visit([&](const auto &x) { genFIR(x); }, stmt.u);
   }
 
   void genFIR(const Fortran::parser::EndForallStmt &) {
-    TODO(toLocation(), "EndForallStmt lowering");
+    cleanupExplicitSpace();
   }
 
-  void genFIR(const Fortran::parser::ForallStmt &) {
-    TODO(toLocation(), "ForallStmt lowering");
+  template <typename A>
+  void prepareExplicitSpace(const A &forall) {
+    if (!explicitIterSpace.isActive())
+      analyzeExplicitSpace(forall);
+    localSymbols.pushScope();
+    explicitIterSpace.enter();
+  }
+
+  /// Cleanup all the FORALL context information when we exit.
+  void cleanupExplicitSpace() {
+    explicitIterSpace.leave();
+    localSymbols.popScope();
   }
 
-  void genFIR(const Fortran::parser::ForallConstruct &) {
-    TODO(toLocation(), "ForallConstruct lowering");
+  /// Generate FIR for a FORALL statement.
+  void genFIR(const Fortran::parser::ForallStmt &stmt) {
+    prepareExplicitSpace(stmt);
+    genFIR(std::get<
+               Fortran::common::Indirection<Fortran::parser::ConcurrentHeader>>(
+               stmt.t)
+               .value());
+    genFIR(std::get<Fortran::parser::UnlabeledStatement<
+               Fortran::parser::ForallAssignmentStmt>>(stmt.t)
+               .statement);
+    cleanupExplicitSpace();
+  }
+
+  /// Generate FIR for a FORALL construct.
+  void genFIR(const Fortran::parser::ForallConstruct &forall) {
+    prepareExplicitSpace(forall);
+    genNestedStatement(
+        std::get<
+            Fortran::parser::Statement<Fortran::parser::ForallConstructStmt>>(
+            forall.t));
+    for (const Fortran::parser::ForallBodyConstruct &s :
+         std::get<std::list<Fortran::parser::ForallBodyConstruct>>(forall.t)) {
+      std::visit(
+          Fortran::common::visitors{
+              [&](const Fortran::parser::WhereConstruct &b) { genFIR(b); },
+              [&](const Fortran::common::Indirection<
+                  Fortran::parser::ForallConstruct> &b) { genFIR(b.value()); },
+              [&](const auto &b) { genNestedStatement(b); }},
+          s.u);
+    }
+    genNestedStatement(
+        std::get<Fortran::parser::Statement<Fortran::parser::EndForallStmt>>(
+            forall.t));
   }
 
-  void genFIR(const Fortran::parser::ForallConstructStmt &) {
-    TODO(toLocation(), "ForallConstructStmt lowering");
+  /// Lower the concurrent header specification.
+  void genFIR(const Fortran::parser::ForallConstructStmt &stmt) {
+    genFIR(std::get<
+               Fortran::common::Indirection<Fortran::parser::ConcurrentHeader>>(
+               stmt.t)
+               .value());
   }
 
   void genFIR(const Fortran::parser::CompilerDirective &) {
@@ -1750,6 +2025,208 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     eval.visit([&](const auto &stmt) { genFIR(stmt); });
   }
 
+  //===--------------------------------------------------------------------===//
+  // Analysis on a nested explicit iteration space.
+  //===--------------------------------------------------------------------===//
+
+  void analyzeExplicitSpace(const Fortran::parser::ConcurrentHeader &header) {
+    explicitIterSpace.pushLevel();
+    for (const Fortran::parser::ConcurrentControl &ctrl :
+         std::get<std::list<Fortran::parser::ConcurrentControl>>(header.t)) {
+      const Fortran::semantics::Symbol *ctrlVar =
+          std::get<Fortran::parser::Name>(ctrl.t).symbol;
+      explicitIterSpace.addSymbol(ctrlVar);
+    }
+    if (const auto &mask =
+            std::get<std::optional<Fortran::parser::ScalarLogicalExpr>>(
+                header.t);
+        mask.has_value())
+      analyzeExplicitSpace(*Fortran::semantics::GetExpr(*mask));
+  }
+  template <bool LHS = false, typename A>
+  void analyzeExplicitSpace(const Fortran::evaluate::Expr<A> &e) {
+    explicitIterSpace.exprBase(&e, LHS);
+  }
+  void analyzeExplicitSpace(const Fortran::evaluate::Assignment *assign) {
+    auto analyzeAssign = [&](const Fortran::lower::SomeExpr &lhs,
+                             const Fortran::lower::SomeExpr &rhs) {
+      analyzeExplicitSpace</*LHS=*/true>(lhs);
+      analyzeExplicitSpace(rhs);
+    };
+    std::visit(
+        Fortran::common::visitors{
+            [&](const Fortran::evaluate::ProcedureRef &procRef) {
+              // Ensure the procRef expressions are the one being visited.
+              assert(procRef.arguments().size() == 2);
+              const Fortran::lower::SomeExpr *lhs =
+                  procRef.arguments()[0].value().UnwrapExpr();
+              const Fortran::lower::SomeExpr *rhs =
+                  procRef.arguments()[1].value().UnwrapExpr();
+              assert(lhs && rhs &&
+                     "user defined assignment arguments must be expressions");
+              analyzeAssign(*lhs, *rhs);
+            },
+            [&](const auto &) { analyzeAssign(assign->lhs, assign->rhs); }},
+        assign->u);
+    explicitIterSpace.endAssign();
+  }
+  void analyzeExplicitSpace(const Fortran::parser::ForallAssignmentStmt &stmt) {
+    std::visit([&](const auto &s) { analyzeExplicitSpace(s); }, stmt.u);
+  }
+  void analyzeExplicitSpace(const Fortran::parser::AssignmentStmt &s) {
+    analyzeExplicitSpace(s.typedAssignment->v.operator->());
+  }
+  void analyzeExplicitSpace(const Fortran::parser::PointerAssignmentStmt &s) {
+    analyzeExplicitSpace(s.typedAssignment->v.operator->());
+  }
+  void analyzeExplicitSpace(const Fortran::parser::WhereConstruct &c) {
+    analyzeExplicitSpace(
+        std::get<
+            Fortran::parser::Statement<Fortran::parser::WhereConstructStmt>>(
+            c.t)
+            .statement);
+    for (const Fortran::parser::WhereBodyConstruct &body :
+         std::get<std::list<Fortran::parser::WhereBodyConstruct>>(c.t))
+      analyzeExplicitSpace(body);
+    for (const Fortran::parser::WhereConstruct::MaskedElsewhere &e :
+         std::get<std::list<Fortran::parser::WhereConstruct::MaskedElsewhere>>(
+             c.t))
+      analyzeExplicitSpace(e);
+    if (const auto &e =
+            std::get<std::optional<Fortran::parser::WhereConstruct::Elsewhere>>(
+                c.t);
+        e.has_value())
+      analyzeExplicitSpace(e.operator->());
+  }
+  void analyzeExplicitSpace(const Fortran::parser::WhereConstructStmt &ws) {
+    const Fortran::lower::SomeExpr *exp = Fortran::semantics::GetExpr(
+        std::get<Fortran::parser::LogicalExpr>(ws.t));
+    addMaskVariable(exp);
+    analyzeExplicitSpace(*exp);
+  }
+  void analyzeExplicitSpace(
+      const Fortran::parser::WhereConstruct::MaskedElsewhere &ew) {
+    analyzeExplicitSpace(
+        std::get<
+            Fortran::parser::Statement<Fortran::parser::MaskedElsewhereStmt>>(
+            ew.t)
+            .statement);
+    for (const Fortran::parser::WhereBodyConstruct &e :
+         std::get<std::list<Fortran::parser::WhereBodyConstruct>>(ew.t))
+      analyzeExplicitSpace(e);
+  }
+  void analyzeExplicitSpace(const Fortran::parser::WhereBodyConstruct &body) {
+    std::visit(Fortran::common::visitors{
+                   [&](const Fortran::common::Indirection<
+                       Fortran::parser::WhereConstruct> &wc) {
+                     analyzeExplicitSpace(wc.value());
+                   },
+                   [&](const auto &s) { analyzeExplicitSpace(s.statement); }},
+               body.u);
+  }
+  void analyzeExplicitSpace(const Fortran::parser::MaskedElsewhereStmt &stmt) {
+    const Fortran::lower::SomeExpr *exp = Fortran::semantics::GetExpr(
+        std::get<Fortran::parser::LogicalExpr>(stmt.t));
+    addMaskVariable(exp);
+    analyzeExplicitSpace(*exp);
+  }
+  void
+  analyzeExplicitSpace(const Fortran::parser::WhereConstruct::Elsewhere *ew) {
+    for (const Fortran::parser::WhereBodyConstruct &e :
+         std::get<std::list<Fortran::parser::WhereBodyConstruct>>(ew->t))
+      analyzeExplicitSpace(e);
+  }
+  void analyzeExplicitSpace(const Fortran::parser::WhereStmt &stmt) {
+    const Fortran::lower::SomeExpr *exp = Fortran::semantics::GetExpr(
+        std::get<Fortran::parser::LogicalExpr>(stmt.t));
+    addMaskVariable(exp);
+    analyzeExplicitSpace(*exp);
+    const std::optional<Fortran::evaluate::Assignment> &assign =
+        std::get<Fortran::parser::AssignmentStmt>(stmt.t).typedAssignment->v;
+    assert(assign.has_value() && "WHERE has no statement");
+    analyzeExplicitSpace(assign.operator->());
+  }
+  void analyzeExplicitSpace(const Fortran::parser::ForallStmt &forall) {
+    analyzeExplicitSpace(
+        std::get<
+            Fortran::common::Indirection<Fortran::parser::ConcurrentHeader>>(
+            forall.t)
+            .value());
+    analyzeExplicitSpace(std::get<Fortran::parser::UnlabeledStatement<
+                             Fortran::parser::ForallAssignmentStmt>>(forall.t)
+                             .statement);
+    analyzeExplicitSpacePop();
+  }
+  void
+  analyzeExplicitSpace(const Fortran::parser::ForallConstructStmt &forall) {
+    analyzeExplicitSpace(
+        std::get<
+            Fortran::common::Indirection<Fortran::parser::ConcurrentHeader>>(
+            forall.t)
+            .value());
+  }
+  void analyzeExplicitSpace(const Fortran::parser::ForallConstruct &forall) {
+    analyzeExplicitSpace(
+        std::get<
+            Fortran::parser::Statement<Fortran::parser::ForallConstructStmt>>(
+            forall.t)
+            .statement);
+    for (const Fortran::parser::ForallBodyConstruct &s :
+         std::get<std::list<Fortran::parser::ForallBodyConstruct>>(forall.t)) {
+      std::visit(Fortran::common::visitors{
+                     [&](const Fortran::common::Indirection<
+                         Fortran::parser::ForallConstruct> &b) {
+                       analyzeExplicitSpace(b.value());
+                     },
+                     [&](const Fortran::parser::WhereConstruct &w) {
+                       analyzeExplicitSpace(w);
+                     },
+                     [&](const auto &b) { analyzeExplicitSpace(b.statement); }},
+                 s.u);
+    }
+    analyzeExplicitSpacePop();
+  }
+
+  void analyzeExplicitSpacePop() { explicitIterSpace.popLevel(); }
+
+  void addMaskVariable(Fortran::lower::FrontEndExpr exp) {
+    // Note: use i8 to store bool values. This avoids round-down behavior found
+    // with sequences of i1. That is, an array of i1 will be truncated in size
+    // and be too small. For example, a buffer of type fir.array<7xi1> will have
+    // 0 size.
+    mlir::Type i64Ty = builder->getIntegerType(64);
+    mlir::TupleType ty = fir::factory::getRaggedArrayHeaderType(*builder);
+    mlir::Type buffTy = ty.getType(1);
+    mlir::Type shTy = ty.getType(2);
+    mlir::Location loc = toLocation();
+    mlir::Value hdr = builder->createTemporary(loc, ty);
+    // FIXME: Is there a way to create a `zeroinitializer` in LLVM-IR dialect?
+    // For now, explicitly set lazy ragged header to all zeros.
+    // auto nilTup = builder->createNullConstant(loc, ty);
+    // builder->create<fir::StoreOp>(loc, nilTup, hdr);
+    mlir::Type i32Ty = builder->getIntegerType(32);
+    mlir::Value zero = builder->createIntegerConstant(loc, i32Ty, 0);
+    mlir::Value zero64 = builder->createIntegerConstant(loc, i64Ty, 0);
+    mlir::Value flags = builder->create<fir::CoordinateOp>(
+        loc, builder->getRefType(i64Ty), hdr, zero);
+    builder->create<fir::StoreOp>(loc, zero64, flags);
+    mlir::Value one = builder->createIntegerConstant(loc, i32Ty, 1);
+    mlir::Value nullPtr1 = builder->createNullConstant(loc, buffTy);
+    mlir::Value var = builder->create<fir::CoordinateOp>(
+        loc, builder->getRefType(buffTy), hdr, one);
+    builder->create<fir::StoreOp>(loc, nullPtr1, var);
+    mlir::Value two = builder->createIntegerConstant(loc, i32Ty, 2);
+    mlir::Value nullPtr2 = builder->createNullConstant(loc, shTy);
+    mlir::Value shape = builder->create<fir::CoordinateOp>(
+        loc, builder->getRefType(shTy), hdr, two);
+    builder->create<fir::StoreOp>(loc, nullPtr2, shape);
+    implicitIterSpace.addMaskVariable(exp, var, shape, hdr);
+    explicitIterSpace.outermostContext().attachCleanup(
+        [builder = this->builder, hdr, loc]() {
+          fir::runtime::genRaggedArrayDeallocate(loc, *builder, hdr);
+        });
+  }
+
   //===--------------------------------------------------------------------===//
 
   Fortran::lower::LoweringBridge &bridge;

diff  --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp
index 7fdd4ca83a585..15d6ba614dc86 100644
--- a/flang/lib/Lower/ConvertExpr.cpp
+++ b/flang/lib/Lower/ConvertExpr.cpp
@@ -188,6 +188,73 @@ static mlir::Value genActualIsPresentTest(fir::FirOpBuilder &builder,
                                           fir::getBase(actual));
 }
 
+/// Convert the array_load, `load`, to an extended value. If `path` is not
+/// empty, then traverse through the components designated. The base value is
+/// `newBase`. This does not accept an array_load with a slice operand.
+static fir::ExtendedValue
+arrayLoadExtValue(fir::FirOpBuilder &builder, mlir::Location loc,
+                  fir::ArrayLoadOp load, llvm::ArrayRef<mlir::Value> path,
+                  mlir::Value newBase, mlir::Value newLen = {}) {
+  // Recover the extended value from the load.
+  assert(!load.getSlice() && "slice is not allowed");
+  mlir::Type arrTy = load.getType();
+  if (!path.empty()) {
+    mlir::Type ty = fir::applyPathToType(arrTy, path);
+    if (!ty)
+      fir::emitFatalError(loc, "path does not apply to type");
+    if (!ty.isa<fir::SequenceType>()) {
+      if (fir::isa_char(ty)) {
+        mlir::Value len = newLen;
+        if (!len)
+          len = fir::factory::CharacterExprHelper{builder, loc}.getLength(
+              load.getMemref());
+        if (!len) {
+          assert(load.getTypeparams().size() == 1 &&
+                 "length must be in array_load");
+          len = load.getTypeparams()[0];
+        }
+        return fir::CharBoxValue{newBase, len};
+      }
+      return newBase;
+    }
+    arrTy = ty.cast<fir::SequenceType>();
+  }
+
+  // Use the shape op, if there is one.
+  mlir::Value shapeVal = load.getShape();
+  if (shapeVal) {
+    if (!mlir::isa<fir::ShiftOp>(shapeVal.getDefiningOp())) {
+      mlir::Type eleTy = fir::unwrapSequenceType(arrTy);
+      std::vector<mlir::Value> extents = fir::factory::getExtents(shapeVal);
+      std::vector<mlir::Value> origins = fir::factory::getOrigins(shapeVal);
+      if (fir::isa_char(eleTy)) {
+        mlir::Value len = newLen;
+        if (!len)
+          len = fir::factory::CharacterExprHelper{builder, loc}.getLength(
+              load.getMemref());
+        if (!len) {
+          assert(load.getTypeparams().size() == 1 &&
+                 "length must be in array_load");
+          len = load.getTypeparams()[0];
+        }
+        return fir::CharArrayBoxValue(newBase, len, extents, origins);
+      }
+      return fir::ArrayBoxValue(newBase, extents, origins);
+    }
+    if (!fir::isa_box_type(load.getMemref().getType()))
+      fir::emitFatalError(loc, "shift op is invalid in this context");
+  }
+
+  // There is no shape or the array is in a box. Extents and lower bounds must
+  // be read at runtime.
+  if (path.empty() && !shapeVal) {
+    fir::ExtendedValue exv =
+        fir::factory::readBoxValue(builder, loc, load.getMemref());
+    return fir::substBase(exv, newBase);
+  }
+  TODO(loc, "component is boxed, retreive its type parameters");
+}
+
 /// Place \p exv in memory if it is not already a memory reference. If
 /// \p forceValueType is provided, the value is first casted to the provided
 /// type before being stored (this is mainly intended for logicals whose value
@@ -552,6 +619,7 @@ class ScalarExprLowering {
           [&val](auto &) { return val.toExtendedValue(); });
     LLVM_DEBUG(llvm::dbgs()
                << "unknown symbol: " << sym << "\nmap: " << symMap << '\n');
+    llvm::errs() << "SYM: " << sym << "\n";
     fir::emitFatalError(getLoc(), "symbol is not mapped to any IR value");
   }
 
@@ -2273,6 +2341,11 @@ class ScalarExprLowering {
 static bool isAdjustedArrayElementType(mlir::Type t) {
   return fir::isa_char(t) || fir::isa_derived(t) || t.isa<fir::SequenceType>();
 }
+static bool elementTypeWasAdjusted(mlir::Type t) {
+  if (auto ty = t.dyn_cast<fir::ReferenceType>())
+    return isAdjustedArrayElementType(ty.getEleTy());
+  return false;
+}
 
 /// Build an ExtendedValue from a fir.array<?x...?xT> without actually setting
 /// the actual extents and lengths. This is only to allow their propagation as
@@ -2293,6 +2366,70 @@ convertToArrayBoxValue(mlir::Location loc, fir::FirOpBuilder &builder,
   return fir::ArrayBoxValue(val, extents);
 }
 
+/// Helper to generate calls to scalar user defined assignment procedures.
+static void genScalarUserDefinedAssignmentCall(fir::FirOpBuilder &builder,
+                                               mlir::Location loc,
+                                               mlir::FuncOp func,
+                                               const fir::ExtendedValue &lhs,
+                                               const fir::ExtendedValue &rhs) {
+  auto prepareUserDefinedArg =
+      [](fir::FirOpBuilder &builder, mlir::Location loc,
+         const fir::ExtendedValue &value, mlir::Type argType) -> mlir::Value {
+    if (argType.isa<fir::BoxCharType>()) {
+      const fir::CharBoxValue *charBox = value.getCharBox();
+      assert(charBox && "argument type mismatch in elemental user assignment");
+      return fir::factory::CharacterExprHelper{builder, loc}.createEmbox(
+          *charBox);
+    }
+    if (argType.isa<fir::BoxType>()) {
+      mlir::Value box = builder.createBox(loc, value);
+      return builder.createConvert(loc, argType, box);
+    }
+    // Simple pass by address.
+    mlir::Type argBaseType = fir::unwrapRefType(argType);
+    assert(!fir::hasDynamicSize(argBaseType));
+    mlir::Value from = fir::getBase(value);
+    if (argBaseType != fir::unwrapRefType(from.getType())) {
+      // With logicals, it is possible that from is i1 here.
+      if (fir::isa_ref_type(from.getType()))
+        from = builder.create<fir::LoadOp>(loc, from);
+      from = builder.createConvert(loc, argBaseType, from);
+    }
+    if (!fir::isa_ref_type(from.getType())) {
+      mlir::Value temp = builder.createTemporary(loc, argBaseType);
+      builder.create<fir::StoreOp>(loc, from, temp);
+      from = temp;
+    }
+    return builder.createConvert(loc, argType, from);
+  };
+  assert(func.getNumArguments() == 2);
+  mlir::Type lhsType = func.getType().getInput(0);
+  mlir::Type rhsType = func.getType().getInput(1);
+  mlir::Value lhsArg = prepareUserDefinedArg(builder, loc, lhs, lhsType);
+  mlir::Value rhsArg = prepareUserDefinedArg(builder, loc, rhs, rhsType);
+  builder.create<fir::CallOp>(loc, func, mlir::ValueRange{lhsArg, rhsArg});
+}
+
+/// Convert the result of a fir.array_modify to an ExtendedValue given the
+/// related fir.array_load.
+static fir::ExtendedValue arrayModifyToExv(fir::FirOpBuilder &builder,
+                                           mlir::Location loc,
+                                           fir::ArrayLoadOp load,
+                                           mlir::Value elementAddr) {
+  mlir::Type eleTy = fir::unwrapPassByRefType(elementAddr.getType());
+  if (fir::isa_char(eleTy)) {
+    auto len = fir::factory::CharacterExprHelper{builder, loc}.getLength(
+        load.getMemref());
+    if (!len) {
+      assert(load.getTypeparams().size() == 1 &&
+             "length must be in array_load");
+      len = load.getTypeparams()[0];
+    }
+    return fir::CharBoxValue{elementAddr, len};
+  }
+  return elementAddr;
+}
+
 //===----------------------------------------------------------------------===//
 //
 // Lowering of scalar expressions in an explicit iteration space context.
@@ -2678,6 +2815,82 @@ class ArrayExprLowering {
     assert(fir::getBase(loopRes));
   }
 
+  static void
+  lowerElementalUserAssignment(Fortran::lower::AbstractConverter &converter,
+                               Fortran::lower::SymMap &symMap,
+                               Fortran::lower::StatementContext &stmtCtx,
+                               Fortran::lower::ExplicitIterSpace &explicitSpace,
+                               Fortran::lower::ImplicitIterSpace &implicitSpace,
+                               const Fortran::evaluate::ProcedureRef &procRef) {
+    ArrayExprLowering ael(converter, stmtCtx, symMap,
+                          ConstituentSemantics::CustomCopyInCopyOut,
+                          &explicitSpace, &implicitSpace);
+    assert(procRef.arguments().size() == 2);
+    const auto *lhs = procRef.arguments()[0].value().UnwrapExpr();
+    const auto *rhs = procRef.arguments()[1].value().UnwrapExpr();
+    assert(lhs && rhs &&
+           "user defined assignment arguments must be expressions");
+    mlir::FuncOp func =
+        Fortran::lower::CallerInterface(procRef, converter).getFuncOp();
+    ael.lowerElementalUserAssignment(func, *lhs, *rhs);
+  }
+
+  void lowerElementalUserAssignment(mlir::FuncOp userAssignment,
+                                    const Fortran::lower::SomeExpr &lhs,
+                                    const Fortran::lower::SomeExpr &rhs) {
+    mlir::Location loc = getLoc();
+    PushSemantics(ConstituentSemantics::CustomCopyInCopyOut);
+    auto genArrayModify = genarr(lhs);
+    ccStoreToDest = [=](IterSpace iters) -> ExtValue {
+      auto modifiedArray = genArrayModify(iters);
+      auto arrayModify = mlir::dyn_cast_or_null<fir::ArrayModifyOp>(
+          fir::getBase(modifiedArray).getDefiningOp());
+      assert(arrayModify && "must be created by ArrayModifyOp");
+      fir::ExtendedValue lhs =
+          arrayModifyToExv(builder, loc, destination, arrayModify.getResult(0));
+      genScalarUserDefinedAssignmentCall(builder, loc, userAssignment, lhs,
+                                         iters.elementExv());
+      return modifiedArray;
+    };
+    determineShapeOfDest(lhs);
+    semant = ConstituentSemantics::RefTransparent;
+    auto exv = lowerArrayExpression(rhs);
+    if (explicitSpaceIsActive()) {
+      explicitSpace->finalizeContext();
+      builder.create<fir::ResultOp>(loc, fir::getBase(exv));
+    } else {
+      builder.create<fir::ArrayMergeStoreOp>(
+          loc, destination, fir::getBase(exv), destination.getMemref(),
+          destination.getSlice(), destination.getTypeparams());
+    }
+  }
+
+  /// Lower an elemental subroutine call with at least one array argument.
+  /// An elemental subroutine is an exception and does not have copy-in/copy-out
+  /// semantics. See 15.8.3.
+  /// Do NOT use this for user defined assignments.
+  static void
+  lowerElementalSubroutine(Fortran::lower::AbstractConverter &converter,
+                           Fortran::lower::SymMap &symMap,
+                           Fortran::lower::StatementContext &stmtCtx,
+                           const Fortran::lower::SomeExpr &call) {
+    ArrayExprLowering ael(converter, stmtCtx, symMap,
+                          ConstituentSemantics::RefTransparent);
+    ael.lowerElementalSubroutine(call);
+  }
+
+  // TODO: See the comment in genarr(const Fortran::lower::Parentheses<T>&).
+  // This is skipping generation of copy-in/copy-out code for analysis that is
+  // required when arguments are in parentheses.
+  void lowerElementalSubroutine(const Fortran::lower::SomeExpr &call) {
+    auto f = genarr(call);
+    llvm::SmallVector<mlir::Value> shape = genIterationShape();
+    auto [iterSpace, insPt] = genImplicitLoops(shape, /*innerArg=*/{});
+    f(iterSpace);
+    finalizeElementCtx();
+    builder.restoreInsertionPoint(insPt);
+  }
+
   template <typename A, typename B>
   ExtValue lowerScalarAssignment(const A &lhs, const B &rhs) {
     // 1) Lower the rhs expression with array_fetch op(s).
@@ -2710,6 +2923,61 @@ class ArrayExprLowering {
     return lexv;
   }
 
+  static ExtValue lowerScalarUserAssignment(
+      Fortran::lower::AbstractConverter &converter,
+      Fortran::lower::SymMap &symMap, Fortran::lower::StatementContext &stmtCtx,
+      Fortran::lower::ExplicitIterSpace &explicitIterSpace,
+      mlir::FuncOp userAssignmentFunction, const Fortran::lower::SomeExpr &lhs,
+      const Fortran::lower::SomeExpr &rhs) {
+    Fortran::lower::ImplicitIterSpace implicit;
+    ArrayExprLowering ael(converter, stmtCtx, symMap,
+                          ConstituentSemantics::RefTransparent,
+                          &explicitIterSpace, &implicit);
+    return ael.lowerScalarUserAssignment(userAssignmentFunction, lhs, rhs);
+  }
+
+  ExtValue lowerScalarUserAssignment(mlir::FuncOp userAssignment,
+                                     const Fortran::lower::SomeExpr &lhs,
+                                     const Fortran::lower::SomeExpr &rhs) {
+    mlir::Location loc = getLoc();
+    if (rhs.Rank() > 0)
+      TODO(loc, "user-defined elemental assigment from expression with rank");
+    // 1) Lower the rhs expression with array_fetch op(s).
+    IterationSpace iters;
+    iters.setElement(genarr(rhs)(iters));
+    fir::ExtendedValue elementalExv = iters.elementExv();
+    // 2) Lower the lhs expression to an array_modify.
+    semant = ConstituentSemantics::CustomCopyInCopyOut;
+    auto lexv = genarr(lhs)(iters);
+    bool isIllFormedLHS = false;
+    // 3) Insert the call
+    if (auto modifyOp = mlir::dyn_cast<fir::ArrayModifyOp>(
+            fir::getBase(lexv).getDefiningOp())) {
+      mlir::Value oldInnerArg = modifyOp.getSequence();
+      std::size_t offset = explicitSpace->argPosition(oldInnerArg);
+      explicitSpace->setInnerArg(offset, fir::getBase(lexv));
+      fir::ExtendedValue exv = arrayModifyToExv(
+          builder, loc, explicitSpace->getLhsLoad(0).getValue(),
+          modifyOp.getResult(0));
+      genScalarUserDefinedAssignmentCall(builder, loc, userAssignment, exv,
+                                         elementalExv);
+    } else {
+      // LHS is ill formed, it is a scalar with no references to FORALL
+      // subscripts, so there is actually no array assignment here. The user
+      // code is probably bad, but still insert user assignment call since it
+      // was not rejected by semantics (a warning was emitted).
+      isIllFormedLHS = true;
+      genScalarUserDefinedAssignmentCall(builder, getLoc(), userAssignment,
+                                         lexv, elementalExv);
+    }
+    // 4) Finalize the inner context.
+    explicitSpace->finalizeContext();
+    // 5). Thread the array value updated forward.
+    if (!isIllFormedLHS)
+      builder.create<fir::ResultOp>(getLoc(), fir::getBase(lexv));
+    return lexv;
+  }
+
   bool explicitSpaceIsActive() const {
     return explicitSpace && explicitSpace->isActive();
   }
@@ -3074,6 +3342,15 @@ class ArrayExprLowering {
     return ScalarExprLowering{getLoc(), converter, symMap, stmtCtx}.genval(x);
   }
 
+  /// Lower the expression, \p x, in a scalar context. If this is an explicit
+  /// space, the expression may be scalar and refer to an array. We want to
+  /// raise the array access to array operations in FIR to analyze potential
+  /// conflicts even when the result is a scalar element.
+  template <typename A>
+  ExtValue asScalarArray(const A &x) {
+    return explicitSpaceIsActive() ? genarr(x)(IterationSpace{}) : asScalar(x);
+  }
+
   /// Lower the expression in a scalar context to a memory reference.
   template <typename A>
   ExtValue asScalarRef(const A &x) {
@@ -3339,11 +3616,41 @@ class ArrayExprLowering {
     return genScalarAndForwardValue(x);
   }
 
+  // Converting a value of memory bound type requires creating a temp and
+  // copying the value.
+  static ExtValue convertAdjustedType(fir::FirOpBuilder &builder,
+                                      mlir::Location loc, mlir::Type toType,
+                                      const ExtValue &exv) {
+    return exv.match(
+        [&](const fir::CharBoxValue &cb) -> ExtValue {
+          mlir::Value len = cb.getLen();
+          auto mem =
+              builder.create<fir::AllocaOp>(loc, toType, mlir::ValueRange{len});
+          fir::CharBoxValue result(mem, len);
+          fir::factory::CharacterExprHelper{builder, loc}.createAssign(
+              ExtValue{result}, exv);
+          return result;
+        },
+        [&](const auto &) -> ExtValue {
+          fir::emitFatalError(loc, "convert on adjusted extended value");
+        });
+  }
   template <Fortran::common::TypeCategory TC1, int KIND,
             Fortran::common::TypeCategory TC2>
   CC genarr(const Fortran::evaluate::Convert<Fortran::evaluate::Type<TC1, KIND>,
                                              TC2> &x) {
-    TODO(getLoc(), "");
+    mlir::Location loc = getLoc();
+    auto lambda = genarr(x.left());
+    mlir::Type ty = converter.genType(TC1, KIND);
+    return [=](IterSpace iters) -> ExtValue {
+      auto exv = lambda(iters);
+      mlir::Value val = fir::getBase(exv);
+      auto valTy = val.getType();
+      if (elementTypeWasAdjusted(valTy) &&
+          !(fir::isa_ref_type(valTy) && fir::isa_integer(ty)))
+        return convertAdjustedType(builder, loc, ty, exv);
+      return builder.createConvert(loc, ty, val);
+    };
   }
 
   template <int KIND>
@@ -3504,6 +3811,292 @@ class ArrayExprLowering {
     return genarr(fir::ArrayBoxValue{addr, extents});
   }
 
+  //===--------------------------------------------------------------------===//
+  // A vector subscript expression may be wrapped with a cast to INTEGER*8.
+  // Get rid of it here so the vector can be loaded. Add it back when
+  // generating the elemental evaluation (inside the loop nest).
+
+  static Fortran::lower::SomeExpr
+  ignoreEvConvert(const Fortran::evaluate::Expr<Fortran::evaluate::Type<
+                      Fortran::common::TypeCategory::Integer, 8>> &x) {
+    return std::visit([&](const auto &v) { return ignoreEvConvert(v); }, x.u);
+  }
+  template <Fortran::common::TypeCategory FROM>
+  static Fortran::lower::SomeExpr ignoreEvConvert(
+      const Fortran::evaluate::Convert<
+          Fortran::evaluate::Type<Fortran::common::TypeCategory::Integer, 8>,
+          FROM> &x) {
+    return toEvExpr(x.left());
+  }
+  template <typename A>
+  static Fortran::lower::SomeExpr ignoreEvConvert(const A &x) {
+    return toEvExpr(x);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Get the `Se::Symbol*` for the subscript expression, `x`. This symbol can
+  // be used to determine the lbound, ubound of the vector.
+
+  template <typename A>
+  static const Fortran::semantics::Symbol *
+  extractSubscriptSymbol(const Fortran::evaluate::Expr<A> &x) {
+    return std::visit([&](const auto &v) { return extractSubscriptSymbol(v); },
+                      x.u);
+  }
+  template <typename A>
+  static const Fortran::semantics::Symbol *
+  extractSubscriptSymbol(const Fortran::evaluate::Designator<A> &x) {
+    return Fortran::evaluate::UnwrapWholeSymbolDataRef(x);
+  }
+  template <typename A>
+  static const Fortran::semantics::Symbol *extractSubscriptSymbol(const A &x) {
+    return nullptr;
+  }
+
+  //===--------------------------------------------------------------------===//
+
+  /// Get the declared lower bound value of the array `x` in dimension `dim`.
+  /// The argument `one` must be an ssa-value for the constant 1.
+  mlir::Value getLBound(const ExtValue &x, unsigned dim, mlir::Value one) {
+    return fir::factory::readLowerBound(builder, getLoc(), x, dim, one);
+  }
+
+  /// Get the declared upper bound value of the array `x` in dimension `dim`.
+  /// The argument `one` must be an ssa-value for the constant 1.
+  mlir::Value getUBound(const ExtValue &x, unsigned dim, mlir::Value one) {
+    mlir::Location loc = getLoc();
+    mlir::Value lb = getLBound(x, dim, one);
+    mlir::Value extent = fir::factory::readExtent(builder, loc, x, dim);
+    auto add = builder.create<mlir::arith::AddIOp>(loc, lb, extent);
+    return builder.create<mlir::arith::SubIOp>(loc, add, one);
+  }
+
+  /// Return the extent of the boxed array `x` in dimesion `dim`.
+  mlir::Value getExtent(const ExtValue &x, unsigned dim) {
+    return fir::factory::readExtent(builder, getLoc(), x, dim);
+  }
+
+  template <typename A>
+  ExtValue genArrayBase(const A &base) {
+    ScalarExprLowering sel{getLoc(), converter, symMap, stmtCtx};
+    return base.IsSymbol() ? sel.gen(base.GetFirstSymbol())
+                           : sel.gen(base.GetComponent());
+  }
+
+  template <typename A>
+  bool hasEvArrayRef(const A &x) {
+    struct HasEvArrayRefHelper
+        : public Fortran::evaluate::AnyTraverse<HasEvArrayRefHelper> {
+      HasEvArrayRefHelper()
+          : Fortran::evaluate::AnyTraverse<HasEvArrayRefHelper>(*this) {}
+      using Fortran::evaluate::AnyTraverse<HasEvArrayRefHelper>::operator();
+      bool operator()(const Fortran::evaluate::ArrayRef &) const {
+        return true;
+      }
+    } helper;
+    return helper(x);
+  }
+
+  CC genVectorSubscriptArrayFetch(const Fortran::lower::SomeExpr &expr,
+                                  std::size_t dim) {
+    PushSemantics(ConstituentSemantics::RefTransparent);
+    auto saved = Fortran::common::ScopedSet(explicitSpace, nullptr);
+    llvm::SmallVector<mlir::Value> savedDestShape = destShape;
+    destShape.clear();
+    auto result = genarr(expr);
+    if (destShape.empty())
+      TODO(getLoc(), "expected vector to have an extent");
+    assert(destShape.size() == 1 && "vector has rank > 1");
+    if (destShape[0] != savedDestShape[dim]) {
+      // Not the same, so choose the smaller value.
+      mlir::Location loc = getLoc();
+      auto cmp = builder.create<mlir::arith::CmpIOp>(
+          loc, mlir::arith::CmpIPredicate::sgt, destShape[0],
+          savedDestShape[dim]);
+      auto sel = builder.create<mlir::arith::SelectOp>(
+          loc, cmp, savedDestShape[dim], destShape[0]);
+      savedDestShape[dim] = sel;
+      destShape = savedDestShape;
+    }
+    return result;
+  }
+
+  /// Generate an access by vector subscript using the index in the iteration
+  /// vector at `dim`.
+  mlir::Value genAccessByVector(mlir::Location loc, CC genArrFetch,
+                                IterSpace iters, std::size_t dim) {
+    IterationSpace vecIters(iters,
+                            llvm::ArrayRef<mlir::Value>{iters.iterValue(dim)});
+    fir::ExtendedValue fetch = genArrFetch(vecIters);
+    mlir::IndexType idxTy = builder.getIndexType();
+    return builder.createConvert(loc, idxTy, fir::getBase(fetch));
+  }
+
+  /// When we have an array reference, the expressions specified in each
+  /// dimension may be slice operations (e.g. `i:j:k`), vectors, or simple
+  /// (loop-invarianet) scalar expressions. This returns the base entity, the
+  /// resulting type, and a continuation to adjust the default iteration space.
+  void genSliceIndices(ComponentPath &cmptData, const ExtValue &arrayExv,
+                       const Fortran::evaluate::ArrayRef &x, bool atBase) {
+    mlir::Location loc = getLoc();
+    mlir::IndexType idxTy = builder.getIndexType();
+    mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+    llvm::SmallVector<mlir::Value> &trips = cmptData.trips;
+    LLVM_DEBUG(llvm::dbgs() << "array: " << arrayExv << '\n');
+    auto &pc = cmptData.pc;
+    const bool useTripsForSlice = !explicitSpaceIsActive();
+    const bool createDestShape = destShape.empty();
+    bool useSlice = false;
+    std::size_t shapeIndex = 0;
+    for (auto sub : llvm::enumerate(x.subscript())) {
+      const std::size_t subsIndex = sub.index();
+      std::visit(
+          Fortran::common::visitors{
+              [&](const Fortran::evaluate::Triplet &t) {
+                mlir::Value lowerBound;
+                if (auto optLo = t.lower())
+                  lowerBound = fir::getBase(asScalar(*optLo));
+                else
+                  lowerBound = getLBound(arrayExv, subsIndex, one);
+                lowerBound = builder.createConvert(loc, idxTy, lowerBound);
+                mlir::Value stride = fir::getBase(asScalar(t.stride()));
+                stride = builder.createConvert(loc, idxTy, stride);
+                if (useTripsForSlice || createDestShape) {
+                  // Generate a slice operation for the triplet. The first and
+                  // second position of the triplet may be omitted, and the
+                  // declared lbound and/or ubound expression values,
+                  // respectively, should be used instead.
+                  trips.push_back(lowerBound);
+                  mlir::Value upperBound;
+                  if (auto optUp = t.upper())
+                    upperBound = fir::getBase(asScalar(*optUp));
+                  else
+                    upperBound = getUBound(arrayExv, subsIndex, one);
+                  upperBound = builder.createConvert(loc, idxTy, upperBound);
+                  trips.push_back(upperBound);
+                  trips.push_back(stride);
+                  if (createDestShape) {
+                    auto extent = builder.genExtentFromTriplet(
+                        loc, lowerBound, upperBound, stride, idxTy);
+                    destShape.push_back(extent);
+                  }
+                  useSlice = true;
+                }
+                if (!useTripsForSlice) {
+                  auto currentPC = pc;
+                  pc = [=](IterSpace iters) {
+                    IterationSpace newIters = currentPC(iters);
+                    mlir::Value impliedIter = newIters.iterValue(subsIndex);
+                    // FIXME: must use the lower bound of this component.
+                    auto arrLowerBound =
+                        atBase ? getLBound(arrayExv, subsIndex, one) : one;
+                    auto initial = builder.create<mlir::arith::SubIOp>(
+                        loc, lowerBound, arrLowerBound);
+                    auto prod = builder.create<mlir::arith::MulIOp>(
+                        loc, impliedIter, stride);
+                    auto result =
+                        builder.create<mlir::arith::AddIOp>(loc, initial, prod);
+                    newIters.setIndexValue(subsIndex, result);
+                    return newIters;
+                  };
+                }
+                shapeIndex++;
+              },
+              [&](const Fortran::evaluate::IndirectSubscriptIntegerExpr &ie) {
+                const auto &e = ie.value(); // dereference
+                if (isArray(e)) {
+                  // This is a vector subscript. Use the index values as read
+                  // from a vector to determine the temporary array value.
+                  // Note: 9.5.3.3.3(3) specifies undefined behavior for
+                  // multiple updates to any specific array element through a
+                  // vector subscript with replicated values.
+                  assert(!isBoxValue() &&
+                         "fir.box cannot be created with vector subscripts");
+                  auto arrExpr = ignoreEvConvert(e);
+                  if (createDestShape) {
+                    destShape.push_back(fir::getExtentAtDimension(
+                        arrayExv, builder, loc, subsIndex));
+                  }
+                  auto genArrFetch =
+                      genVectorSubscriptArrayFetch(arrExpr, shapeIndex);
+                  auto currentPC = pc;
+                  pc = [=](IterSpace iters) {
+                    IterationSpace newIters = currentPC(iters);
+                    auto val = genAccessByVector(loc, genArrFetch, newIters,
+                                                 subsIndex);
+                    // Value read from vector subscript array and normalized
+                    // using the base array's lower bound value.
+                    mlir::Value lb = fir::factory::readLowerBound(
+                        builder, loc, arrayExv, subsIndex, one);
+                    auto origin = builder.create<mlir::arith::SubIOp>(
+                        loc, idxTy, val, lb);
+                    newIters.setIndexValue(subsIndex, origin);
+                    return newIters;
+                  };
+                  if (useTripsForSlice) {
+                    LLVM_ATTRIBUTE_UNUSED auto vectorSubscriptShape =
+                        getShape(arrayOperands.back());
+                    auto undef = builder.create<fir::UndefOp>(loc, idxTy);
+                    trips.push_back(undef);
+                    trips.push_back(undef);
+                    trips.push_back(undef);
+                  }
+                  shapeIndex++;
+                } else {
+                  // This is a regular scalar subscript.
+                  if (useTripsForSlice) {
+                    // A regular scalar index, which does not yield an array
+                    // section. Use a degenerate slice operation
+                    // `(e:undef:undef)` in this dimension as a placeholder.
+                    // This does not necessarily change the rank of the original
+                    // array, so the iteration space must also be extended to
+                    // include this expression in this dimension to adjust to
+                    // the array's declared rank.
+                    mlir::Value v = fir::getBase(asScalar(e));
+                    trips.push_back(v);
+                    auto undef = builder.create<fir::UndefOp>(loc, idxTy);
+                    trips.push_back(undef);
+                    trips.push_back(undef);
+                    auto currentPC = pc;
+                    // Cast `e` to index type.
+                    mlir::Value iv = builder.createConvert(loc, idxTy, v);
+                    // Normalize `e` by subtracting the declared lbound.
+                    mlir::Value lb = fir::factory::readLowerBound(
+                        builder, loc, arrayExv, subsIndex, one);
+                    mlir::Value ivAdj =
+                        builder.create<mlir::arith::SubIOp>(loc, idxTy, iv, lb);
+                    // Add lbound adjusted value of `e` to the iteration vector
+                    // (except when creating a box because the iteration vector
+                    // is empty).
+                    if (!isBoxValue())
+                      pc = [=](IterSpace iters) {
+                        IterationSpace newIters = currentPC(iters);
+                        newIters.insertIndexValue(subsIndex, ivAdj);
+                        return newIters;
+                      };
+                  } else {
+                    auto currentPC = pc;
+                    mlir::Value newValue = fir::getBase(asScalarArray(e));
+                    mlir::Value result =
+                        builder.createConvert(loc, idxTy, newValue);
+                    mlir::Value lb = fir::factory::readLowerBound(
+                        builder, loc, arrayExv, subsIndex, one);
+                    result = builder.create<mlir::arith::SubIOp>(loc, idxTy,
+                                                                 result, lb);
+                    pc = [=](IterSpace iters) {
+                      IterationSpace newIters = currentPC(iters);
+                      newIters.insertIndexValue(subsIndex, result);
+                      return newIters;
+                    };
+                  }
+                }
+              }},
+          sub.value().u);
+    }
+    if (!useSlice)
+      trips.clear();
+  }
+
   CC genarr(const Fortran::semantics::SymbolRef &sym,
             ComponentPath &components) {
     return genarr(sym.get(), components);
@@ -4017,6 +4610,228 @@ class ArrayExprLowering {
         funRef, fir::unwrapSequenceType(converter.genType(toEvExpr(funRef))));
   }
 
+  //===-------------------------------------------------------------------===//
+  // Array data references in an explicit iteration space.
+  //
+  // Use the base array that was loaded before the loop nest.
+  //===-------------------------------------------------------------------===//
+
+  /// Lower the path (`revPath`, in reverse) to be appended to an array_fetch or
+  /// array_update op. \p ty is the initial type of the array
+  /// (reference). Returns the type of the element after application of the
+  /// path in \p components.
+  ///
+  /// TODO: This needs to deal with array's with initial bounds other than 1.
+  /// TODO: Thread type parameters correctly.
+  mlir::Type lowerPath(const ExtValue &arrayExv, ComponentPath &components) {
+    mlir::Location loc = getLoc();
+    mlir::Type ty = fir::getBase(arrayExv).getType();
+    auto &revPath = components.reversePath;
+    ty = fir::unwrapPassByRefType(ty);
+    bool prefix = true;
+    auto addComponent = [&](mlir::Value v) {
+      if (prefix)
+        components.prefixComponents.push_back(v);
+      else
+        components.suffixComponents.push_back(v);
+    };
+    mlir::IndexType idxTy = builder.getIndexType();
+    mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+    bool atBase = true;
+    auto saveSemant = semant;
+    if (isProjectedCopyInCopyOut())
+      semant = ConstituentSemantics::RefTransparent;
+    for (const auto &v : llvm::reverse(revPath)) {
+      std::visit(
+          Fortran::common::visitors{
+              [&](const ImplicitSubscripts &) {
+                prefix = false;
+                ty = fir::unwrapSequenceType(ty);
+              },
+              [&](const Fortran::evaluate::ComplexPart *x) {
+                assert(!prefix && "complex part must be at end");
+                mlir::Value offset = builder.createIntegerConstant(
+                    loc, builder.getI32Type(),
+                    x->part() == Fortran::evaluate::ComplexPart::Part::RE ? 0
+                                                                          : 1);
+                components.suffixComponents.push_back(offset);
+                ty = fir::applyPathToType(ty, mlir::ValueRange{offset});
+              },
+              [&](const Fortran::evaluate::ArrayRef *x) {
+                if (Fortran::lower::isRankedArrayAccess(*x)) {
+                  genSliceIndices(components, arrayExv, *x, atBase);
+                } else {
+                  // Array access where the expressions are scalar and cannot
+                  // depend upon the implied iteration space.
+                  unsigned ssIndex = 0u;
+                  for (const auto &ss : x->subscript()) {
+                    std::visit(
+                        Fortran::common::visitors{
+                            [&](const Fortran::evaluate::
+                                    IndirectSubscriptIntegerExpr &ie) {
+                              const auto &e = ie.value();
+                              if (isArray(e))
+                                fir::emitFatalError(
+                                    loc,
+                                    "multiple components along single path "
+                                    "generating array subexpressions");
+                              // Lower scalar index expression, append it to
+                              // subs.
+                              mlir::Value subscriptVal =
+                                  fir::getBase(asScalarArray(e));
+                              // arrayExv is the base array. It needs to reflect
+                              // the current array component instead.
+                              // FIXME: must use lower bound of this component,
+                              // not just the constant 1.
+                              mlir::Value lb =
+                                  atBase ? fir::factory::readLowerBound(
+                                               builder, loc, arrayExv, ssIndex,
+                                               one)
+                                         : one;
+                              mlir::Value val = builder.createConvert(
+                                  loc, idxTy, subscriptVal);
+                              mlir::Value ivAdj =
+                                  builder.create<mlir::arith::SubIOp>(
+                                      loc, idxTy, val, lb);
+                              addComponent(
+                                  builder.createConvert(loc, idxTy, ivAdj));
+                            },
+                            [&](const auto &) {
+                              fir::emitFatalError(
+                                  loc, "multiple components along single path "
+                                       "generating array subexpressions");
+                            }},
+                        ss.u);
+                    ssIndex++;
+                  }
+                }
+                ty = fir::unwrapSequenceType(ty);
+              },
+              [&](const Fortran::evaluate::Component *x) {
+                auto fieldTy = fir::FieldType::get(builder.getContext());
+                llvm::StringRef name = toStringRef(x->GetLastSymbol().name());
+                auto recTy = ty.cast<fir::RecordType>();
+                ty = recTy.getType(name);
+                auto fld = builder.create<fir::FieldIndexOp>(
+                    loc, fieldTy, name, recTy, fir::getTypeParams(arrayExv));
+                addComponent(fld);
+              }},
+          v);
+      atBase = false;
+    }
+    semant = saveSemant;
+    ty = fir::unwrapSequenceType(ty);
+    components.applied = true;
+    return ty;
+  }
+
+  llvm::SmallVector<mlir::Value> genSubstringBounds(ComponentPath &components) {
+    llvm::SmallVector<mlir::Value> result;
+    if (components.substring)
+      populateBounds(result, components.substring);
+    return result;
+  }
+
+  CC applyPathToArrayLoad(fir::ArrayLoadOp load, ComponentPath &components) {
+    mlir::Location loc = getLoc();
+    auto revPath = components.reversePath;
+    fir::ExtendedValue arrayExv =
+        arrayLoadExtValue(builder, loc, load, {}, load);
+    mlir::Type eleTy = lowerPath(arrayExv, components);
+    auto currentPC = components.pc;
+    auto pc = [=, prefix = components.prefixComponents,
+               suffix = components.suffixComponents](IterSpace iters) {
+      IterationSpace newIters = currentPC(iters);
+      // Add path prefix and suffix.
+      IterationSpace addIters(newIters, prefix, suffix);
+      return addIters;
+    };
+    components.pc = [=](IterSpace iters) { return iters; };
+    llvm::SmallVector<mlir::Value> substringBounds =
+        genSubstringBounds(components);
+    if (isProjectedCopyInCopyOut()) {
+      destination = load;
+      auto lambda = [=, esp = this->explicitSpace](IterSpace iters) mutable {
+        mlir::Value innerArg = esp->findArgumentOfLoad(load);
+        if (isAdjustedArrayElementType(eleTy)) {
+          mlir::Type eleRefTy = builder.getRefType(eleTy);
+          auto arrayOp = builder.create<fir::ArrayAccessOp>(
+              loc, eleRefTy, innerArg, iters.iterVec(), load.getTypeparams());
+          if (auto charTy = eleTy.dyn_cast<fir::CharacterType>()) {
+            mlir::Value dstLen = fir::factory::genLenOfCharacter(
+                builder, loc, load, iters.iterVec(), substringBounds);
+            fir::ArrayAmendOp amend = createCharArrayAmend(
+                loc, builder, arrayOp, dstLen, iters.elementExv(), innerArg,
+                substringBounds);
+            return arrayLoadExtValue(builder, loc, load, iters.iterVec(), amend,
+                                     dstLen);
+          } else if (fir::isa_derived(eleTy)) {
+            fir::ArrayAmendOp amend =
+                createDerivedArrayAmend(loc, load, builder, arrayOp,
+                                        iters.elementExv(), eleTy, innerArg);
+            return arrayLoadExtValue(builder, loc, load, iters.iterVec(),
+                                     amend);
+          }
+          assert(eleTy.isa<fir::SequenceType>());
+          TODO(loc, "array (as element) assignment");
+        }
+        mlir::Value castedElement =
+            builder.createConvert(loc, eleTy, iters.getElement());
+        auto update = builder.create<fir::ArrayUpdateOp>(
+            loc, innerArg.getType(), innerArg, castedElement, iters.iterVec(),
+            load.getTypeparams());
+        return arrayLoadExtValue(builder, loc, load, iters.iterVec(), update);
+      };
+      return [=](IterSpace iters) mutable { return lambda(pc(iters)); };
+    }
+    if (isCustomCopyInCopyOut()) {
+      // Create an array_modify to get the LHS element address and indicate
+      // the assignment, and create the call to the user defined assignment.
+      destination = load;
+      auto lambda = [=](IterSpace iters) mutable {
+        mlir::Value innerArg = explicitSpace->findArgumentOfLoad(load);
+        mlir::Type refEleTy =
+            fir::isa_ref_type(eleTy) ? eleTy : builder.getRefType(eleTy);
+        auto arrModify = builder.create<fir::ArrayModifyOp>(
+            loc, mlir::TypeRange{refEleTy, innerArg.getType()}, innerArg,
+            iters.iterVec(), load.getTypeparams());
+        return arrayLoadExtValue(builder, loc, load, iters.iterVec(),
+                                 arrModify.getResult(1));
+      };
+      return [=](IterSpace iters) mutable { return lambda(pc(iters)); };
+    }
+    auto lambda = [=, semant = this->semant](IterSpace iters) mutable {
+      if (semant == ConstituentSemantics::RefOpaque ||
+          isAdjustedArrayElementType(eleTy)) {
+        mlir::Type resTy = builder.getRefType(eleTy);
+        // Use array element reference semantics.
+        auto access = builder.create<fir::ArrayAccessOp>(
+            loc, resTy, load, iters.iterVec(), load.getTypeparams());
+        mlir::Value newBase = access;
+        if (fir::isa_char(eleTy)) {
+          mlir::Value dstLen = fir::factory::genLenOfCharacter(
+              builder, loc, load, iters.iterVec(), substringBounds);
+          if (!substringBounds.empty()) {
+            fir::CharBoxValue charDst{access, dstLen};
+            fir::factory::CharacterExprHelper helper{builder, loc};
+            charDst = helper.createSubstring(charDst, substringBounds);
+            newBase = charDst.getAddr();
+          }
+          return arrayLoadExtValue(builder, loc, load, iters.iterVec(), newBase,
+                                   dstLen);
+        }
+        return arrayLoadExtValue(builder, loc, load, iters.iterVec(), newBase);
+      }
+      auto fetch = builder.create<fir::ArrayFetchOp>(
+          loc, eleTy, load, iters.iterVec(), load.getTypeparams());
+      return arrayLoadExtValue(builder, loc, load, iters.iterVec(), fetch);
+    };
+    return [=](IterSpace iters) mutable {
+      auto newIters = pc(iters);
+      return lambda(newIters);
+    };
+  }
+
   template <typename A>
   CC genImplicitArrayAccess(const A &x, ComponentPath &components) {
     components.reversePath.push_back(ImplicitSubscripts{});
@@ -4060,10 +4875,19 @@ class ArrayExprLowering {
 
   CC genarr(const Fortran::semantics::Symbol &x, ComponentPath &components) {
     if (explicitSpaceIsActive()) {
-      TODO(getLoc(), "genarr Symbol explicitSpace");
+      if (x.Rank() > 0)
+        components.reversePath.push_back(ImplicitSubscripts{});
+      if (fir::ArrayLoadOp load = explicitSpace->findBinding(&x))
+        return applyPathToArrayLoad(load, components);
     } else {
       return genImplicitArrayAccess(x, components);
     }
+    if (pathIsEmpty(components))
+      return genAsScalar(x);
+    mlir::Location loc = getLoc();
+    return [=](IterSpace) -> ExtValue {
+      fir::emitFatalError(loc, "reached symbol with path");
+    };
   }
 
   CC genarr(const Fortran::evaluate::Component &x, ComponentPath &components) {
@@ -4080,7 +4904,12 @@ class ArrayExprLowering {
   /// the array expression evaluation.
   CC genarr(const Fortran::evaluate::ArrayRef &x, ComponentPath &components) {
     if (explicitSpaceIsActive()) {
-      TODO(getLoc(), "genarr ArrayRef explicitSpace");
+      if (Fortran::lower::isRankedArrayAccess(x))
+        components.reversePath.push_back(ImplicitSubscripts{});
+      if (fir::ArrayLoadOp load = explicitSpace->findBinding(&x)) {
+        components.reversePath.push_back(&x);
+        return applyPathToArrayLoad(load, components);
+      }
     } else {
       if (Fortran::lower::isRankedArrayAccess(x)) {
         components.reversePath.push_back(&x);
@@ -4865,15 +5694,135 @@ fir::ExtendedValue Fortran::lower::createBoxValue(
 
 mlir::Value Fortran::lower::createSubroutineCall(
     AbstractConverter &converter, const evaluate::ProcedureRef &call,
-    SymMap &symMap, StatementContext &stmtCtx) {
+    ExplicitIterSpace &explicitIterSpace, ImplicitIterSpace &implicitIterSpace,
+    SymMap &symMap, StatementContext &stmtCtx, bool isUserDefAssignment) {
   mlir::Location loc = converter.getCurrentLocation();
 
+  if (isUserDefAssignment) {
+    assert(call.arguments().size() == 2);
+    const auto *lhs = call.arguments()[0].value().UnwrapExpr();
+    const auto *rhs = call.arguments()[1].value().UnwrapExpr();
+    assert(lhs && rhs &&
+           "user defined assignment arguments must be expressions");
+    if (call.IsElemental() && lhs->Rank() > 0) {
+      // Elemental user defined assignment has special requirements to deal with
+      // LHS/RHS overlaps. See 10.2.1.5 p2.
+      ArrayExprLowering::lowerElementalUserAssignment(
+          converter, symMap, stmtCtx, explicitIterSpace, implicitIterSpace,
+          call);
+    } else if (explicitIterSpace.isActive() && lhs->Rank() == 0) {
+      // Scalar defined assignment (elemental or not) in a FORALL context.
+      mlir::FuncOp func =
+          Fortran::lower::CallerInterface(call, converter).getFuncOp();
+      ArrayExprLowering::lowerScalarUserAssignment(
+          converter, symMap, stmtCtx, explicitIterSpace, func, *lhs, *rhs);
+    } else if (explicitIterSpace.isActive()) {
+      // TODO: need to array fetch/modify sub-arrays?
+      TODO(loc, "non elemental user defined array assignment inside FORALL");
+    } else {
+      if (!implicitIterSpace.empty())
+        fir::emitFatalError(
+            loc,
+            "C1032: user defined assignment inside WHERE must be elemental");
+      // Non elemental user defined assignment outside of FORALL and WHERE.
+      // FIXME: The non elemental user defined assignment case with array
+      // arguments must be take into account potential overlap. So far the front
+      // end does not add parentheses around the RHS argument in the call as it
+      // should according to 15.4.3.4.3 p2.
+      Fortran::lower::createSomeExtendedExpression(
+          loc, converter, toEvExpr(call), symMap, stmtCtx);
+    }
+    return {};
+  }
+
+  assert(implicitIterSpace.empty() && !explicitIterSpace.isActive() &&
+         "subroutine calls are not allowed inside WHERE and FORALL");
+
+  if (isElementalProcWithArrayArgs(call)) {
+    ArrayExprLowering::lowerElementalSubroutine(converter, symMap, stmtCtx,
+                                                toEvExpr(call));
+    return {};
+  }
   // Simple subroutine call, with potential alternate return.
   auto res = Fortran::lower::createSomeExtendedExpression(
       loc, converter, toEvExpr(call), symMap, stmtCtx);
   return fir::getBase(res);
 }
 
+template <typename A>
+fir::ArrayLoadOp genArrayLoad(mlir::Location loc,
+                              Fortran::lower::AbstractConverter &converter,
+                              fir::FirOpBuilder &builder, const A *x,
+                              Fortran::lower::SymMap &symMap,
+                              Fortran::lower::StatementContext &stmtCtx) {
+  auto exv = ScalarExprLowering{loc, converter, symMap, stmtCtx}.gen(*x);
+  mlir::Value addr = fir::getBase(exv);
+  mlir::Value shapeOp = builder.createShape(loc, exv);
+  mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(addr.getType());
+  return builder.create<fir::ArrayLoadOp>(loc, arrTy, addr, shapeOp,
+                                          /*slice=*/mlir::Value{},
+                                          fir::getTypeParams(exv));
+}
+template <>
+fir::ArrayLoadOp
+genArrayLoad(mlir::Location loc, Fortran::lower::AbstractConverter &converter,
+             fir::FirOpBuilder &builder, const Fortran::evaluate::ArrayRef *x,
+             Fortran::lower::SymMap &symMap,
+             Fortran::lower::StatementContext &stmtCtx) {
+  if (x->base().IsSymbol())
+    return genArrayLoad(loc, converter, builder, &x->base().GetLastSymbol(),
+                        symMap, stmtCtx);
+  return genArrayLoad(loc, converter, builder, &x->base().GetComponent(),
+                      symMap, stmtCtx);
+}
+
+void Fortran::lower::createArrayLoads(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::ExplicitIterSpace &esp, Fortran::lower::SymMap &symMap) {
+  std::size_t counter = esp.getCounter();
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+  mlir::Location loc = converter.getCurrentLocation();
+  Fortran::lower::StatementContext &stmtCtx = esp.stmtContext();
+  // Gen the fir.array_load ops.
+  auto genLoad = [&](const auto *x) -> fir::ArrayLoadOp {
+    return genArrayLoad(loc, converter, builder, x, symMap, stmtCtx);
+  };
+  if (esp.lhsBases[counter].hasValue()) {
+    auto &base = esp.lhsBases[counter].getValue();
+    auto load = std::visit(genLoad, base);
+    esp.initialArgs.push_back(load);
+    esp.resetInnerArgs();
+    esp.bindLoad(base, load);
+  }
+  for (const auto &base : esp.rhsBases[counter])
+    esp.bindLoad(base, std::visit(genLoad, base));
+}
+
+void Fortran::lower::createArrayMergeStores(
+    Fortran::lower::AbstractConverter &converter,
+    Fortran::lower::ExplicitIterSpace &esp) {
+  fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+  mlir::Location loc = converter.getCurrentLocation();
+  builder.setInsertionPointAfter(esp.getOuterLoop());
+  // Gen the fir.array_merge_store ops for all LHS arrays.
+  for (auto i : llvm::enumerate(esp.getOuterLoop().getResults()))
+    if (llvm::Optional<fir::ArrayLoadOp> ldOpt = esp.getLhsLoad(i.index())) {
+      fir::ArrayLoadOp load = ldOpt.getValue();
+      builder.create<fir::ArrayMergeStoreOp>(loc, load, i.value(),
+                                             load.getMemref(), load.getSlice(),
+                                             load.getTypeparams());
+    }
+  if (esp.loopCleanup.hasValue()) {
+    esp.loopCleanup.getValue()(builder);
+    esp.loopCleanup = llvm::None;
+  }
+  esp.initialArgs.clear();
+  esp.innerArgs.clear();
+  esp.outerLoop = llvm::None;
+  esp.resetBindings();
+  esp.incrementCounter();
+}
+
 void Fortran::lower::createSomeArrayAssignment(
     Fortran::lower::AbstractConverter &converter,
     const Fortran::lower::SomeExpr &lhs, const Fortran::lower::SomeExpr &rhs,

diff  --git a/flang/lib/Lower/IntrinsicCall.cpp b/flang/lib/Lower/IntrinsicCall.cpp
index b4ed072a73b80..3d99fcafd1169 100644
--- a/flang/lib/Lower/IntrinsicCall.cpp
+++ b/flang/lib/Lower/IntrinsicCall.cpp
@@ -23,6 +23,7 @@
 #include "flang/Optimizer/Builder/Complex.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Builder/Runtime/Inquiry.h"
 #include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
 #include "flang/Optimizer/Builder/Runtime/Reduction.h"
 #include "flang/Optimizer/Support/FatalError.h"
@@ -98,6 +99,9 @@ fir::ExtendedValue Fortran::lower::getAbsentIntrinsicArgument() {
 static bool isAbsent(const fir::ExtendedValue &exv) {
   return !fir::getBase(exv);
 }
+static bool isAbsent(llvm::ArrayRef<fir::ExtendedValue> args, size_t argIndex) {
+  return args.size() <= argIndex || isAbsent(args[argIndex]);
+}
 
 /// Process calls to Maxval, Minval, Product, Sum intrinsic functions that
 /// take a DIM argument.
@@ -233,10 +237,13 @@ struct IntrinsicLibrary {
   /// Lowering for the IAND intrinsic. The IAND intrinsic expects two arguments
   /// in the llvm::ArrayRef.
   mlir::Value genIand(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  fir::ExtendedValue genLbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  fir::ExtendedValue genSize(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genSum(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+  fir::ExtendedValue genUbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+
   /// Define the 
diff erent FIR generators that can be mapped to intrinsic to
-  /// generate the related code. The intrinsic is lowered into an MLIR
-  /// arith::AndIOp.
+  /// generate the related code.
   using ElementalGenerator = decltype(&IntrinsicLibrary::genAbs);
   using ExtendedGenerator = decltype(&IntrinsicLibrary::genSum);
   using Generator = std::variant<ElementalGenerator, ExtendedGenerator>;
@@ -268,6 +275,13 @@ struct IntrinsicLibrary {
                               mlir::Type resultType,
                               llvm::ArrayRef<mlir::Value> args);
 
+  /// Add clean-up for \p temp to the current statement context;
+  void addCleanUpForTemp(mlir::Location loc, mlir::Value temp);
+  /// Helper function for generating code clean-up for result descriptors
+  fir::ExtendedValue readAndAddCleanUp(fir::MutableBoxValue resultMutableBox,
+                                       mlir::Type resultType,
+                                       llvm::StringRef errMsg);
+
   fir::FirOpBuilder &builder;
   mlir::Location loc;
   Fortran::lower::StatementContext *stmtCtx;
@@ -320,6 +334,10 @@ static constexpr IntrinsicHandler handlers[]{
        {"dim", asValue},
        {"mask", asBox, handleDynamicOptional}}},
      /*isElemental=*/false},
+    {"ubound",
+     &I::genUbound,
+     {{{"array", asBox}, {"dim", asValue}, {"kind", asValue}}},
+     /*isElemental=*/false},
 };
 
 static const IntrinsicHandler *findIntrinsicHandler(llvm::StringRef name) {
@@ -940,6 +958,52 @@ IntrinsicLibrary::getRuntimeCallGenerator(llvm::StringRef name,
     return builder.createConvert(loc, soughtType, call.getResult(0));
   };
 }
+
+void IntrinsicLibrary::addCleanUpForTemp(mlir::Location loc, mlir::Value temp) {
+  assert(stmtCtx);
+  fir::FirOpBuilder *bldr = &builder;
+  stmtCtx->attachCleanup([=]() { bldr->create<fir::FreeMemOp>(loc, temp); });
+}
+
+fir::ExtendedValue
+IntrinsicLibrary::readAndAddCleanUp(fir::MutableBoxValue resultMutableBox,
+                                    mlir::Type resultType,
+                                    llvm::StringRef intrinsicName) {
+  fir::ExtendedValue res =
+      fir::factory::genMutableBoxRead(builder, loc, resultMutableBox);
+  return res.match(
+      [&](const fir::ArrayBoxValue &box) -> fir::ExtendedValue {
+        // Add cleanup code
+        addCleanUpForTemp(loc, box.getAddr());
+        return box;
+      },
+      [&](const fir::BoxValue &box) -> fir::ExtendedValue {
+        // Add cleanup code
+        auto addr =
+            builder.create<fir::BoxAddrOp>(loc, box.getMemTy(), box.getAddr());
+        addCleanUpForTemp(loc, addr);
+        return box;
+      },
+      [&](const fir::CharArrayBoxValue &box) -> fir::ExtendedValue {
+        // Add cleanup code
+        addCleanUpForTemp(loc, box.getAddr());
+        return box;
+      },
+      [&](const mlir::Value &tempAddr) -> fir::ExtendedValue {
+        // Add cleanup code
+        addCleanUpForTemp(loc, tempAddr);
+        return builder.create<fir::LoadOp>(loc, resultType, tempAddr);
+      },
+      [&](const fir::CharBoxValue &box) -> fir::ExtendedValue {
+        // Add cleanup code
+        addCleanUpForTemp(loc, box.getAddr());
+        return box;
+      },
+      [&](const auto &) -> fir::ExtendedValue {
+        fir::emitFatalError(loc, "unexpected result for " + intrinsicName);
+      });
+}
+
 //===----------------------------------------------------------------------===//
 // Code generators for the intrinsic
 //===----------------------------------------------------------------------===//
@@ -1071,6 +1135,128 @@ IntrinsicLibrary::genSum(mlir::Type resultType,
                       builder, loc, stmtCtx, "unexpected result for Sum", args);
 }
 
+// SIZE
+fir::ExtendedValue
+IntrinsicLibrary::genSize(mlir::Type resultType,
+                          llvm::ArrayRef<fir::ExtendedValue> args) {
+  // Note that the value of the KIND argument is already reflected in the
+  // resultType
+  assert(args.size() == 3);
+  if (const auto *boxValue = args[0].getBoxOf<fir::BoxValue>())
+    if (boxValue->hasAssumedRank())
+      TODO(loc, "SIZE intrinsic with assumed rank argument");
+
+  // Get the ARRAY argument
+  mlir::Value array = builder.createBox(loc, args[0]);
+
+  // The front-end rewrites SIZE without the DIM argument to
+  // an array of SIZE with DIM in most cases, but it may not be
+  // possible in some cases like when in SIZE(function_call()).
+  if (isAbsent(args, 1))
+    return builder.createConvert(loc, resultType,
+                                 fir::runtime::genSize(builder, loc, array));
+
+  // Get the DIM argument.
+  mlir::Value dim = fir::getBase(args[1]);
+  if (!fir::isa_ref_type(dim.getType()))
+    return builder.createConvert(
+        loc, resultType, fir::runtime::genSizeDim(builder, loc, array, dim));
+
+  mlir::Value isDynamicallyAbsent = builder.genIsNull(loc, dim);
+  return builder
+      .genIfOp(loc, {resultType}, isDynamicallyAbsent,
+               /*withElseRegion=*/true)
+      .genThen([&]() {
+        mlir::Value size = builder.createConvert(
+            loc, resultType, fir::runtime::genSize(builder, loc, array));
+        builder.create<fir::ResultOp>(loc, size);
+      })
+      .genElse([&]() {
+        mlir::Value dimValue = builder.create<fir::LoadOp>(loc, dim);
+        mlir::Value size = builder.createConvert(
+            loc, resultType,
+            fir::runtime::genSizeDim(builder, loc, array, dimValue));
+        builder.create<fir::ResultOp>(loc, size);
+      })
+      .getResults()[0];
+}
+
+// LBOUND
+fir::ExtendedValue
+IntrinsicLibrary::genLbound(mlir::Type resultType,
+                            llvm::ArrayRef<fir::ExtendedValue> args) {
+  // Calls to LBOUND that don't have the DIM argument, or for which
+  // the DIM is a compile time constant, are folded to descriptor inquiries by
+  // semantics.  This function covers the situations where a call to the
+  // runtime is required.
+  assert(args.size() == 3);
+  assert(!isAbsent(args[1]));
+  if (const auto *boxValue = args[0].getBoxOf<fir::BoxValue>())
+    if (boxValue->hasAssumedRank())
+      TODO(loc, "LBOUND intrinsic with assumed rank argument");
+
+  const fir::ExtendedValue &array = args[0];
+  mlir::Value box = array.match(
+      [&](const fir::BoxValue &boxValue) -> mlir::Value {
+        // This entity is mapped to a fir.box that may not contain the local
+        // lower bound information if it is a dummy. Rebox it with the local
+        // shape information.
+        mlir::Value localShape = builder.createShape(loc, array);
+        mlir::Value oldBox = boxValue.getAddr();
+        return builder.create<fir::ReboxOp>(
+            loc, oldBox.getType(), oldBox, localShape, /*slice=*/mlir::Value{});
+      },
+      [&](const auto &) -> mlir::Value {
+        // This a pointer/allocatable, or an entity not yet tracked with a
+        // fir.box. For pointer/allocatable, createBox will forward the
+        // descriptor that contains the correct lower bound information. For
+        // other entities, a new fir.box will be made with the local lower
+        // bounds.
+        return builder.createBox(loc, array);
+      });
+
+  mlir::Value dim = fir::getBase(args[1]);
+  return builder.createConvert(
+      loc, resultType,
+      fir::runtime::genLboundDim(builder, loc, fir::getBase(box), dim));
+}
+
+// UBOUND
+fir::ExtendedValue
+IntrinsicLibrary::genUbound(mlir::Type resultType,
+                            llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 3 || args.size() == 2);
+  if (args.size() == 3) {
+    // Handle calls to UBOUND with the DIM argument, which return a scalar
+    mlir::Value extent = fir::getBase(genSize(resultType, args));
+    mlir::Value lbound = fir::getBase(genLbound(resultType, args));
+
+    mlir::Value one = builder.createIntegerConstant(loc, resultType, 1);
+    mlir::Value ubound = builder.create<mlir::arith::SubIOp>(loc, lbound, one);
+    return builder.create<mlir::arith::AddIOp>(loc, ubound, extent);
+  } else {
+    // Handle calls to UBOUND without the DIM argument, which return an array
+    mlir::Value kind = isAbsent(args[1])
+                           ? builder.createIntegerConstant(
+                                 loc, builder.getIndexType(),
+                                 builder.getKindMap().defaultIntegerKind())
+                           : fir::getBase(args[1]);
+
+    // Create mutable fir.box to be passed to the runtime for the result.
+    mlir::Type type = builder.getVarLenSeqTy(resultType, /*rank=*/1);
+    fir::MutableBoxValue resultMutableBox =
+        fir::factory::createTempMutableBox(builder, loc, type);
+    mlir::Value resultIrBox =
+        fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
+
+    fir::runtime::genUbound(builder, loc, resultIrBox, fir::getBase(args[0]),
+                            kind);
+
+    return readAndAddCleanUp(resultMutableBox, resultType, "UBOUND");
+  }
+  return mlir::Value();
+}
+
 //===----------------------------------------------------------------------===//
 // Argument lowering rules interface
 //===----------------------------------------------------------------------===//

diff  --git a/flang/lib/Optimizer/Builder/BoxValue.cpp b/flang/lib/Optimizer/Builder/BoxValue.cpp
index 367950741d44c..8cb9fbd61c738 100644
--- a/flang/lib/Optimizer/Builder/BoxValue.cpp
+++ b/flang/lib/Optimizer/Builder/BoxValue.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "llvm/Support/Debug.h"
 
@@ -224,3 +225,14 @@ bool fir::BoxValue::verify() const {
     return false;
   return true;
 }
+
+/// Get exactly one extent for any array-like extended value, \p exv. If \p exv
+/// is not an array or has rank less then \p dim, the result will be a nullptr.
+mlir::Value fir::getExtentAtDimension(const fir::ExtendedValue &exv,
+                                      fir::FirOpBuilder &builder,
+                                      mlir::Location loc, unsigned dim) {
+  auto extents = fir::factory::getExtents(builder, loc, exv);
+  if (dim < extents.size())
+    return extents[dim];
+  return {};
+}

diff  --git a/flang/lib/Optimizer/Builder/CMakeLists.txt b/flang/lib/Optimizer/Builder/CMakeLists.txt
index d783961b822de..779256bfd1c6d 100644
--- a/flang/lib/Optimizer/Builder/CMakeLists.txt
+++ b/flang/lib/Optimizer/Builder/CMakeLists.txt
@@ -12,6 +12,7 @@ add_flang_library(FIRBuilder
   Runtime/Character.cpp
   Runtime/Command.cpp
   Runtime/Derived.cpp
+  Runtime/Inquiry.cpp
   Runtime/Numeric.cpp
   Runtime/Ragged.cpp
   Runtime/Reduction.cpp

diff  --git a/flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp b/flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp
new file mode 100644
index 0000000000000..6c20919e0ab18
--- /dev/null
+++ b/flang/lib/Optimizer/Builder/Runtime/Inquiry.cpp
@@ -0,0 +1,77 @@
+//===-- Inquiry.h - generate inquiry runtime API calls ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/Runtime/Inquiry.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
+#include "flang/Runtime/inquiry.h"
+
+using namespace Fortran::runtime;
+
+/// Generate call to `Lbound` runtime routine when the DIM argument is present.
+mlir::Value fir::runtime::genLboundDim(fir::FirOpBuilder &builder,
+                                       mlir::Location loc, mlir::Value array,
+                                       mlir::Value dim) {
+  mlir::FuncOp lboundFunc =
+      fir::runtime::getRuntimeFunc<mkRTKey(LboundDim)>(loc, builder);
+  auto fTy = lboundFunc.getType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+  auto args = fir::runtime::createArguments(builder, loc, fTy, array, dim,
+                                            sourceFile, sourceLine);
+  return builder.create<fir::CallOp>(loc, lboundFunc, args).getResult(0);
+}
+
+/// Generate call to `Ubound` runtime routine.  Calls to UBOUND with a DIM
+/// argument get transformed into an expression equivalent to
+/// SIZE() + LBOUND() - 1, so they don't have an intrinsic in the runtime.
+void fir::runtime::genUbound(fir::FirOpBuilder &builder, mlir::Location loc,
+                             mlir::Value resultBox, mlir::Value array,
+                             mlir::Value kind) {
+  mlir::FuncOp uboundFunc =
+      fir::runtime::getRuntimeFunc<mkRTKey(Ubound)>(loc, builder);
+  auto fTy = uboundFunc.getType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
+  auto args = fir::runtime::createArguments(builder, loc, fTy, resultBox, array,
+                                            kind, sourceFile, sourceLine);
+  builder.create<fir::CallOp>(loc, uboundFunc, args).getResult(0);
+}
+
+/// Generate call to `Size` runtime routine. This routine is a version when
+/// the DIM argument is present.
+mlir::Value fir::runtime::genSizeDim(fir::FirOpBuilder &builder,
+                                     mlir::Location loc, mlir::Value array,
+                                     mlir::Value dim) {
+  mlir::FuncOp sizeFunc =
+      fir::runtime::getRuntimeFunc<mkRTKey(SizeDim)>(loc, builder);
+  auto fTy = sizeFunc.getType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(3));
+  auto args = fir::runtime::createArguments(builder, loc, fTy, array, dim,
+                                            sourceFile, sourceLine);
+  return builder.create<fir::CallOp>(loc, sizeFunc, args).getResult(0);
+}
+
+/// Generate call to `Size` runtime routine. This routine is a version when
+/// the DIM argument is absent.
+mlir::Value fir::runtime::genSize(fir::FirOpBuilder &builder,
+                                  mlir::Location loc, mlir::Value array) {
+  mlir::FuncOp sizeFunc =
+      fir::runtime::getRuntimeFunc<mkRTKey(Size)>(loc, builder);
+  auto fTy = sizeFunc.getType();
+  auto sourceFile = fir::factory::locationToFilename(builder, loc);
+  auto sourceLine =
+      fir::factory::locationToLineNo(builder, loc, fTy.getInput(2));
+  auto args = fir::runtime::createArguments(builder, loc, fTy, array,
+                                            sourceFile, sourceLine);
+  return builder.create<fir::CallOp>(loc, sizeFunc, args).getResult(0);
+}

diff  --git a/flang/test/Lower/forall/forall-construct.f90 b/flang/test/Lower/forall/forall-construct.f90
new file mode 100644
index 0000000000000..0bd463090854f
--- /dev/null
+++ b/flang/test/Lower/forall/forall-construct.f90
@@ -0,0 +1,98 @@
+! Test forall lowering
+
+! RUN: bbc -emit-fir %s -o - | FileCheck %s
+
+!*** Test a FORALL construct
+subroutine test_forall_construct(a,b)
+    integer :: i, j
+    real :: a(:,:), b(:,:)
+    forall (i=1:ubound(a,1), j=1:ubound(a,2), b(j,i) > 0.0)
+       a(i,j) = b(j,i) / 3.14
+    end forall
+  end subroutine test_forall_construct
+  
+  ! CHECK-LABEL: func @_QPtest_forall_construct(
+  ! CHECK-SAME:     %[[VAL_0:.*]]: !fir.box<!fir.array<?x?xf32>>{{.*}}, %[[VAL_1:.*]]: !fir.box<!fir.array<?x?xf32>>{{.*}}) {
+  ! CHECK:         %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref, bindc_name = "j"}
+  ! CHECK:         %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, bindc_name = "i"}
+  ! CHECK:         %[[VAL_4:.*]] = arith.constant 1 : i32
+  ! CHECK:         %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i32) -> index
+  ! CHECK:         %[[VAL_6:.*]] = arith.constant 0 : index
+  ! CHECK:         %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+  ! CHECK:         %[[VAL_8:.*]] = fir.convert %[[VAL_7]]#1 : (index) -> i64
+  ! CHECK:         %[[VAL_9:.*]] = arith.constant 1 : index
+  ! CHECK:         %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (index) -> i64
+  ! CHECK:         %[[VAL_11:.*]] = arith.addi %[[VAL_8]], %[[VAL_10]] : i64
+  ! CHECK:         %[[VAL_12:.*]] = arith.constant 1 : i64
+  ! CHECK:         %[[VAL_13:.*]] = arith.subi %[[VAL_11]], %[[VAL_12]] : i64
+  ! CHECK:         %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> i32
+  ! CHECK:         %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i32) -> index
+  ! CHECK:         %[[VAL_16:.*]] = arith.constant 1 : index
+  ! CHECK:         %[[VAL_17:.*]] = arith.constant 1 : i32
+  ! CHECK:         %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i32) -> index
+  ! CHECK:         %[[VAL_19:.*]] = arith.constant 1 : index
+  ! CHECK:         %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_19]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+  ! CHECK:         %[[VAL_21:.*]] = fir.convert %[[VAL_20]]#1 : (index) -> i64
+  ! CHECK:         %[[VAL_22:.*]] = arith.constant 1 : index
+  ! CHECK:         %[[VAL_23:.*]] = fir.convert %[[VAL_22]] : (index) -> i64
+  ! CHECK:         %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_23]] : i64
+  ! CHECK:         %[[VAL_25:.*]] = arith.constant 1 : i64
+  ! CHECK:         %[[VAL_26:.*]] = arith.subi %[[VAL_24]], %[[VAL_25]] : i64
+  ! CHECK:         %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i64) -> i32
+  ! CHECK:         %[[VAL_28:.*]] = fir.convert %[[VAL_27]] : (i32) -> index
+  ! CHECK:         %[[VAL_29:.*]] = arith.constant 1 : index
+  ! CHECK:         %[[VAL_30:.*]] = fir.array_load %[[VAL_0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.array<?x?xf32>
+  ! CHECK:         %[[VAL_31:.*]] = fir.array_load %[[VAL_1]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.array<?x?xf32>
+  ! CHECK:         %[[VAL_32:.*]] = fir.do_loop %[[VAL_33:.*]] = %[[VAL_5]] to %[[VAL_15]] step %[[VAL_16]] unordered iter_args(%[[VAL_34:.*]] = %[[VAL_30]]) -> (!fir.array<?x?xf32>) {
+  ! CHECK:           %[[VAL_35:.*]] = fir.convert %[[VAL_33]] : (index) -> i32
+  ! CHECK:           fir.store %[[VAL_35]] to %[[VAL_3]] : !fir.ref<i32>
+  ! CHECK:           %[[VAL_36:.*]] = fir.do_loop %[[VAL_37:.*]] = %[[VAL_18]] to %[[VAL_28]] step %[[VAL_29]] unordered iter_args(%[[VAL_38:.*]] = %[[VAL_34]]) -> (!fir.array<?x?xf32>) {
+  ! CHECK:             %[[VAL_39:.*]] = fir.convert %[[VAL_37]] : (index) -> i32
+  ! CHECK:             fir.store %[[VAL_39]] to %[[VAL_2]] : !fir.ref<i32>
+  ! CHECK:             %[[VAL_40:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
+  ! CHECK:             %[[VAL_41:.*]] = fir.convert %[[VAL_40]] : (i32) -> i64
+  ! CHECK:             %[[VAL_42:.*]] = arith.constant 1 : i64
+  ! CHECK:             %[[VAL_43:.*]] = arith.subi %[[VAL_41]], %[[VAL_42]] : i64
+  ! CHECK:             %[[VAL_44:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+  ! CHECK:             %[[VAL_45:.*]] = fir.convert %[[VAL_44]] : (i32) -> i64
+  ! CHECK:             %[[VAL_46:.*]] = arith.constant 1 : i64
+  ! CHECK:             %[[VAL_47:.*]] = arith.subi %[[VAL_45]], %[[VAL_46]] : i64
+  ! CHECK:             %[[VAL_48:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_43]], %[[VAL_47]] : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
+  ! CHECK:             %[[VAL_49:.*]] = fir.load %[[VAL_48]] : !fir.ref<f32>
+  ! CHECK:             %[[VAL_50:.*]] = arith.constant 0.000000e+00 : f32
+  ! CHECK:             %[[VAL_51:.*]] = arith.cmpf ogt, %[[VAL_49]], %[[VAL_50]] : f32
+  ! CHECK:             %[[VAL_52:.*]] = fir.if %[[VAL_51]] -> (!fir.array<?x?xf32>) {
+  ! CHECK:               %[[VAL_53:.*]] = arith.constant 1 : index
+  ! CHECK:               %[[VAL_54:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
+  ! CHECK:               %[[VAL_55:.*]] = fir.convert %[[VAL_54]] : (i32) -> i64
+  ! CHECK:               %[[VAL_56:.*]] = fir.convert %[[VAL_55]] : (i64) -> index
+  ! CHECK:               %[[VAL_57:.*]] = arith.subi %[[VAL_56]], %[[VAL_53]] : index
+  ! CHECK:               %[[VAL_58:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+  ! CHECK:               %[[VAL_59:.*]] = fir.convert %[[VAL_58]] : (i32) -> i64
+  ! CHECK:               %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (i64) -> index
+  ! CHECK:               %[[VAL_61:.*]] = arith.subi %[[VAL_60]], %[[VAL_53]] : index
+  ! CHECK:               %[[VAL_62:.*]] = arith.constant 3.140000e+00 : f32
+  ! CHECK:               %[[VAL_63:.*]] = fir.array_fetch %[[VAL_31]], %[[VAL_57]], %[[VAL_61]] : (!fir.array<?x?xf32>, index, index) -> f32
+  ! CHECK:               %[[VAL_64:.*]] = arith.divf %[[VAL_63]], %[[VAL_62]] : f32
+  ! CHECK:               %[[VAL_65:.*]] = arith.constant 1 : index
+  ! CHECK:               %[[VAL_66:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
+  ! CHECK:               %[[VAL_67:.*]] = fir.convert %[[VAL_66]] : (i32) -> i64
+  ! CHECK:               %[[VAL_68:.*]] = fir.convert %[[VAL_67]] : (i64) -> index
+  ! CHECK:               %[[VAL_69:.*]] = arith.subi %[[VAL_68]], %[[VAL_65]] : index
+  ! CHECK:               %[[VAL_70:.*]] = fir.load %[[VAL_2]] : !fir.ref<i32>
+  ! CHECK:               %[[VAL_71:.*]] = fir.convert %[[VAL_70]] : (i32) -> i64
+  ! CHECK:               %[[VAL_72:.*]] = fir.convert %[[VAL_71]] : (i64) -> index
+  ! CHECK:               %[[VAL_73:.*]] = arith.subi %[[VAL_72]], %[[VAL_65]] : index
+  ! CHECK:               %[[VAL_74:.*]] = fir.array_update %[[VAL_38]], %[[VAL_64]], %[[VAL_69]], %[[VAL_73]] : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
+  ! CHECK:               fir.result %[[VAL_74]] : !fir.array<?x?xf32>
+  ! CHECK:             } else {
+  ! CHECK:               fir.result %[[VAL_38]] : !fir.array<?x?xf32>
+  ! CHECK:             }
+  ! CHECK:             fir.result %[[VAL_75:.*]] : !fir.array<?x?xf32>
+  ! CHECK:           }
+  ! CHECK:           fir.result %[[VAL_76:.*]] : !fir.array<?x?xf32>
+  ! CHECK:         }
+  ! CHECK:         fir.array_merge_store %[[VAL_30]], %[[VAL_77:.*]] to %[[VAL_0]] : !fir.array<?x?xf32>, !fir.array<?x?xf32>, !fir.box<!fir.array<?x?xf32>>
+  ! CHECK:         return
+  ! CHECK:       }
+  
\ No newline at end of file


        


More information about the flang-commits mailing list