[flang-commits] [flang] c9e3840 - [flang][hlfir] Implement hlfir.forall codegen when no temp is required

Jean Perier via flang-commits <flang-commits at lists.llvm.org>
Fri May 19 06:56:40 PDT 2023


Author: Jean Perier
Date: 2023-05-19T15:55:40+02:00
New Revision: c9e3840c832d4c3fbbf57f0a8c90ae3d80af8913

URL: https://github.com/llvm/llvm-project/commit/c9e3840c832d4c3fbbf57f0a8c90ae3d80af8913
DIFF: https://github.com/llvm/llvm-project/commit/c9e3840c832d4c3fbbf57f0a8c90ae3d80af8913.diff

LOG: [flang][hlfir] Implement hlfir.forall codegen when no temp is required

This patch applies the schedule built with the utility added in the
previous patch (D150455) to generate code for an ordered assignment
tree. For now, it only supports FORALL constructs that contain no
user-defined assignments or assignments to vector-subscripted entities,
and for which the scheduling analysis does not require temporary
storage.

Support for temporary storage, WHERE, and user-defined/vector-subscripted
assignments will be added in later patches.

This enables end-to-end HLFIR support for FORALL when the scheduling
analysis can prove there is no need to create temporary storage.
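
For illustration only (not part of the committed change), here is a
Fortran-level sketch of the distinction, assuming x and y are distinct
arrays (the names and bounds below are made up):

  subroutine demo(x, y)
    real :: x(10), y(10)
    integer :: i
    ! Reads only y and writes only x, so scheduling finds no conflict and
    ! needs no temporary: this FORALL now lowers directly to a fir.do_loop
    ! containing an hlfir.assign.
    forall (i = 1:10) x(i) = y(i) + 1.0
    ! The right-hand side reads the array being assigned, so scheduling
    ! requires saving its values first; this still hits the new
    ! "creating temporary storage in FORALL or WHERE constructs" TODO.
    forall (i = 1:10) x(i) = x(11 - i)
  end subroutine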

Differential Revision: https://reviews.llvm.org/D150564

Added: 
    flang/test/HLFIR/order_assignments/forall-codegen-fuse-assignments.fir
    flang/test/HLFIR/order_assignments/forall-codegen-no-conflict.fir

Modified: 
    flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
    flang/test/HLFIR/ordered-assignments-codegen-todo.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
index a0dbd46975cdc..38c4efdb835d8 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIROrderedAssignments.cpp
@@ -18,9 +18,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "ScheduleOrderedAssignments.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "mlir/IR/IRMapping.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 
 namespace hlfir {
@@ -38,12 +42,292 @@ static llvm::cl::opt<bool> dbgScheduleOnly(
     llvm::cl::desc("Only run ordered assignment scheduling with no codegen"),
     llvm::cl::init(false));
 
+namespace {
+/// Structure that visits an ordered assignment tree and generates code for
+/// it according to a schedule.
+class OrderedAssignmentRewriter {
+public:
+  OrderedAssignmentRewriter(fir::FirOpBuilder &builder,
+                            hlfir::OrderedAssignmentTreeOpInterface root)
+      : builder{builder}, root{root} {}
+
+  /// Generate code for the current run of the schedule.
+  void lowerRun(hlfir::Run &run) {
+    currentRun = &run;
+    walk(root);
+    currentRun = nullptr;
+    assert(constructStack.empty() && "must exit constructs after a run");
+    mapper.clear();
+  }
+
+private:
+  /// Walk the part of an ordered assignment tree node that needs
+  /// to be evaluated in the current run.
+  void walk(hlfir::OrderedAssignmentTreeOpInterface node);
+
+  /// Generate code when entering a given ordered assignment node.
+  void pre(hlfir::ForallOp forallOp);
+  void pre(hlfir::ForallIndexOp);
+  void pre(hlfir::ForallMaskOp);
+  void pre(hlfir::WhereOp whereOp);
+  void pre(hlfir::ElseWhereOp elseWhereOp);
+  void pre(hlfir::RegionAssignOp);
+
+  /// Generate code when leaving a given ordered assignment node.
+  void post(hlfir::ForallOp);
+  void post(hlfir::ForallMaskOp);
+
+  /// Is this an assignment to a vector subscripted entity?
+  static bool hasVectorSubscriptedLhs(hlfir::RegionAssignOp regionAssignOp);
+  /// Are there any leaf regions in node that must be saved in the current run?
+  bool mustSavedRegionIn(hlfir::OrderedAssignmentTreeOpInterface node) const;
+  /// Should this node be evaluated in the current run? Saving a region in a
+  /// node does not imply the node needs to be evaluated.
+  bool
+  isRequiredInCurrentRun(hlfir::OrderedAssignmentTreeOpInterface node) const;
+
+  /// Generate a scalar value yielded by an ordered assignment tree region.
+  /// If the value was not saved in a previous run, this clones the region
+  /// code, except the final yield, at the current execution point.
+  /// If the value was saved in a previous run, this fetches the saved value
+  /// from the temporary storage and returns the value.
+  mlir::Value generateYieldedScalarValue(mlir::Region &region);
+
+  /// Generate an entity yielded by an ordered assignment tree region, and
+  /// optionally return the (uncloned) yield if there is any clean-up that
+  /// should be done after using the entity. Like generateYieldedScalarValue,
+  /// this will return the saved value if the region was saved in a previous
+  /// run.
+  std::pair<mlir::Value, std::optional<hlfir::YieldOp>>
+  generateYieldedEntity(mlir::Region &region);
+
+  /// If \p maybeYield is present and has a clean-up, generate the clean-up
+  /// at the current insertion point (by cloning).
+  void generateCleanupIfAny(std::optional<hlfir::YieldOp> maybeYield);
+
+  fir::FirOpBuilder &builder;
+
+  /// Map containing the mapping between the original ordered assignment tree
+  /// operations and the operations that have been cloned in the current run.
+  /// It is reset between two runs.
+  mlir::IRMapping mapper;
+  /// Construct stack in the current run. This allows setting back the insertion
+  /// point correctly when leaving a node that requires a fir.do_loop or fir.if
+  /// operation.
+  llvm::SmallVector<mlir::Operation *> constructStack;
+  /// Root of the ordered assignment tree being lowered.
+  hlfir::OrderedAssignmentTreeOpInterface root;
+  /// Pointer to the current run of the schedule being lowered.
+  hlfir::Run *currentRun = nullptr;
+};
+} // namespace
+
+void OrderedAssignmentRewriter::walk(
+    hlfir::OrderedAssignmentTreeOpInterface node) {
+  if (mustSavedRegionIn(node))
+    TODO(node.getLoc(),
+         "creating temporary storage in FORALL or WHERE constructs");
+  if (isRequiredInCurrentRun(node) || mlir::isa<hlfir::ForallIndexOp>(node)) {
+    llvm::TypeSwitch<mlir::Operation *, void>(node.getOperation())
+        .Case<hlfir::ForallOp, hlfir::ForallIndexOp, hlfir::ForallMaskOp,
+              hlfir::RegionAssignOp, hlfir::WhereOp, hlfir::ElseWhereOp>(
+            [&](auto concreteOp) { pre(concreteOp); })
+        .Default([](auto) {});
+    if (auto *body = node.getSubTreeRegion()) {
+      for (mlir::Operation &op : body->getOps())
+        if (auto subNode =
+                mlir::dyn_cast<hlfir::OrderedAssignmentTreeOpInterface>(op))
+          walk(subNode);
+      llvm::TypeSwitch<mlir::Operation *, void>(node.getOperation())
+          .Case<hlfir::ForallOp, hlfir::ForallMaskOp>(
+              [&](auto concreteOp) { post(concreteOp); })
+          .Default([](auto) {});
+    }
+  }
+}
+
+void OrderedAssignmentRewriter::pre(hlfir::ForallOp forallOp) {
+  /// Create a fir.do_loop given the hlfir.forall control values.
+  mlir::Value rawLowerBound =
+      generateYieldedScalarValue(forallOp.getLbRegion());
+  mlir::Location loc = forallOp.getLoc();
+  mlir::Type idxTy = builder.getIndexType();
+  mlir::Value lb = builder.createConvert(loc, idxTy, rawLowerBound);
+  mlir::Value rawUpperBound =
+      generateYieldedScalarValue(forallOp.getUbRegion());
+  mlir::Value ub = builder.createConvert(loc, idxTy, rawUpperBound);
+  mlir::Value step;
+  if (forallOp.getStepRegion().empty()) {
+    step = builder.createIntegerConstant(loc, idxTy, 1);
+  } else {
+    step = generateYieldedScalarValue(forallOp.getStepRegion());
+    step = builder.createConvert(loc, idxTy, step);
+  }
+  auto doLoop = builder.create<fir::DoLoopOp>(loc, lb, ub, step);
+  builder.setInsertionPointToStart(doLoop.getBody());
+  mlir::Value oldIndex = forallOp.getForallIndexValue();
+  mlir::Value newIndex =
+      builder.createConvert(loc, oldIndex.getType(), doLoop.getInductionVar());
+  mapper.map(oldIndex, newIndex);
+  constructStack.push_back(doLoop);
+}
+
+void OrderedAssignmentRewriter::post(hlfir::ForallOp) {
+  assert(!constructStack.empty() && "must contain a loop");
+  builder.setInsertionPointAfter(constructStack.pop_back_val());
+}
+
+void OrderedAssignmentRewriter::pre(hlfir::ForallIndexOp forallIndexOp) {
+  mlir::Location loc = forallIndexOp.getLoc();
+  mlir::Type intTy = fir::unwrapRefType(forallIndexOp.getType());
+  mlir::Value indexVar =
+      builder.createTemporary(loc, intTy, forallIndexOp.getName());
+  mlir::Value newVal = mapper.lookupOrDefault(forallIndexOp.getIndex());
+  builder.createStoreWithConvert(loc, newVal, indexVar);
+  mapper.map(forallIndexOp, indexVar);
+}
+
+void OrderedAssignmentRewriter::pre(hlfir::ForallMaskOp forallMaskOp) {
+  mlir::Location loc = forallMaskOp.getLoc();
+  mlir::Value mask = generateYieldedScalarValue(forallMaskOp.getMaskRegion());
+  mask = builder.createConvert(loc, builder.getI1Type(), mask);
+  auto ifOp = builder.create<fir::IfOp>(loc, std::nullopt, mask, false);
+  builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+  constructStack.push_back(ifOp);
+}
+
+void OrderedAssignmentRewriter::post(hlfir::ForallMaskOp forallMaskOp) {
+  assert(!constructStack.empty() && "must contain an ifop");
+  builder.setInsertionPointAfter(constructStack.pop_back_val());
+}
+
+void OrderedAssignmentRewriter::pre(hlfir::RegionAssignOp regionAssignOp) {
+  mlir::Location loc = regionAssignOp.getLoc();
+  auto [rhs, oldRhsYield] =
+      generateYieldedEntity(regionAssignOp.getRhsRegion());
+  if (hasVectorSubscriptedLhs(regionAssignOp))
+    TODO(loc, "assignment to vector subscripted entity");
+  auto [lhs, oldLhsYield] =
+      generateYieldedEntity(regionAssignOp.getLhsRegion());
+  if (!regionAssignOp.getUserDefinedAssignment().empty())
+    TODO(loc, "user defined assignment inside FORALL or WHERE");
+  // TODO: preserve allocatable assignment aspects for forall once
+  // they are conveyed in hlfir.region_assign.
+  builder.create<hlfir::AssignOp>(loc, rhs, lhs);
+  generateCleanupIfAny(oldRhsYield);
+  generateCleanupIfAny(oldLhsYield);
+}
+
+void OrderedAssignmentRewriter::pre(hlfir::WhereOp whereOp) {
+  mlir::Location loc = whereOp.getLoc();
+  TODO(loc, "WHERE in HLFIR");
+}
+
+void OrderedAssignmentRewriter::pre(hlfir::ElseWhereOp elseWhereOp) {
+  mlir::Location loc = elseWhereOp.getLoc();
+  TODO(loc, "ELSEWHERE in HLFIR");
+}
+
+std::pair<mlir::Value, std::optional<hlfir::YieldOp>>
+OrderedAssignmentRewriter::generateYieldedEntity(mlir::Region &region) {
+  // TODO: if the region was saved, use that instead of generating code again.
+  assert(region.hasOneBlock() && "region must contain one block");
+  // Clone all operations except the final hlfir.yield.
+  mlir::Block::OpListType &ops = region.back().getOperations();
+  assert(!ops.empty() && "yield block cannot be empty");
+  auto end = ops.end();
+  for (auto opIt = ops.begin(); std::next(opIt) != end; ++opIt)
+    (void)builder.clone(*opIt, mapper);
+  auto oldYield = mlir::dyn_cast_or_null<hlfir::YieldOp>(
+      region.back().getOperations().back());
+  assert(oldYield && "region computing scalar must end with a YieldOp");
+  // Get the value for the yielded entity; it may be the result of an operation
+  // that was cloned, or it may be the same as the previous value if the yield
+  // operand was created before the ordered assignment tree.
+  mlir::Value newEntity = mapper.lookupOrDefault(oldYield.getEntity());
+  if (oldYield.getCleanup().empty())
+    return {newEntity, std::nullopt};
+  return {newEntity, oldYield};
+}
+
+mlir::Value
+OrderedAssignmentRewriter::generateYieldedScalarValue(mlir::Region &region) {
+  auto [value, maybeYield] = generateYieldedEntity(region);
+  assert(fir::isa_trivial(value.getType()) && "not a trivial scalar value");
+  generateCleanupIfAny(maybeYield);
+  return value;
+}
+
+void OrderedAssignmentRewriter::generateCleanupIfAny(
+    std::optional<hlfir::YieldOp> maybeYield) {
+  if (maybeYield.has_value())
+    if (!maybeYield->getCleanup().empty()) {
+      assert(maybeYield->getCleanup().hasOneBlock() &&
+             "region must contain one block");
+      for (auto &op : maybeYield->getCleanup().back().getOperations())
+        builder.clone(op, mapper);
+    }
+}
+
+bool OrderedAssignmentRewriter::hasVectorSubscriptedLhs(
+    hlfir::RegionAssignOp regionAssignOp) {
+  return mlir::isa<hlfir::ElementalAddrOp>(
+      regionAssignOp.getLhsRegion().back().back());
+}
+
+bool OrderedAssignmentRewriter::mustSavedRegionIn(
+    hlfir::OrderedAssignmentTreeOpInterface node) const {
+  for (auto &action : currentRun->actions)
+    if (hlfir::SaveEntity *savedEntity =
+            std::get_if<hlfir::SaveEntity>(&action))
+      if (node.getOperation() == savedEntity->yieldRegion->getParentOp())
+        return true;
+  return false;
+}
+
+bool OrderedAssignmentRewriter::isRequiredInCurrentRun(
+    hlfir::OrderedAssignmentTreeOpInterface node) const {
+  // hlfir.forall_index operations do not contain saved regions/assignments,
+  // but if their hlfir.forall parent is required, they are required
+  // too (the forall indices need to be mapped).
+  if (mlir::isa<hlfir::ForallIndexOp>(node))
+    return true;
+  for (auto &action : currentRun->actions)
+    if (hlfir::SaveEntity *savedEntity =
+            std::get_if<hlfir::SaveEntity>(&action)) {
+      // A SaveEntity action does not require evaluating the node that contains
+      // it, but it does require evaluating all the parents of the node that
+      // contains it. For instance, saving a bound of hlfir.forall B does not
+      // require creating the loops for B, but it does require creating the
+      // loops for any forall parent A of B.
+      if (node->isProperAncestor(savedEntity->yieldRegion->getParentOp()))
+        return true;
+    } else {
+      auto assign = std::get<hlfir::RegionAssignOp>(action);
+      if (node->isAncestor(assign.getOperation()))
+        return true;
+    }
+  return false;
+}
+
+/// Lower an ordered assignment tree to fir.do_loop and hlfir.assign given
+/// a schedule.
+static void lower(hlfir::OrderedAssignmentTreeOpInterface root,
+                  mlir::PatternRewriter &rewriter, hlfir::Schedule &schedule) {
+  auto module = root->getParentOfType<mlir::ModuleOp>();
+  fir::FirOpBuilder builder(rewriter, fir::getKindMapping(module));
+  OrderedAssignmentRewriter assignmentRewriter(builder, root);
+  for (auto &run : schedule)
+    assignmentRewriter.lowerRun(run);
+}
+
 /// Shared rewrite entry point for all the ordered assignment tree root
 /// operations. It calls the scheduler and then applies the schedule.
-static mlir::LogicalResult
-rewrite(hlfir::OrderedAssignmentTreeOpInterface &root,
-        bool tryFusingAssignments, mlir::PatternRewriter &rewriter) {
-  (void)hlfir::buildEvaluationSchedule(root, tryFusingAssignments);
+static mlir::LogicalResult rewrite(hlfir::OrderedAssignmentTreeOpInterface root,
+                                   bool tryFusingAssignments,
+                                   mlir::PatternRewriter &rewriter) {
+  hlfir::Schedule schedule =
+      hlfir::buildEvaluationSchedule(root, tryFusingAssignments);
 
   LLVM_DEBUG(
       /// Debug option to print the scheduling debug info without doing
@@ -55,8 +339,9 @@ rewrite(hlfir::OrderedAssignmentTreeOpInterface &root,
         rewriter.eraseOp(root);
         return mlir::success();
       });
-  // TODO: lower to loops according to schedule.
-  return mlir::failure();
+  lower(root, rewriter, schedule);
+  rewriter.eraseOp(root);
+  return mlir::success();
 }
 
 namespace {

diff  --git a/flang/test/HLFIR/order_assignments/forall-codegen-fuse-assignments.fir b/flang/test/HLFIR/order_assignments/forall-codegen-fuse-assignments.fir
new file mode 100644
index 0000000000000..d5ee7a66c772a
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/forall-codegen-fuse-assignments.fir
@@ -0,0 +1,51 @@
+// Test code generation of hlfir.forall when assignment fusing is enabled
+// and possible.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments=fuse-assignments=true | FileCheck %s --check-prefix=FUSE
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments=fuse-assignments=false | FileCheck %s --check-prefix=NOFUSE
+
+func.func @test_assignment_fusing(%x: !fir.ref<!fir.array<10xi32>>, %y : !fir.box<!fir.array<?xi32>>) {
+  %c42 = arith.constant 42 : i32
+  hlfir.forall lb {
+    %c1 = arith.constant 1 : index
+    hlfir.yield %c1 : index
+  } ub {
+    %c10 = arith.constant 10 : index
+    hlfir.yield %c10 : index
+  }  (%i: index) {
+    hlfir.region_assign {
+      hlfir.yield %c42 : i32
+    } to {
+      %2 = hlfir.designate %x (%i)  : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+      hlfir.yield %2 : !fir.ref<i32>
+    }
+    hlfir.region_assign {
+      hlfir.yield %c42 : i32
+    } to {
+      %2 = hlfir.designate %y (%i)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+      hlfir.yield %2 : !fir.ref<i32>
+    }
+  }
+  return
+}
+// FUSE-LABEL:   func.func @test_assignment_fusing(
+// FUSE-SAME:                                      %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>,
+// FUSE-SAME:                                      %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>>) {
+// FUSE:           %[[VAL_2:.*]] = arith.constant 42 : i32
+// FUSE:           %[[VAL_3:.*]] = arith.constant 1 : index
+// FUSE:           %[[VAL_4:.*]] = arith.constant 10 : index
+// FUSE:           %[[VAL_5:.*]] = arith.constant 1 : index
+// FUSE:           fir.do_loop %[[VAL_6:.*]] = %[[VAL_3]] to %[[VAL_4]] step %[[VAL_5]] {
+// FUSE-NEXT:         %[[VAL_7:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_6]])  : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// FUSE-NEXT:         hlfir.assign %[[VAL_2]] to %[[VAL_7]] : i32, !fir.ref<i32>
+// FUSE-NEXT:         %[[VAL_8:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_6]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// FUSE-NEXT:         hlfir.assign %[[VAL_2]] to %[[VAL_8]] : i32, !fir.ref<i32>
+// FUSE-NEXT:      }
+// FUSE-NEXT:      return
+
+// NOFUSE-LABEL: func.func @test_assignment_fusing(
+// NOFUSE:         fir.do_loop
+// NOFUSE:           hlfir.assign
+// NOFUSE:         }
+// NOFUSE:         fir.do_loop
+// NOFUSE:           hlfir.assign
+// NOFUSE:         }

diff  --git a/flang/test/HLFIR/order_assignments/forall-codegen-no-conflict.fir b/flang/test/HLFIR/order_assignments/forall-codegen-no-conflict.fir
new file mode 100644
index 0000000000000..dace9b2f245df
--- /dev/null
+++ b/flang/test/HLFIR/order_assignments/forall-codegen-no-conflict.fir
@@ -0,0 +1,201 @@
+// Test code generation of hlfir.forall, hlfir.forall_index, and hlfir.forall_mask.
+// RUN: fir-opt %s --lower-hlfir-ordered-assignments | FileCheck %s
+
+func.func @test_simple(%x: !fir.ref<!fir.array<10xi32>>) {
+  hlfir.forall lb {
+    %c1 = arith.constant 1 : index
+    hlfir.yield %c1 : index
+  } ub {
+    %c10 = arith.constant 10 : index
+    hlfir.yield %c10 : index
+  }  (%i: index) {
+    hlfir.region_assign {
+      %c42 = arith.constant 42 : i32
+      hlfir.yield %c42 : i32
+    } to {
+      %2 = hlfir.designate %x (%i)  : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+      hlfir.yield %2 : !fir.ref<i32>
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @test_simple(
+// CHECK-SAME:                           %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>) {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_4:.*]] = %[[VAL_1]] to %[[VAL_2]] step %[[VAL_3]] {
+// CHECK:             %[[VAL_5:.*]] = arith.constant 42 : i32
+// CHECK:             %[[VAL_6:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_4]])  : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+// CHECK:             hlfir.assign %[[VAL_5]] to %[[VAL_6]] : i32, !fir.ref<i32>
+// CHECK:           }
+
+func.func @test_index(%x: !fir.ref<!fir.array<10xi32>>) {
+  hlfir.forall lb {
+    %c1 = arith.constant 1 : index
+    hlfir.yield %c1 : index
+  } ub {
+    %c10 = arith.constant 10 : index
+    hlfir.yield %c10 : index
+  }  (%arg1: i32) {
+    %i = hlfir.forall_index "i" %arg1 : (i32) -> !fir.ref<i32>
+    hlfir.region_assign {
+      %i_load = fir.load %i : !fir.ref<i32>
+      hlfir.yield %i_load : i32
+    } to {
+      %2 = hlfir.designate %x (%arg1)  : (!fir.ref<!fir.array<10xi32>>, i32) -> !fir.ref<i32>
+      hlfir.yield %2 : !fir.ref<i32>
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @test_index(
+// CHECK-SAME:                          %[[VAL_0:.*]]: !fir.ref<!fir.array<10xi32>>) {
+// CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i"}
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 10 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_5:.*]] = %[[VAL_2]] to %[[VAL_3]] step %[[VAL_4]] {
+// CHECK:             %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// CHECK:             fir.store %[[VAL_6]] to %[[VAL_1]] : !fir.ref<i32>
+// CHECK:             %[[VAL_7:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
+// CHECK:             %[[VAL_8:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_6]])  : (!fir.ref<!fir.array<10xi32>>, i32) -> !fir.ref<i32>
+// CHECK:             hlfir.assign %[[VAL_7]] to %[[VAL_8]] : i32, !fir.ref<i32>
+// CHECK:           }
+
+
+func.func @split_schedule(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?xf32>>, %arg2: !fir.box<!fir.array<?x?xf32>>) {
+  %c11 = arith.constant 11 : i64
+  %c10 = arith.constant 10 : i64
+  %c1 = arith.constant 1 : i64
+  %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %1:2 = hlfir.declare %arg1 {uniq_name = "y"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %2:2 = hlfir.declare %arg2 {uniq_name = "z"} : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+  hlfir.forall lb {
+    hlfir.yield %c1 : i64
+  } ub {
+    hlfir.yield %c10 : i64
+  }  (%arg3: i64) {
+    hlfir.region_assign {
+      %3 = hlfir.designate %1#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+      %4 = fir.load %3 : !fir.ref<f32>
+      hlfir.yield %4 : f32
+    } to {
+      %3 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+      hlfir.yield %3 : !fir.ref<f32>
+    }
+    hlfir.forall lb {
+      hlfir.yield %c1 : i64
+    } ub {
+      hlfir.yield %c10 : i64
+    }  (%arg4: i64) {
+      hlfir.region_assign {
+        %3 = arith.subi %c11, %arg3 : i64
+        %4 = hlfir.designate %0#0 (%3)  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+        %5 = fir.load %4 : !fir.ref<f32>
+        hlfir.yield %5 : f32
+      } to {
+        %3 = hlfir.designate %2#0 (%arg3, %arg4)  : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
+        hlfir.yield %3 : !fir.ref<f32>
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @split_schedule(
+// CHECK:           %[[VAL_3:.*]] = arith.constant 11 : i64
+// CHECK:           %[[VAL_4:.*]] = arith.constant 10 : i64
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "x"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "y"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+// CHECK:           %[[VAL_8:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "z"} : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_5]] : (i64) -> index
+// CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
+// CHECK:           %[[VAL_11:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_12:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_11]] {
+// CHECK:             %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (index) -> i64
+// CHECK:             %[[VAL_14:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+// CHECK:             %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<f32>
+// CHECK:             %[[VAL_16:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+// CHECK:             hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref<f32>
+// CHECK:           }
+// CHECK:           %[[VAL_17:.*]] = fir.convert %[[VAL_5]] : (i64) -> index
+// CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
+// CHECK:           %[[VAL_19:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_19]] {
+// CHECK:             %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (index) -> i64
+// CHECK:             %[[VAL_22:.*]] = fir.convert %[[VAL_5]] : (i64) -> index
+// CHECK:             %[[VAL_23:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
+// CHECK:             %[[VAL_24:.*]] = arith.constant 1 : index
+// CHECK:             fir.do_loop %[[VAL_25:.*]] = %[[VAL_22]] to %[[VAL_23]] step %[[VAL_24]] {
+// CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (index) -> i64
+// CHECK:               %[[VAL_27:.*]] = arith.subi %[[VAL_3]], %[[VAL_21]] : i64
+// CHECK:               %[[VAL_28:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_27]])  : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+// CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<f32>
+// CHECK:               %[[VAL_30:.*]] = hlfir.designate %[[VAL_8]]#0 (%[[VAL_21]], %[[VAL_26]])  : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
+// CHECK:               hlfir.assign %[[VAL_29]] to %[[VAL_30]] : f32, !fir.ref<f32>
+// CHECK:             }
+// CHECK:           }
+
+func.func @test_mask(%arg0: !fir.box<!fir.array<?x?xf32>>, %arg1: !fir.box<!fir.array<?x?xf32>>, %arg2: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+  %c10 = arith.constant 10 : i64
+  %c1 = arith.constant 1 : i64
+  %0:2 = hlfir.declare %arg2 {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+  %1:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "y"} : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+  hlfir.forall lb {
+    hlfir.yield %c1 : i64
+  } ub {
+    hlfir.yield %c10 : i64
+  }  (%arg3: i64) {
+    hlfir.forall_mask {
+      %3 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+      %4 = fir.load %3 : !fir.ref<!fir.logical<4>>
+      %5 = fir.convert %4 : (!fir.logical<4>) -> i1
+      hlfir.yield %5 : i1
+    } do {
+      hlfir.forall lb {
+        hlfir.yield %c1 : i64
+      } ub {
+        hlfir.yield %arg3 : i64
+      }  (%arg4: i64) {
+        hlfir.region_assign {
+          %3 = hlfir.designate %2#0 (%arg3, %arg4)  : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
+          %4 = fir.load %3 : !fir.ref<f32>
+          hlfir.yield %4 : f32
+        } to {
+          %3 = hlfir.designate %1#0 (%arg3, %arg4)  : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
+          hlfir.yield %3 : !fir.ref<f32>
+        }
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL:   func.func @test_mask(
+// CHECK:           %[[VAL_3:.*]] = arith.constant 10 : i64
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : i64
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "mask"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+// CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "x"} : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+// CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "y"} : (!fir.box<!fir.array<?x?xf32>>) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+// CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
+// CHECK:           %[[VAL_9:.*]] = fir.convert %[[VAL_3]] : (i64) -> index
+// CHECK:           %[[VAL_10:.*]] = arith.constant 1 : index
+// CHECK:           fir.do_loop %[[VAL_11:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_10]] {
+// CHECK:             %[[VAL_12:.*]] = fir.convert %[[VAL_11]] : (index) -> i64
+// CHECK:             %[[VAL_13:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_12]])  : (!fir.box<!fir.array<?x!fir.logical<4>>>, i64) -> !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<!fir.logical<4>>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             fir.if %[[VAL_15]] {
+// CHECK:               %[[VAL_16:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
+// CHECK:               %[[VAL_17:.*]] = fir.convert %[[VAL_12]] : (i64) -> index
+// CHECK:               %[[VAL_18:.*]] = arith.constant 1 : index
+// CHECK:               fir.do_loop %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_18]] {
+// CHECK:                 %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (index) -> i64
+// CHECK:                 %[[VAL_21:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_12]], %[[VAL_20]])  : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_6]]#0 (%[[VAL_12]], %[[VAL_20]])  : (!fir.box<!fir.array<?x?xf32>>, i64, i64) -> !fir.ref<f32>
+// CHECK:                 hlfir.assign %[[VAL_22]] to %[[VAL_23]] : f32, !fir.ref<f32>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }

diff  --git a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir b/flang/test/HLFIR/ordered-assignments-codegen-todo.fir
index ccbd97ce5caba..6557a03219fb3 100644
--- a/flang/test/HLFIR/ordered-assignments-codegen-todo.fir
+++ b/flang/test/HLFIR/ordered-assignments-codegen-todo.fir
@@ -2,9 +2,9 @@
 // RUN: %not_todo_cmd fir-opt --lower-hlfir-ordered-assignments %s 2>&1 | FileCheck %s
 
 
-// CHECK: not yet implemented: FORALL construct or statement in HLFIR
+// CHECK: not yet implemented: creating temporary storage in FORALL or WHERE constructs
 
-func.func @forall_todo(%arg0: !fir.ref<!fir.array<10xf32>>, %arg1: !fir.ref<!fir.array<10xf32>>) {
+func.func @forall_todo(%arg0: !fir.ref<!fir.array<10xf32>>) {
   %c1 = arith.constant 1 : index
   %c10 = arith.constant 10 : index
   hlfir.forall lb {
@@ -13,7 +13,7 @@ func.func @forall_todo(%arg0: !fir.ref<!fir.array<10xf32>>, %arg1: !fir.ref<!fir
     hlfir.yield %c10 : index
   }  (%arg2: i64) {
     hlfir.region_assign {
-      %1 = hlfir.designate %arg1 (%arg2)  : (!fir.ref<!fir.array<10xf32>>, i64) -> !fir.ref<f32>
+      %1 = hlfir.designate %arg0 (%arg2)  : (!fir.ref<!fir.array<10xf32>>, i64) -> !fir.ref<f32>
       hlfir.yield %1 : !fir.ref<f32>
     } to {
       %1 = hlfir.designate %arg0 (%arg2)  : (!fir.ref<!fir.array<10xf32>>, i64) -> !fir.ref<f32>


        

