[flang-commits] [flang] [flang] Extract hlfir.assign inlining from opt-bufferization. (PR #121544)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Thu Jan 2 20:43:38 PST 2025


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/121544

Optimized bufferization can transform hlfir.assign into a loop
nest doing element-by-element assignment, but it avoids
doing so when the RHS is an hlfir.expr. This is done to let the
ElementalAssignBufferization pattern try to do a better job.

This patch moves the hlfir.assign inlining after opt-bufferization,
and enables it for hlfir.expr RHS.

The hlfir.expr RHS cases are present in tonto, and this patch
results in some nice improvements. Note that other compilers
also handle those cases using array temporaries, so this patch
seems to just get rid of the Assign runtime
overhead/inefficiency.


>From f6783fa4063ce693f9bb9bac5ba850dc7a069356 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Thu, 2 Jan 2025 18:21:00 -0800
Subject: [PATCH] [flang] Extract hlfir.assign inlining from opt-bufferization.

Optimized bufferization can transform hlfir.assign into a loop
nest doing element-by-element assignment, but it avoids
doing so when the RHS is an hlfir.expr. This is done to let the
ElementalAssignBufferization pattern try to do a better job.

This patch moves the hlfir.assign inlining after opt-bufferization,
and enables it for hlfir.expr RHS.

The hlfir.expr RHS cases are present in tonto, and this patch
results in some nice improvements. Note that other compilers
also handle those cases using array temporaries, so this patch
seems to just get rid of the Assign runtime
overhead/inefficiency.
---
 flang/include/flang/Optimizer/HLFIR/Passes.td |   4 +
 .../Optimizer/HLFIR/Transforms/CMakeLists.txt |   1 +
 .../HLFIR/Transforms/InlineHLFIRAssign.cpp    | 152 ++++++++++++++++++
 .../Transforms/OptimizedBufferization.cpp     | 109 +------------
 flang/lib/Optimizer/Passes/Pipelines.cpp      |   2 +
 flang/test/Driver/mlir-pass-pipeline.f90      |   4 +
 flang/test/Fir/basic-program.fir              |   4 +
 ...ble-assign.fir => inline-hlfir-assign.fir} |  57 ++++++-
 flang/test/HLFIR/maxloc-elemental.fir         |   8 +-
 flang/test/HLFIR/minloc-elemental.fir         |  16 +-
 .../HLFIR/opt-bufferization-eval_in_mem.fir   |   7 +-
 flang/test/HLFIR/opt-bufferization.fir        |  42 -----
 12 files changed, 228 insertions(+), 178 deletions(-)
 create mode 100644 flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
 rename flang/test/HLFIR/{opt-variable-assign.fir => inline-hlfir-assign.fir} (84%)

diff --git a/flang/include/flang/Optimizer/HLFIR/Passes.td b/flang/include/flang/Optimizer/HLFIR/Passes.td
index ed49f5093c9652..644f1e3c3af2b8 100644
--- a/flang/include/flang/Optimizer/HLFIR/Passes.td
+++ b/flang/include/flang/Optimizer/HLFIR/Passes.td
@@ -49,4 +49,8 @@ def InlineElementals : Pass<"inline-elementals"> {
   let summary = "Inline chained hlfir.elemental operations";
 }
 
+def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
+  let summary = "Inline hlfir.assign operations";
+}
+
 #endif //FORTRAN_DIALECT_HLFIR_PASSES
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index d18df2ef49f10e..25a532204dd053 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -4,6 +4,7 @@ add_flang_library(HLFIRTransforms
   BufferizeHLFIR.cpp
   ConvertToFIR.cpp
   InlineElementals.cpp
+  InlineHLFIRAssign.cpp
   LowerHLFIRIntrinsics.cpp
   LowerHLFIROrderedAssignments.cpp
   ScheduleOrderedAssignments.cpp
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
new file mode 100644
index 00000000000000..249976d5509b0c
--- /dev/null
+++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp
@@ -0,0 +1,152 @@
+//===- InlineHLFIRAssign.cpp - Inline hlfir.assign ops --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Transform hlfir.assign array operations into loop nests performing element
+// per element assignments. The inlining is done for trivial data types always,
+// though, we may add performance/code-size heuristics in future.
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/HLFIRTools.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenMP/Passes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace hlfir {
+#define GEN_PASS_DEF_INLINEHLFIRASSIGN
+#include "flang/Optimizer/HLFIR/Passes.h.inc"
+} // namespace hlfir
+
+#define DEBUG_TYPE "inline-hlfir-assign"
+
+namespace {
+/// Expand hlfir.assign of array RHS to array LHS into a loop nest
+/// of element-by-element assignments:
+///   hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
+///                           !fir.ref<!fir.array<3x3xf32>>
+/// into:
+///   fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
+///     fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
+///       %6 = hlfir.designate %4 (%arg2, %arg1)  :
+///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
+///       %7 = fir.load %6 : !fir.ref<f32>
+///       %8 = hlfir.designate %5 (%arg2, %arg1)  :
+///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
+///       hlfir.assign %7 to %8 : f32, !fir.ref<f32>
+///     }
+///   }
+///
+/// The transformation is correct only when LHS and RHS do not alias.
+/// When RHS is an array expression, then there is no aliasing.
+/// This transformation does not support runtime checking for
+/// non-conforming LHS/RHS arrays' shapes currently.
+class InlineHLFIRAssignConversion
+    : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(hlfir::AssignOp assign,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (assign.isAllocatableAssignment())
+      return rewriter.notifyMatchFailure(assign,
+                                         "AssignOp may imply allocation");
+
+    hlfir::Entity rhs{assign.getRhs()};
+
+    if (!rhs.isArray())
+      return rewriter.notifyMatchFailure(assign,
+                                         "AssignOp's RHS is not an array");
+
+    mlir::Type rhsEleTy = rhs.getFortranElementType();
+    if (!fir::isa_trivial(rhsEleTy))
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp's RHS data type is not trivial");
+
+    hlfir::Entity lhs{assign.getLhs()};
+    if (!lhs.isArray())
+      return rewriter.notifyMatchFailure(assign,
+                                         "AssignOp's LHS is not an array");
+
+    mlir::Type lhsEleTy = lhs.getFortranElementType();
+    if (!fir::isa_trivial(lhsEleTy))
+      return rewriter.notifyMatchFailure(
+          assign, "AssignOp's LHS data type is not trivial");
+
+    if (lhsEleTy != rhsEleTy)
+      return rewriter.notifyMatchFailure(assign,
+                                         "RHS/LHS element types mismatch");
+
+    if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
+      // If RHS is not an hlfir.expr, then we should prove that
+      // LHS and RHS do not alias.
+      // TODO: if they may alias, we can insert hlfir.as_expr for RHS,
+      // and proceed with the inlining.
+      fir::AliasAnalysis aliasAnalysis;
+      mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
+      // TODO: use areIdenticalOrDisjointSlices() from
+      // OptimizedBufferization.cpp to check if we can still do the expansion.
+      if (!aliasRes.isNo()) {
+        LLVM_DEBUG(llvm::dbgs() << "InlineHLFIRAssign:\n"
+                                << "\tLHS: " << lhs << "\n"
+                                << "\tRHS: " << rhs << "\n"
+                                << "\tALIAS: " << aliasRes << "\n");
+        return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias");
+      }
+    }
+
+    mlir::Location loc = assign->getLoc();
+    fir::FirOpBuilder builder(rewriter, assign.getOperation());
+    builder.setInsertionPoint(assign);
+    rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
+    lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
+    mlir::Value shape = hlfir::genShape(loc, builder, lhs);
+    llvm::SmallVector<mlir::Value> extents =
+        hlfir::getIndexExtents(loc, builder, shape);
+    hlfir::LoopNest loopNest =
+        hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
+                           flangomp::shouldUseWorkshareLowering(assign));
+    builder.setInsertionPointToStart(loopNest.body);
+    auto rhsArrayElement =
+        hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
+    rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
+    auto lhsArrayElement =
+        hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
+    builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement);
+    rewriter.eraseOp(assign);
+    return mlir::success();
+  }
+};
+
+class InlineHLFIRAssignPass
+    : public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
+public:
+  void runOnOperation() override {
+    mlir::MLIRContext *context = &getContext();
+
+    mlir::GreedyRewriteConfig config;
+    // Prevent the pattern driver from merging blocks.
+    config.enableRegionSimplification =
+        mlir::GreedySimplifyRegionLevel::Disabled;
+
+    mlir::RewritePatternSet patterns(context);
+    patterns.insert<InlineHLFIRAssignConversion>(context);
+
+    if (mlir::failed(mlir::applyPatternsGreedily(
+            getOperation(), std::move(patterns), config))) {
+      mlir::emitError(getOperation()->getLoc(),
+                      "failure in hlfir.assign inlining");
+      signalPassFailure();
+    }
+  }
+};
+} // namespace
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index bf3cf861e46f4a..a0f83cb5b8f475 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -698,108 +698,6 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
   return mlir::success();
 }
 
-/// Expand hlfir.assign of array RHS to array LHS into a loop nest
-/// of element-by-element assignments:
-///   hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
-///                           !fir.ref<!fir.array<3x3xf32>>
-/// into:
-///   fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
-///     fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
-///       %6 = hlfir.designate %4 (%arg2, %arg1)  :
-///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
-///       %7 = fir.load %6 : !fir.ref<f32>
-///       %8 = hlfir.designate %5 (%arg2, %arg1)  :
-///           (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
-///       hlfir.assign %7 to %8 : f32, !fir.ref<f32>
-///     }
-///   }
-///
-/// The transformation is correct only when LHS and RHS do not alias.
-/// This transformation does not support runtime checking for
-/// non-conforming LHS/RHS arrays' shapes currently.
-class VariableAssignBufferization
-    : public mlir::OpRewritePattern<hlfir::AssignOp> {
-private:
-public:
-  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
-
-  llvm::LogicalResult
-  matchAndRewrite(hlfir::AssignOp assign,
-                  mlir::PatternRewriter &rewriter) const override;
-};
-
-llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
-    hlfir::AssignOp assign, mlir::PatternRewriter &rewriter) const {
-  if (assign.isAllocatableAssignment())
-    return rewriter.notifyMatchFailure(assign, "AssignOp may imply allocation");
-
-  hlfir::Entity rhs{assign.getRhs()};
-
-  // To avoid conflicts with ElementalAssignBufferization pattern, we avoid
-  // matching RHS when it is an `ExprType` defined by an `ElementalOp`; which is
-  // among the main criteria matched by ElementalAssignBufferization.
-  if (mlir::isa<hlfir::ExprType>(rhs.getType()) &&
-      mlir::isa<hlfir::ElementalOp>(rhs.getDefiningOp()))
-    return rewriter.notifyMatchFailure(
-        assign, "RHS is an ExprType defined by ElementalOp");
-
-  if (!rhs.isArray())
-    return rewriter.notifyMatchFailure(assign,
-                                       "AssignOp's RHS is not an array");
-
-  mlir::Type rhsEleTy = rhs.getFortranElementType();
-  if (!fir::isa_trivial(rhsEleTy))
-    return rewriter.notifyMatchFailure(
-        assign, "AssignOp's RHS data type is not trivial");
-
-  hlfir::Entity lhs{assign.getLhs()};
-  if (!lhs.isArray())
-    return rewriter.notifyMatchFailure(assign,
-                                       "AssignOp's LHS is not an array");
-
-  mlir::Type lhsEleTy = lhs.getFortranElementType();
-  if (!fir::isa_trivial(lhsEleTy))
-    return rewriter.notifyMatchFailure(
-        assign, "AssignOp's LHS data type is not trivial");
-
-  if (lhsEleTy != rhsEleTy)
-    return rewriter.notifyMatchFailure(assign,
-                                       "RHS/LHS element types mismatch");
-
-  fir::AliasAnalysis aliasAnalysis;
-  mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
-  // TODO: use areIdenticalOrDisjointSlices() to check if
-  // we can still do the expansion.
-  if (!aliasRes.isNo()) {
-    LLVM_DEBUG(llvm::dbgs() << "VariableAssignBufferization:\n"
-                            << "\tLHS: " << lhs << "\n"
-                            << "\tRHS: " << rhs << "\n"
-                            << "\tALIAS: " << aliasRes << "\n");
-    return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias");
-  }
-
-  mlir::Location loc = assign->getLoc();
-  fir::FirOpBuilder builder(rewriter, assign.getOperation());
-  builder.setInsertionPoint(assign);
-  rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
-  lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
-  mlir::Value shape = hlfir::genShape(loc, builder, lhs);
-  llvm::SmallVector<mlir::Value> extents =
-      hlfir::getIndexExtents(loc, builder, shape);
-  hlfir::LoopNest loopNest =
-      hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
-                         flangomp::shouldUseWorkshareLowering(assign));
-  builder.setInsertionPointToStart(loopNest.body);
-  auto rhsArrayElement =
-      hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
-  rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
-  auto lhsArrayElement =
-      hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
-  builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement);
-  rewriter.eraseOp(assign);
-  return mlir::success();
-}
-
 using GenBodyFn =
     std::function<mlir::Value(fir::FirOpBuilder &, mlir::Location, mlir::Value,
                               const llvm::SmallVectorImpl<mlir::Value> &)>;
@@ -1206,9 +1104,9 @@ class ReductionMaskConversion : public mlir::OpRewritePattern<Op> {
         loc, resultArr, builder.createBool(loc, false));
 
     // Check all the users - the destroy is no longer required, and any assign
-    // can use resultArr directly so that VariableAssignBufferization in this
-    // pass can optimize the results. Other operations are replaces with an
-    // AsExpr for the temporary resultArr.
+    // can use resultArr directly so that InlineHLFIRAssign pass
+    // can optimize the results. Other operations are replaced with an AsExpr
+    // for the temporary resultArr.
     llvm::SmallVector<hlfir::DestroyOp> destroys;
     llvm::SmallVector<hlfir::AssignOp> assigns;
     for (auto user : mloc->getUsers()) {
@@ -1356,7 +1254,6 @@ class OptimizedBufferizationPass
     // This requires small code reordering in ElementalAssignBufferization.
     patterns.insert<ElementalAssignBufferization>(context);
     patterns.insert<BroadcastAssignBufferization>(context);
-    patterns.insert<VariableAssignBufferization>(context);
     patterns.insert<EvaluateIntoMemoryAssignBufferization>(context);
     patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
     patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 72803aa3793cec..20e4599587c4b2 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -234,6 +234,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
     pm.addPass(mlir::createCSEPass());
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createOptimizedBufferization);
+    addNestedPassToAllTopLevelOperations<PassConstructor>(
+        pm, hlfir::createInlineHLFIRAssign);
   }
   pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
   pm.addPass(hlfir::createLowerHLFIRIntrinsics());
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index b30affe691b840..9655afce96d927 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -36,12 +36,16 @@
 ! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! O2-NEXT: 'fir.global' Pipeline
 ! O2-NEXT:   OptimizedBufferization
+! O2-NEXT:   InlineHLFIRAssign
 ! O2-NEXT: 'func.func' Pipeline
 ! O2-NEXT:   OptimizedBufferization
+! O2-NEXT:   InlineHLFIRAssign
 ! O2-NEXT: 'omp.declare_reduction' Pipeline
 ! O2-NEXT:   OptimizedBufferization
+! O2-NEXT:   InlineHLFIRAssign
 ! O2-NEXT: 'omp.private' Pipeline
 ! O2-NEXT:   OptimizedBufferization
+! O2-NEXT:   InlineHLFIRAssign
 ! ALL: LowerHLFIROrderedAssignments
 ! ALL-NEXT: LowerHLFIRIntrinsics
 ! ALL-NEXT: BufferizeHLFIR
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index d2788008c3893e..620882ebbed2a9 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -37,12 +37,16 @@ func.func @_QQmain() {
 // PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 // PASSES-NEXT: 'fir.global' Pipeline
 // PASSES-NEXT:    OptimizedBufferization
+// PASSES-NEXT:    InlineHLFIRAssign
 // PASSES-NEXT: 'func.func' Pipeline
 // PASSES-NEXT:    OptimizedBufferization
+// PASSES-NEXT:    InlineHLFIRAssign
 // PASSES-NEXT: 'omp.declare_reduction' Pipeline
 // PASSES-NEXT:    OptimizedBufferization
+// PASSES-NEXT:    InlineHLFIRAssign
 // PASSES-NEXT: 'omp.private' Pipeline
 // PASSES-NEXT:    OptimizedBufferization
+// PASSES-NEXT:    InlineHLFIRAssign
 // PASSES-NEXT:   LowerHLFIROrderedAssignments
 // PASSES-NEXT:   LowerHLFIRIntrinsics
 // PASSES-NEXT:   BufferizeHLFIR
diff --git a/flang/test/HLFIR/opt-variable-assign.fir b/flang/test/HLFIR/inline-hlfir-assign.fir
similarity index 84%
rename from flang/test/HLFIR/opt-variable-assign.fir
rename to flang/test/HLFIR/inline-hlfir-assign.fir
index 17124fa86af65a..f834e7971e3d50 100644
--- a/flang/test/HLFIR/opt-variable-assign.fir
+++ b/flang/test/HLFIR/inline-hlfir-assign.fir
@@ -1,6 +1,5 @@
-// Test optimized bufferization for hlfir.assign of arrays
-// variables:
-// RUN: fir-opt --opt-bufferization %s | FileCheck %s
+// Test inlining of hlfir.assign of arrays:
+// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s
 
 // The two assigns come from the following source forms:
 //   y(:,:) = x(:,:)
@@ -302,3 +301,55 @@ func.func @_QPtest7(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?xf32>>>> {f
 // CHECK-NOT:       hlfir.assign
 // CHECK:           hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref<f32>
 // CHECK-NOT:       hlfir.assign
+
+
+// Test that VAR = EXPR assignment is inlined:
+// subroutine test_expr_rhs(p1, p2)
+//   logical, pointer :: p1(:), p2(:)
+//   p1 = (p2)
+// end subroutine test_expr_rhs
+func.func @_QPtest_expr_rhs(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>> {fir.bindc_name = "p1"}, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>> {fir.bindc_name = "p2"}) {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_expr_rhsEp1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>)
+  %2:2 = hlfir.declare %arg1 dummy_scope %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_expr_rhsEp2"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>)
+  %3 = fir.load %2#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>
+  %4:3 = fir.box_dims %3, %c0 : (!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg2: index):
+    %8 = arith.subi %4#0, %c1 : index
+    %9 = arith.addi %arg2, %8 : index
+    %10 = hlfir.designate %3 (%9)  : (!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>, index) -> !fir.ref<!fir.logical<4>>
+    %11 = fir.load %10 : !fir.ref<!fir.logical<4>>
+    %12 = hlfir.no_reassoc %11 : !fir.logical<4>
+    hlfir.yield_element %12 : !fir.logical<4>
+  }
+  %7 = fir.load %1#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>
+  hlfir.assign %6 to %7 : !hlfir.expr<?x!fir.logical<4>>, !fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL:   func.func @_QPtest_expr_rhs(
+// CHECK-SAME:                                %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>> {fir.bindc_name = "p1"},
+// CHECK-SAME:                                %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>> {fir.bindc_name = "p2"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_4]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFtest_expr_rhsEp1"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>)
+// CHECK:           %[[VAL_10:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+// CHECK:           }
+// CHECK:           %[[VAL_17:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>>
+// CHECK:           %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_17]], %[[VAL_3]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>, index) -> (index, index, index)
+// CHECK:           fir.do_loop %[[VAL_19:.*]] = %[[VAL_2]] to %[[VAL_18]]#1 step %[[VAL_2]] unordered {
+// CHECK:             %[[VAL_20:.*]] = hlfir.apply %[[VAL_10]], %[[VAL_19]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_17]], %[[VAL_3]] : (!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_22:.*]] = arith.subi %[[VAL_21]]#0, %[[VAL_2]] : index
+// CHECK:             %[[VAL_23:.*]] = arith.addi %[[VAL_19]], %[[VAL_22]] : index
+// CHECK:             %[[VAL_24:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_23]])  : (!fir.box<!fir.ptr<!fir.array<?x!fir.logical<4>>>>, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:             hlfir.assign %[[VAL_20]] to %[[VAL_24]] : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK:           }
+// CHECK:           hlfir.destroy %[[VAL_10]] : !hlfir.expr<?x!fir.logical<4>>
+// CHECK:           return
+// CHECK:         }
diff --git a/flang/test/HLFIR/maxloc-elemental.fir b/flang/test/HLFIR/maxloc-elemental.fir
index 497a58c9bd1d4e..c9210a59f03404 100644
--- a/flang/test/HLFIR/maxloc-elemental.fir
+++ b/flang/test/HLFIR/maxloc-elemental.fir
@@ -68,13 +68,7 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
 // CHECK-NEXT:      }
 // CHECK-NEXT:      fir.result %[[V18]] : i32
 // CHECK-NEXT:    }
-// CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
-// CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V2]]#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      hlfir.assign %[[V14]] to %[[V15]] : i32, !fir.ref<i32>
-// CHECK-NEXT:    }
+// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi32>>, !fir.box<!fir.array<?xi32>>
 // CHECK-NEXT:    return
 // CHECK-NEXT:  }
 
diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
index 5fa482a7b904ee..9453a335b4fbf1 100644
--- a/flang/test/HLFIR/minloc-elemental.fir
+++ b/flang/test/HLFIR/minloc-elemental.fir
@@ -68,13 +68,7 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
 // CHECK-NEXT:      }
 // CHECK-NEXT:      fir.result %[[V18]] : i32
 // CHECK-NEXT:    }
-// CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
-// CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V2]]#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      hlfir.assign %[[V14]] to %[[V15]] : i32, !fir.ref<i32>
-// CHECK-NEXT:    }
+// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi32>>, !fir.box<!fir.array<?xi32>>
 // CHECK-NEXT:    return
 // CHECK-NEXT:  }
 
@@ -147,13 +141,7 @@ func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a
 // CHECK-NEXT:      }
 // CHECK-NEXT:      fir.result %[[V18]] : i32
 // CHECK-NEXT:    }
-// CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> (index, index, index)
-// CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
-// CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:      %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i16>
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V2]]#0 (%arg3)  : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:      hlfir.assign %[[V14]] to %[[V15]] : i16, !fir.ref<i16>
-// CHECK-NEXT:    }
+// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi16>>, !fir.box<!fir.array<?xi16>>
 // CHECK-NEXT:    return
 
 
diff --git a/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir b/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir
index 984c0bcbaddcc3..ce669073dbb1b5 100644
--- a/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir
+++ b/flang/test/HLFIR/opt-bufferization-eval_in_mem.fir
@@ -48,7 +48,6 @@ func.func @_QPnegative_test_is_target(%arg0: !fir.ref<!fir.array<10xf32>> {fir.b
 }
 // CHECK-LABEL: func.func @_QPnegative_test_is_target(
 // CHECK-SAME:                                        %[[VAL_0:.*]]: !fir.ref<!fir.array<10xf32>> {fir.bindc_name = "x", fir.target}) {
-// CHECK:         %[[VAL_1:.*]] = arith.constant 1 : index
 // CHECK:         %[[VAL_2:.*]] = arith.constant false
 // CHECK:         %[[VAL_3:.*]] = arith.constant 10 : index
 // CHECK:         %[[VAL_4:.*]] = fir.alloca !fir.array<10xf32>
@@ -57,11 +56,7 @@ func.func @_QPnegative_test_is_target(%arg0: !fir.ref<!fir.array<10xf32>> {fir.b
 // CHECK:         %[[VAL_9:.*]] = fir.call @_QPfoo() fastmath<contract> : () -> !fir.array<10xf32>
 // CHECK:         fir.save_result %[[VAL_9]] to %[[VAL_8]]#1{{.*}}
 // CHECK:         %[[VAL_10:.*]] = hlfir.as_expr %[[VAL_8]]#0 move %[[VAL_2]] : (!fir.ref<!fir.array<10xf32>>, i1) -> !hlfir.expr<10xf32>
-// CHECK:         fir.do_loop %[[VAL_11:.*]] = %[[VAL_1]] to %[[VAL_3]] step %[[VAL_1]] unordered {
-// CHECK:           %[[VAL_12:.*]] = hlfir.apply %[[VAL_10]], %[[VAL_11]] : (!hlfir.expr<10xf32>, index) -> f32
-// CHECK:           %[[VAL_13:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_11]])  : (!fir.ref<!fir.array<10xf32>>, index) -> !fir.ref<f32>
-// CHECK:           hlfir.assign %[[VAL_12]] to %[[VAL_13]] : f32, !fir.ref<f32>
-// CHECK:         }
+// CHECK:         hlfir.assign %[[VAL_10]] to %[[VAL_7]]#0 : !hlfir.expr<10xf32>, !fir.ref<!fir.array<10xf32>>
 // CHECK:         hlfir.destroy %[[VAL_10]] : !hlfir.expr<10xf32>
 // CHECK:         return
 // CHECK:       }
diff --git a/flang/test/HLFIR/opt-bufferization.fir b/flang/test/HLFIR/opt-bufferization.fir
index 87afb3cc924533..faa8f4bcdb7789 100644
--- a/flang/test/HLFIR/opt-bufferization.fir
+++ b/flang/test/HLFIR/opt-bufferization.fir
@@ -796,45 +796,3 @@ func.func @_QPddx(%arg0: !fir.box<!fir.array<?x?xf64>> {fir.bindc_name = "array"
 // CHECK:           %[[VAL_61:.*]] = fir.load %[[VAL_26]]#1 : !fir.ref<!fir.array<?x?xf64>>
 // CHECK:           return %[[VAL_61]] : !fir.array<?x?xf64>
 // CHECK:         }
-
-// `hlfir.expr` bufferization (when the expresion is not the result of
-// `hlfir.elemental`)
-func.func @_QPfoo() {
-  %c1 = arith.constant 1 : index
-  %0 = fir.alloca !fir.array<1xi32> {bindc_name = "iavs", uniq_name = "_QFfooEiavs"}
-  %1 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %2:2 = hlfir.declare %0(%1) {uniq_name = "_QFfooEiavs"} : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
-  %3 = fir.alloca i32 {bindc_name = "iv", uniq_name = "_QFfooEiv"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFfooEiv"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %c10_i32 = arith.constant 10 : i32
-  %6 = fir.convert %c10_i32 : (i32) -> index
-  %7 = fir.convert %c1 : (index) -> i32
-  %8:2 = fir.do_loop %arg0 = %c1 to %6 step %c1 iter_args(%arg1 = %7) -> (index, i32) {
-    fir.store %arg1 to %4#1 : !fir.ref<i32>
-    %9 = fir.allocmem !fir.array<1xi32> {bindc_name = ".tmp.arrayctor", uniq_name = ""}
-    %10 = fir.shape %c1 : (index) -> !fir.shape<1>
-    %11:2 = hlfir.declare %9(%10) {uniq_name = ".tmp.arrayctor"} : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<1xi32>>, !fir.heap<!fir.array<1xi32>>)
-    %12 = fir.load %4#0 : !fir.ref<i32>
-    %13 = hlfir.designate %11#0 (%c1)  : (!fir.heap<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-    hlfir.assign %12 to %13 : i32, !fir.ref<i32>
-    %true = arith.constant true
-    %14 = hlfir.as_expr %11#0 move %true : (!fir.heap<!fir.array<1xi32>>, i1) -> !hlfir.expr<1xi32>
-    hlfir.assign %14 to %2#0 : !hlfir.expr<1xi32>, !fir.ref<!fir.array<1xi32>>
-    hlfir.destroy %14 : !hlfir.expr<1xi32>
-    %15 = arith.addi %arg0, %c1 : index
-    %16 = fir.convert %c1 : (index) -> i32
-    %17 = fir.load %4#1 : !fir.ref<i32>
-    %18 = arith.addi %17, %16 : i32
-    fir.result %15, %18 : index, i32
-  }
-  fir.store %8#1 to %4#1 : !fir.ref<i32>
-  return
-}
-
-// CHECK-LABEL:   func.func @_QPfoo
-// CHECK:           %[[C1:.*]] = arith.constant 1 : index
-// CHECK:           fir.do_loop {{.*}} {
-// CHECK-NOT:         hlfir.assign %{{.*}} to %{{.*}}#0 : !hlfir.expr<1xi32>, !fir.ref<!fir.array<1xi32>>
-// CHECK:             fir.do_loop %{{.*}} = %[[C1]] to %[[C1]] step %[[C1]] unordered {
-// CHECK:             }
-// CHECK:           }



More information about the flang-commits mailing list