[flang-commits] [flang] [Flang] Any and All elemental lowering (PR #75776)

David Green via flang-commits flang-commits at lists.llvm.org
Mon Dec 18 01:24:09 PST 2023


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/75776

>From f61d44f926613b1b1846867200de1372000f777a Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 18 Dec 2023 09:22:50 +0000
Subject: [PATCH 1/2] [Flang] Generate inline reduction loops for elemental
 count intrinsics

This adds a ReductionElementalConversion transform to OptimizedBufferizationPass,
taking hlfir::count(hlfir::elemental) and generating the inline loop to perform
the count of true elements. This lets us generate a single loop instead of
ending up with two loops plus a temporary.

This is currently part of OptimizedBufferization, similar to #74828. I
attempted to move it to LowerHLFIRIntrinsics to make it part of the existing
lowering, but it hit problems with inlining elementals that contain operations
that are being legalized by the same pass.

Any and All should be able to share the same code with a different
function/initial value.
---
 .../Transforms/OptimizedBufferization.cpp     | 119 +++++++
 flang/test/HLFIR/count-elemental.fir          | 314 ++++++++++++++++++
 2 files changed, 433 insertions(+)
 create mode 100644 flang/test/HLFIR/count-elemental.fir

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 7abfa20493c736..7c839f1d20af52 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -659,6 +659,124 @@ mlir::LogicalResult VariableAssignBufferization::matchAndRewrite(
   return mlir::success();
 }
 
+using GenBodyFn =
+    std::function<mlir::Value(fir::FirOpBuilder &, mlir::Location, mlir::Value,
+                              const llvm::SmallVectorImpl<mlir::Value> &)>;
+/// Emit a fir.do_loop nest, one loop per extent of \p shape, that carries a
+/// single reduction value seeded with \p init. \p genBody emits the innermost
+/// body from the current value and the loop indices and returns the new
+/// value. Returns the outermost loop's result, or genBody's value when the
+/// shape yields no extents.
+static mlir::Value generateReductionLoop(fir::FirOpBuilder &builder,
+                                         mlir::Location loc, mlir::Value init,
+                                         mlir::Value shape, GenBodyFn genBody) {
+  auto bounds = hlfir::getIndexExtents(loc, builder, shape);
+  mlir::Value one =
+      builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+
+  // Open the nest from the outside in. Indices run from one to the extent so
+  // that they can be handed straight to the elemental.
+  mlir::Value acc = init;
+  llvm::SmallVector<mlir::Value> ivs;
+  for (mlir::Value upper : bounds) {
+    auto loop = builder.create<fir::DoLoopOp>(loc, one, upper, one, false,
+                                              /*finalCountValue=*/false, acc);
+    acc = loop.getRegionIterArgs()[0];
+    ivs.push_back(loop.getInductionVar());
+    // Descend: whatever is emitted next goes inside this loop's body.
+    builder.setInsertionPointToStart(loop.getBody());
+  }
+
+  acc = genBody(builder, loc, acc, ivs);
+
+  // Close the nest from the inside out, yielding the running value upward.
+  for (unsigned depth = ivs.size(); depth != 0; --depth) {
+    auto yieldOp = builder.create<fir::ResultOp>(loc, acc);
+    auto enclosing = mlir::cast<fir::DoLoopOp>(yieldOp->getParentOp());
+    acc = enclosing.getResult(0);
+    builder.setInsertionPointAfter(enclosing.getOperation());
+  }
+
+  return acc;
+}
+
+/// Given a reduction operation with an elemental mask and no DIM operand,
+/// generate a do-loop nest that performs the operation inline.
+///   %e = hlfir.elemental %shape unordered
+///   %r = hlfir.count %e
+/// =>
+///   %r = fir.do_loop %arg = 1 to bound(%shape) step 1 iter_args(%arg2 = init)
+///     %i = <inline elemental>
+///     %c = <reduce count> %i
+///     fir.result %c
+template <typename Op>
+class ReductionElementalConversion : public mlir::OpRewritePattern<Op> {
+public:
+  using mlir::OpRewritePattern<Op>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(Op op, mlir::PatternRewriter &rewriter) const override {
+    mlir::Location loc = op.getLoc();
+    hlfir::ElementalOp elemental =
+        op.getMask().template getDefiningOp<hlfir::ElementalOp>();
+    if (!elemental || op.getDim())
+      return rewriter.notifyMatchFailure(op, "Did not find valid elemental");
+
+    fir::KindMapping kindMap =
+        fir::getKindMapping(op->template getParentOfType<mlir::ModuleOp>());
+    fir::FirOpBuilder builder{op, kindMap};
+
+    mlir::Value init;
+    GenBodyFn genBodyFn;
+    if constexpr (std::is_same_v<Op, hlfir::CountOp>) {
+      init = builder.createIntegerConstant(loc, op.getType(), 0);
+      genBodyFn = [elemental](fir::FirOpBuilder builder, mlir::Location loc,
+                              mlir::Value reduction,
+                              const llvm::SmallVectorImpl<mlir::Value> &indices)
+          -> mlir::Value {
+        // Inline the elemental and get the condition from it.
+        auto yield = inlineElementalOp(loc, builder, elemental, indices);
+        mlir::Value cond = builder.create<fir::ConvertOp>(
+            loc, builder.getI1Type(), yield.getElementValue());
+        yield->erase();
+
+        // Conditionally add one to the current value
+        mlir::Value one =
+            builder.createIntegerConstant(loc, reduction.getType(), 1);
+        mlir::Value add1 =
+            builder.create<mlir::arith::AddIOp>(loc, reduction, one);
+        return builder.create<mlir::arith::SelectOp>(loc, cond, add1,
+                                                     reduction);
+      };
+    } else {
+      // Always false, but dependent on Op so only this instantiation fails.
+      static_assert(sizeof(Op) == 0, "Expected Op to be handled");
+      return mlir::failure();
+    }
+    // Operand 0 of the elemental is passed as the shape bounding the loops.
+    mlir::Value res = generateReductionLoop(builder, loc, init,
+                                            elemental.getOperand(0), genBodyFn);
+    if (res.getType() != op.getType())
+      res = builder.create<fir::ConvertOp>(loc, op.getType(), res);
+
+    // Check if the op was the only user of the elemental (apart from a
+    // destroy), and remove it if so.
+    mlir::Operation::user_range elemUsers = elemental->getUsers();
+    hlfir::DestroyOp elemDestroy;
+    if (std::distance(elemUsers.begin(), elemUsers.end()) == 2) {
+      elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*elemUsers.begin());
+      if (!elemDestroy)
+        elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*++elemUsers.begin());
+    }
+    rewriter.replaceOp(op, res);
+    if (elemDestroy) {
+      rewriter.eraseOp(elemDestroy);
+      rewriter.eraseOp(elemental);
+    }
+    return mlir::success();
+  }
+};
+
 class OptimizedBufferizationPass
     : public hlfir::impl::OptimizedBufferizationBase<
           OptimizedBufferizationPass> {
@@ -681,6 +799,7 @@ class OptimizedBufferizationPass
     patterns.insert<ElementalAssignBufferization>(context);
     patterns.insert<BroadcastAssignBufferization>(context);
     patterns.insert<VariableAssignBufferization>(context);
+    patterns.insert<ReductionElementalConversion<hlfir::CountOp>>(context);
 
     if (mlir::failed(mlir::applyPatternsAndFoldGreedily(
             func, std::move(patterns), config))) {
diff --git a/flang/test/HLFIR/count-elemental.fir b/flang/test/HLFIR/count-elemental.fir
new file mode 100644
index 00000000000000..1641e0fae6fb55
--- /dev/null
+++ b/flang/test/HLFIR/count-elemental.fir
@@ -0,0 +1,314 @@
+// RUN: fir-opt %s -opt-bufferization | FileCheck %s
+// hlfir.count (no dim, i32 result) of a rank-1 hlfir.elemental mask: the expected output below replaces both with one fir.do_loop carrying the running count.
+func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = fir.load %2#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> i64
+  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+  %10 = fir.load %5#0 : !fir.ref<i32>
+  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+    %15 = fir.load %14 : !fir.ref<i32>
+    %16 = arith.cmpi sge, %15, %10 : i32
+    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %17 : !fir.logical<4>
+  }
+  %12 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
+  hlfir.assign %12 to %4#0 : i32, !fir.ref<i32>
+  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
+  %13 = fir.load %4#1 : !fir.ref<i32>
+  return %13 : i32
+}
+// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+// CHECK-NEXT:     %c1_i32 = arith.constant 1 : i32
+// CHECK-NEXT:     %c0_i32 = arith.constant 0 : i32
+// CHECK-NEXT:     %c1 = arith.constant 1 : index
+// CHECK-NEXT:     %c4 = arith.constant 4 : index
+// CHECK-NEXT:     %c7 = arith.constant 7 : index
+// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
+// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
+// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i32
+// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
+// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
+// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
+// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
+// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
+// CHECK-NEXT:       %[[V13:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:       %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
+// CHECK-NEXT:       %[[V16:.*]] = arith.addi %arg4, %c1_i32 : i32
+// CHECK-NEXT:       %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i32
+// CHECK-NEXT:       fir.result %[[V17]] : i32
+// CHECK-NEXT:     }
+// CHECK-NEXT:     hlfir.assign %[[V11]] to %[[V4]]#0 : i32, !fir.ref<i32>
+// CHECK-NEXT:     %[[V12:.*]] = fir.load %[[V4]]#1 : !fir.ref<i32>
+// CHECK-NEXT:     return %[[V12]] : i32
+// Same input as @_QFPtest but with an i16 result: the expected loop below carries and yields an i16 count.
+func.func @_QFPtest_kind2(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i16 {
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca i16 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
+  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = fir.load %2#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> i64
+  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+  %10 = fir.load %5#0 : !fir.ref<i32>
+  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+    %15 = fir.load %14 : !fir.ref<i32>
+    %16 = arith.cmpi sge, %15, %10 : i32
+    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %17 : !fir.logical<4>
+  }
+  %12 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i16
+  hlfir.assign %12 to %4#0 : i16, !fir.ref<i16>
+  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
+  %13 = fir.load %4#1 : !fir.ref<i16>
+  return %13 : i16
+}
+// CHECK-LABEL:  func.func @_QFPtest_kind2(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i16 {
+// CHECK-NEXT:     %c1_i16 = arith.constant 1 : i16
+// CHECK-NEXT:     %c0_i16 = arith.constant 0 : i16
+// CHECK-NEXT:     %c1 = arith.constant 1 : index
+// CHECK-NEXT:     %c4 = arith.constant 4 : index
+// CHECK-NEXT:     %c7 = arith.constant 7 : index
+// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
+// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
+// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i16
+// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
+// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
+// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
+// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
+// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i16) -> (i16) {
+// CHECK-NEXT:       %[[V13:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:       %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
+// CHECK-NEXT:       %[[V16:.*]] = arith.addi %arg4, %c1_i16 : i16
+// CHECK-NEXT:       %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i16
+// CHECK-NEXT:       fir.result %[[V17]] : i16
+// CHECK-NEXT:     }
+// CHECK-NEXT:     hlfir.assign %[[V11]] to %[[V4]]#0 : i16, !fir.ref<i16>
+// CHECK-NEXT:     %[[V12:.*]] = fir.load %[[V4]]#1 : !fir.ref<i16>
+// CHECK-NEXT:     return %[[V12]] : i16
+// hlfir.count with a dim operand (array-valued result): the expected output below still contains the hlfir.count with its dim operand.
+func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<7xi32> {
+  %c1_i32 = arith.constant 1 : i32
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca !fir.array<7xi32> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4 = fir.shape %c7 : (index) -> !fir.shape<1>
+  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<7xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<7xi32>>, !fir.ref<!fir.array<7xi32>>)
+  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
+  %8 = fir.load %6#0 : !fir.ref<i32>
+  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
+  ^bb0(%arg3: index, %arg4: index):
+    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
+    %13 = fir.load %12 : !fir.ref<i32>
+    %14 = arith.cmpi sge, %13, %8 : i32
+    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %15 : !fir.logical<4>
+  }
+  %10 = hlfir.count %9 dim %c1_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<7xi32>
+  hlfir.assign %10 to %5#0 : !hlfir.expr<7xi32>, !fir.ref<!fir.array<7xi32>>
+  hlfir.destroy %10 : !hlfir.expr<7xi32>
+  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
+  %11 = fir.load %5#1 : !fir.ref<!fir.array<7xi32>>
+  return %11 : !fir.array<7xi32>
+}
+// CHECK-LABEL:  func.func @_QFPtest_dim(
+// CHECK: %{{.*}} = hlfir.count %{{.*}} dim %c1_i32
+
+// hlfir.count of a rank-3 elemental mask: the expected output below is three nested fir.do_loop ops threading one i32 count through their iter_args.
+func.func @_QFPtest_multi(%arg0: !fir.ref<!fir.array<4x7x2xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %c2 = arith.constant 2 : index
+  %0 = fir.shape %c4, %c7, %c2 : (index, index, index) -> !fir.shape<3>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7x2xi32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<4x7x2xi32>>, !fir.ref<!fir.array<4x7x2xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1, %c1:%c2:%c1)  shape %0 : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.ref<!fir.array<4x7x2xi32>>
+  %7 = fir.load %5#0 : !fir.ref<i32>
+  %8 = hlfir.elemental %0 unordered : (!fir.shape<3>) -> !hlfir.expr<4x7x2x!fir.logical<4>> {
+  ^bb0(%arg3: index, %arg4: index, %arg5: index):
+    %11 = hlfir.designate %6 (%arg3, %arg4, %arg5)  : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index) -> !fir.ref<i32>
+    %12 = fir.load %11 : !fir.ref<i32>
+    %13 = arith.cmpi sge, %12, %7 : i32
+    %14 = fir.convert %13 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %14 : !fir.logical<4>
+  }
+  %9 = hlfir.count %8 : (!hlfir.expr<4x7x2x!fir.logical<4>>) -> i32
+  hlfir.assign %9 to %4#0 : i32, !fir.ref<i32>
+  hlfir.destroy %8 : !hlfir.expr<4x7x2x!fir.logical<4>>
+  %10 = fir.load %4#1 : !fir.ref<i32>
+  return %10 : i32
+}
+// CHECK-LABEL:  func.func @_QFPtest_multi(%arg0: !fir.ref<!fir.array<4x7x2xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+// CHECK-NEXT:     %c1_i32 = arith.constant 1 : i32
+// CHECK-NEXT:     %c0_i32 = arith.constant 0 : i32
+// CHECK-NEXT:     %c1 = arith.constant 1 : index
+// CHECK-NEXT:     %c4 = arith.constant 4 : index
+// CHECK-NEXT:     %c7 = arith.constant 7 : index
+// CHECK-NEXT:     %c2 = arith.constant 2 : index
+// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7, %c2 : (index, index, index) -> !fir.shape<3>
+// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]]) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7x2xi32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<4x7x2xi32>>, !fir.ref<!fir.array<4x7x2xi32>>)
+// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]] {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:     %[[V6:.*]] = hlfir.designate %[[V1]]#0 (%c1:%c4:%c1, %c1:%c7:%c1, %c1:%c2:%c1)  shape %[[V0]] : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.ref<!fir.array<4x7x2xi32>>
+// CHECK-NEXT:     %[[V7:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V8:.*]] = fir.do_loop %arg3 = %c1 to %c4 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
+// CHECK-NEXT:       %[[V10:.*]] = fir.do_loop %arg5 = %c1 to %c7 step %c1 iter_args(%arg6 = %arg4) -> (i32) {
+// CHECK-NEXT:         %[[V11:.*]] = fir.do_loop %arg7 = %c1 to %c2 step %c1 iter_args(%arg8 = %arg6) -> (i32) {
+// CHECK-NEXT:           %[[V12:.*]] = hlfir.designate %[[V6]] (%arg3, %arg5, %arg7)  : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index) -> !fir.ref<i32>
+// CHECK-NEXT:           %[[V13:.*]] = fir.load %[[V12]] : !fir.ref<i32>
+// CHECK-NEXT:           %[[V14:.*]] = arith.cmpi sge, %[[V13]], %[[V7]] : i32
+// CHECK-NEXT:           %[[V15:.*]] = arith.addi %arg8, %c1_i32 : i32
+// CHECK-NEXT:           %[[V16:.*]] = arith.select %[[V14]], %[[V15]], %arg8 : i32
+// CHECK-NEXT:           fir.result %[[V16]] : i32
+// CHECK-NEXT:         }
+// CHECK-NEXT:         fir.result %[[V11]] : i32
+// CHECK-NEXT:       }
+// CHECK-NEXT:       fir.result %[[V10]] : i32
+// CHECK-NEXT:     }
+// CHECK-NEXT:     hlfir.assign %[[V8]] to %[[V4]]#0 : i32, !fir.ref<i32>
+// CHECK-NEXT:     %[[V9:.*]] = fir.load %[[V4]]#1 : !fir.ref<i32>
+// CHECK-NEXT:     return %[[V9]] : i32
+
+
+
+
+// The mask elemental's body applies hlfir.sum to another elemental: the expected output below keeps that hlfir.sum inside the generated fir.do_loop.
+func.func @_QFPtest_rec_sum(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = fir.load %2#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> i64
+  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+  %10 = fir.load %5#0 : !fir.ref<i32>
+  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7xi32> {
+  ^bb0(%arg3: index):
+    %15 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+    %16 = fir.load %15 : !fir.ref<i32>
+    hlfir.yield_element %16 : i32
+  }
+  %12 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %15 = hlfir.sum %11 : (!hlfir.expr<7xi32>) -> i32
+    %16 = arith.cmpi sge, %15, %10 : i32
+    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %17 : !fir.logical<4>
+  }
+  %13 = hlfir.count %12 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
+  hlfir.assign %13 to %4#0 : i32, !fir.ref<i32>
+  hlfir.destroy %12 : !hlfir.expr<7x!fir.logical<4>>
+  hlfir.destroy %11 : !hlfir.expr<7xi32>
+  %14 = fir.load %4#1 : !fir.ref<i32>
+  return %14 : i32
+}
+// CHECK-LABEL:  func.func @_QFPtest_rec_sum(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+// CHECK:    %[[V12:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
+// CHECK:      %[[V14:.*]] = hlfir.sum %[[V11]] : (!hlfir.expr<7xi32>) -> i32
+// CHECK:      %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
+// CHECK:      %[[V16:.*]] = arith.addi %arg4, %c1_i32 : i32
+// CHECK:      %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i32
+// CHECK:      fir.result %[[V17]] : i32
+// CHECK:    }
+
+
+
+// The mask elemental's body itself contains an hlfir.count of another elemental: the expected output below has both counts as fir.do_loop ops, one nested inside the other.
+func.func @_QFPtest_rec_count(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = fir.load %2#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> i64
+  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+  %10 = fir.load %5#0 : !fir.ref<i32>
+  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %15 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+    %16 = fir.load %15 : !fir.ref<i32>
+    %17 = arith.cmpi sge, %16, %10 : i32
+    %18 = fir.convert %17 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %18 : !fir.logical<4>
+  }
+  %12 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %15 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
+    %16 = arith.cmpi sge, %15, %10 : i32
+    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %17 : !fir.logical<4>
+  }
+  %13 = hlfir.count %12 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
+  hlfir.assign %13 to %4#0 : i32, !fir.ref<i32>
+  hlfir.destroy %12 : !hlfir.expr<7x!fir.logical<4>>
+  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
+  %14 = fir.load %4#1 : !fir.ref<i32>
+  return %14 : i32
+}
+// CHECK-LABEL:  func.func @_QFPtest_rec_count(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+// CHECK:    %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
+// CHECK:      %[[V13:.*]] = fir.do_loop %arg5 = %c1 to %c7 step %c1 iter_args(%arg6 = %c0_i32) -> (i32) {
+// CHECK:        %[[V17:.*]] = hlfir.designate %[[V9]] (%arg5)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+// CHECK:        %[[V18:.*]] = fir.load %[[V17]] : !fir.ref<i32>
+// CHECK:        %[[V19:.*]] = arith.cmpi sge, %[[V18]], %[[V10]] : i32
+// CHECK:        %[[V20:.*]] = arith.addi %arg6, %c1_i32 : i32
+// CHECK:        %[[V21:.*]] = arith.select %[[V19]], %[[V20]], %arg6 : i32
+// CHECK:        fir.result %[[V21]] : i32
+// CHECK:      }
+// CHECK:      %[[V14:.*]] = arith.cmpi sge, %[[V13]], %[[V10]] : i32
+// CHECK:      %[[V15:.*]] = arith.addi %arg4, %c1_i32 : i32
+// CHECK:      %[[V16:.*]] = arith.select %[[V14]], %[[V15]], %arg4 : i32
+// CHECK:      fir.result %[[V16]] : i32
+// CHECK:    }

>From aaa3dbb4cb799dcc6f99473dd1f662ae2df33f4e Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 18 Dec 2023 09:23:57 +0000
Subject: [PATCH 2/2] [Flang] Any and All elemental lowering

This is an extension of #75774, with Any and All lowering added alongside Count.
---
 .../Transforms/OptimizedBufferization.cpp     |  34 +++-
 flang/test/HLFIR/all-elemental.fir            |  91 +++++++++
 flang/test/HLFIR/any-elemental.fir            | 190 ++++++++++++++++++
 3 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/HLFIR/all-elemental.fir
 create mode 100644 flang/test/HLFIR/any-elemental.fir

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 7c839f1d20af52..b59076742c73d2 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -728,7 +728,37 @@ class ReductionElementalConversion : public mlir::OpRewritePattern<Op> {
 
     mlir::Value init;
     GenBodyFn genBodyFn;
-    if constexpr (std::is_same_v<Op, hlfir::CountOp>) {
+    if constexpr (std::is_same_v<Op, hlfir::AnyOp>) {
+      init = builder.createIntegerConstant(loc, builder.getI1Type(), 0);
+      genBodyFn = [elemental](fir::FirOpBuilder builder, mlir::Location loc,
+                              mlir::Value reduction,
+                              const llvm::SmallVectorImpl<mlir::Value> &indices)
+          -> mlir::Value {
+        // Inline the elemental and get the condition from it.
+        auto yield = inlineElementalOp(loc, builder, elemental, indices);
+        mlir::Value cond = builder.create<fir::ConvertOp>(
+            loc, builder.getI1Type(), yield.getElementValue());
+        yield->erase();
+
+        // Conditionally set the reduction variable.
+        return builder.create<mlir::arith::OrIOp>(loc, reduction, cond);
+      };
+    } else if constexpr (std::is_same_v<Op, hlfir::AllOp>) {
+      init = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
+      genBodyFn = [elemental](fir::FirOpBuilder builder, mlir::Location loc,
+                              mlir::Value reduction,
+                              const llvm::SmallVectorImpl<mlir::Value> &indices)
+          -> mlir::Value {
+        // Inline the elemental and get the condition from it.
+        auto yield = inlineElementalOp(loc, builder, elemental, indices);
+        mlir::Value cond = builder.create<fir::ConvertOp>(
+            loc, builder.getI1Type(), yield.getElementValue());
+        yield->erase();
+
+        // Conditionally set the reduction variable.
+        return builder.create<mlir::arith::AndIOp>(loc, reduction, cond);
+      };
+    } else if constexpr (std::is_same_v<Op, hlfir::CountOp>) {
       init = builder.createIntegerConstant(loc, op.getType(), 0);
       genBodyFn = [elemental](fir::FirOpBuilder builder, mlir::Location loc,
                               mlir::Value reduction,
@@ -800,6 +830,8 @@ class OptimizedBufferizationPass
     patterns.insert<BroadcastAssignBufferization>(context);
     patterns.insert<VariableAssignBufferization>(context);
     patterns.insert<ReductionElementalConversion<hlfir::CountOp>>(context);
+    patterns.insert<ReductionElementalConversion<hlfir::AnyOp>>(context);
+    patterns.insert<ReductionElementalConversion<hlfir::AllOp>>(context);
 
     if (mlir::failed(mlir::applyPatternsAndFoldGreedily(
             func, std::move(patterns), config))) {
diff --git a/flang/test/HLFIR/all-elemental.fir b/flang/test/HLFIR/all-elemental.fir
new file mode 100644
index 00000000000000..1ba8bb1b7a5fb4
--- /dev/null
+++ b/flang/test/HLFIR/all-elemental.fir
@@ -0,0 +1,91 @@
+// RUN: fir-opt %s -opt-bufferization | FileCheck %s
+
+func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca !fir.logical<4> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = fir.load %2#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> i64
+  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+  %10 = fir.load %5#0 : !fir.ref<i32>
+  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+    %15 = fir.load %14 : !fir.ref<i32>
+    %16 = arith.cmpi sge, %15, %10 : i32
+    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %17 : !fir.logical<4>
+  }
+  %12 = hlfir.all %11 : (!hlfir.expr<7x!fir.logical<4>>) -> !fir.logical<4>
+  hlfir.assign %12 to %4#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
+  %13 = fir.load %4#1 : !fir.ref<!fir.logical<4>>
+  return %13 : !fir.logical<4>
+}
+// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
+// CHECK-NEXT:     %true = arith.constant true
+// CHECK-NEXT:     %c1 = arith.constant 1 : index
+// CHECK-NEXT:     %c4 = arith.constant 4 : index
+// CHECK-NEXT:     %c7 = arith.constant 7 : index
+// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
+// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
+// CHECK-NEXT:     %[[V3:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
+// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
+// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
+// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
+// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %true) -> (i1) {
+// CHECK-NEXT:       %[[V14:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:       %[[V15:.*]] = fir.load %[[V14]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[V16:.*]] = arith.cmpi sge, %[[V15]], %[[V10]] : i32
+// CHECK-NEXT:       %[[V17:.*]] = arith.andi %arg4, %[[V16]] : i1
+// CHECK-NEXT:       fir.result %[[V17]] : i1
+// CHECK-NEXT:     }
+// CHECK-NEXT:     %[[V12:.*]] = fir.convert %[[V11]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:     hlfir.assign %[[V12]] to %[[V4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:     %[[V13:.*]] = fir.load %[[V4]]#1 : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:     return %[[V13]] : !fir.logical<4>
+
+
+func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<4x!fir.logical<4>> {
+  %c2_i32 = arith.constant 2 : i32
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca !fir.array<4x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
+  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.ref<!fir.array<4x!fir.logical<4>>>)
+  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
+  %8 = fir.load %6#0 : !fir.ref<i32>
+  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
+  ^bb0(%arg3: index, %arg4: index):
+    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
+    %13 = fir.load %12 : !fir.ref<i32>
+    %14 = arith.cmpi sge, %13, %8 : i32
+    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %15 : !fir.logical<4>
+  }
+  %10 = hlfir.all %9 dim %c2_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<4x!fir.logical<4>>
+  hlfir.assign %10 to %5#0 : !hlfir.expr<4x!fir.logical<4>>, !fir.ref<!fir.array<4x!fir.logical<4>>>
+  hlfir.destroy %10 : !hlfir.expr<4x!fir.logical<4>>
+  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
+  %11 = fir.load %5#1 : !fir.ref<!fir.array<4x!fir.logical<4>>>
+  return %11 : !fir.array<4x!fir.logical<4>>
+}
+// CHECK-LABEL:  func.func @_QFPtest_dim(
+// CHECK: {{.*}} = hlfir.all {{.*}} dim %c2_i32
\ No newline at end of file
diff --git a/flang/test/HLFIR/any-elemental.fir b/flang/test/HLFIR/any-elemental.fir
new file mode 100644
index 00000000000000..6e233068d2e9b5
--- /dev/null
+++ b/flang/test/HLFIR/any-elemental.fir
@@ -0,0 +1,190 @@
+// RUN: fir-opt %s -opt-bufferization | FileCheck %s
+
+func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca !fir.logical<4> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = fir.load %2#0 : !fir.ref<i32>
+  %7 = fir.convert %6 : (i32) -> i64
+  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+  %10 = fir.load %5#0 : !fir.ref<i32>
+  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+    %15 = fir.load %14 : !fir.ref<i32>
+    %16 = arith.cmpi sge, %15, %10 : i32
+    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %17 : !fir.logical<4>
+  }
+  %12 = hlfir.any %11 : (!hlfir.expr<7x!fir.logical<4>>) -> !fir.logical<4>
+  hlfir.assign %12 to %4#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
+  %13 = fir.load %4#1 : !fir.ref<!fir.logical<4>>
+  return %13 : !fir.logical<4>
+}
+// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
+// CHECK-NEXT:     %false = arith.constant false
+// CHECK-NEXT:     %c1 = arith.constant 1 : index
+// CHECK-NEXT:     %c4 = arith.constant 4 : index
+// CHECK-NEXT:     %c7 = arith.constant 7 : index
+// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
+// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
+// CHECK-NEXT:     %[[V3:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
+// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
+// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
+// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
+// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
+// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
+// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %false) -> (i1) {
+// CHECK-NEXT:       %[[V14:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:       %[[V15:.*]] = fir.load %[[V14]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[V16:.*]] = arith.cmpi sge, %[[V15]], %[[V10]] : i32
+// CHECK-NEXT:       %[[V17:.*]] = arith.ori %arg4, %[[V16]] : i1
+// CHECK-NEXT:       fir.result %[[V17]] : i1
+// CHECK-NEXT:     }
+// CHECK-NEXT:     %[[V12:.*]] = fir.convert %[[V11]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:     hlfir.assign %[[V12]] to %[[V4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:     %[[V13:.*]] = fir.load %[[V4]]#1 : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:     return %[[V13]] : !fir.logical<4>
+
+
+func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<4x!fir.logical<4>> {
+  %c2_i32 = arith.constant 2 : i32
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %c7 = arith.constant 7 : index
+  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.alloca !fir.array<4x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
+  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.ref<!fir.array<4x!fir.logical<4>>>)
+  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
+  %8 = fir.load %6#0 : !fir.ref<i32>
+  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
+  ^bb0(%arg3: index, %arg4: index):
+    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
+    %13 = fir.load %12 : !fir.ref<i32>
+    %14 = arith.cmpi sge, %13, %8 : i32
+    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %15 : !fir.logical<4>
+  }
+  %10 = hlfir.any %9 dim %c2_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<4x!fir.logical<4>>
+  hlfir.assign %10 to %5#0 : !hlfir.expr<4x!fir.logical<4>>, !fir.ref<!fir.array<4x!fir.logical<4>>>
+  hlfir.destroy %10 : !hlfir.expr<4x!fir.logical<4>>
+  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
+  %11 = fir.load %5#1 : !fir.ref<!fir.array<4x!fir.logical<4>>>
+  return %11 : !fir.array<4x!fir.logical<4>>
+}
+// CHECK-LABEL:  func.func @_QFPtest_dim(
+// CHECK: {{.*}} = hlfir.any {{.*}} dim %c2_i32
+
+
+func.func @_Qtest_recursive() attributes {fir.bindc_name = "test"} {
+  %c1 = arith.constant 1 : index
+  %true = arith.constant true
+  %false = arith.constant false
+  %c0_i64 = arith.constant 0 : i64
+  %c2_i32 = arith.constant 2 : i32
+  %c0 = arith.constant 0 : index
+  %c1_i32 = arith.constant 1 : i32
+  %0 = fir.address_of(@_QFEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %1:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+  %2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+  %3:2 = hlfir.declare %2 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %4 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFEn"}
+  %5:2 = hlfir.declare %4 {uniq_name = "_QFEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %6 = fir.alloca !fir.array<1x!fir.logical<4>> {bindc_name = "ra", uniq_name = "_QFEra"}
+  %7 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %8:2 = hlfir.declare %6(%7) {uniq_name = "_QFEra"} : (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.ref<!fir.array<1x!fir.logical<4>>>)
+  %9 = fir.alloca !fir.logical<4> {bindc_name = "rs", uniq_name = "_QFErs"}
+  %10:2 = hlfir.declare %9 {uniq_name = "_QFErs"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+  %11 = fir.allocmem !fir.array<?xi32>, %c1 {fir.must_be_heap = true, uniq_name = "_QFEa.alloc"}
+  %12 = fir.embox %11(%7) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %12 to %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  hlfir.assign %c1_i32 to %5#0 : i32, !fir.ref<i32>
+  %13 = fir.load %1#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %14:3 = fir.box_dims %13, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  fir.do_loop %arg0 = %c1 to %14#1 step %c1 unordered {
+    %27:3 = fir.box_dims %13, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+    %28 = arith.subi %27#0, %c1 : index
+    %29 = arith.addi %arg0, %28 : index
+    %30 = hlfir.designate %13 (%29)  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+    hlfir.assign %c2_i32 to %30 : i32, !fir.ref<i32>
+  }
+  %15 = fir.load %5#0 : !fir.ref<i32>
+  %16 = fir.convert %15 : (i32) -> i64
+  %17 = arith.cmpi sgt, %16, %c0_i64 : i64
+  %18 = arith.select %17, %16, %c0_i64 : i64
+  %19 = fir.convert %18 : (i64) -> index
+  %20 = fir.shape %19 : (index) -> !fir.shape<1>
+  %21 = hlfir.elemental %20 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg0: index):
+    %27 = fir.load %1#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+    %28:3 = fir.box_dims %27, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+    %29 = arith.addi %28#0, %28#1 : index
+    %30 = arith.subi %29, %c1 : index
+    %31 = arith.subi %30, %28#0 : index
+    %32 = arith.addi %31, %c1 : index
+    %33 = arith.cmpi sgt, %32, %c0 : index
+    %34 = arith.select %33, %32, %c0 : index
+    %35 = fir.shape %34 : (index) -> !fir.shape<1>
+    %36 = hlfir.designate %27 (%28#0:%30:%c1)  shape %35 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+    %37 = hlfir.elemental %35 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+    ^bb0(%arg1: index):
+      %39 = hlfir.designate %36 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+      %40 = fir.load %39 : !fir.ref<i32>
+      %41 = arith.cmpi eq, %40, %c1_i32 : i32
+      %42 = fir.convert %41 : (i1) -> !fir.logical<4>
+      hlfir.yield_element %42 : !fir.logical<4>
+    }
+    %38 = hlfir.any %37 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<4>
+    hlfir.destroy %37 : !hlfir.expr<?x!fir.logical<4>>
+    hlfir.yield_element %38 : !fir.logical<4>
+  }
+  %22 = hlfir.any %21 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<4>
+  hlfir.assign %22 to %10#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
+  hlfir.destroy %21 : !hlfir.expr<?x!fir.logical<4>>
+  %23 = fir.load %10#0 : !fir.ref<!fir.logical<4>>
+  %24 = fir.convert %23 : (!fir.logical<4>) -> i1
+  %25 = arith.xori %24, %true : i1
+  cf.cond_br %25, ^bb1, ^bb2
+^bb1:  // pred: ^bb0
+  %26 = fir.call @_FortranAStopStatement(%c2_i32, %false, %false) fastmath<contract> : (i32, i1, i1) -> none
+  fir.unreachable
+^bb2:  // pred: ^bb0
+  return
+}
+// CHECK-LABEL: func.func @_Qtest_recursive()
+// CHECK:    %[[V20:.*]] = fir.do_loop %arg0 = %c1 to %{{.*}} step %c1 iter_args(%arg1 = %false) -> (i1) {
+// CHECK:      %[[V26:.*]] = fir.load %[[V1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:      %[[V27:.*]]:3 = fir.box_dims %[[V26]], %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK:      %[[V28:.*]] = arith.addi %[[V27]]#0, %[[V27]]#1 : index
+// CHECK:      %[[V29:.*]] = arith.subi %[[V28]], %c1 : index
+// CHECK:      %[[V30:.*]] = arith.subi %[[V29]], %[[V27]]#0 : index
+// CHECK:      %[[V31:.*]] = arith.addi %[[V30]], %c1 : index
+// CHECK:      %[[V32:.*]] = arith.cmpi sgt, %[[V31]], %c0 : index
+// CHECK:      %[[V33:.*]] = arith.select %[[V32]], %[[V31]], %c0 : index
+// CHECK:      %[[V34:.*]] = fir.shape %[[V33]] : (index) -> !fir.shape<1>
+// CHECK:      %[[V35:.*]] = hlfir.designate %[[V26]] (%[[V27]]#0:%[[V29]]:%c1)  shape %[[V34]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:      %[[V36:.*]] = fir.do_loop %arg2 = %c1 to %[[V33]] step %c1 iter_args(%arg3 = %false) -> (i1) {
+// CHECK:        %[[V38:.*]] = hlfir.designate %[[V35]] (%arg2)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:        %[[V39:.*]] = fir.load %[[V38]] : !fir.ref<i32>
+// CHECK:        %[[V40:.*]] = arith.cmpi eq, %[[V39]], %c1_i32 : i32
+// CHECK:        %[[V41:.*]] = arith.ori %arg3, %[[V40]] : i1
+// CHECK:        fir.result %[[V41]] : i1
+// CHECK:      }
+// CHECK:      %[[V37:.*]] = arith.ori %arg1, %[[V36]] : i1
+// CHECK:      fir.result %[[V37]] : i1
+// CHECK:    }



More information about the flang-commits mailing list