[flang-commits] [flang] 20fba03 - [Flang] Add Any and All intrinsics to simplify intrinsics pass

Thu Feb 9 11:53:10 PST 2023

Author: Sacha Ballantyne
Date: 2023-02-09T19:52:15Z
New Revision: 20fba03f96b43d4e6903e0a7c24ae7b665ab9a58

URL: https://github.com/llvm/llvm-project/commit/20fba03f96b43d4e6903e0a7c24ae7b665ab9a58
DIFF: https://github.com/llvm/llvm-project/commit/20fba03f96b43d4e6903e0a7c24ae7b665ab9a58.diff

LOG: [Flang] Add Any and All intrinsics to simplify intrinsics pass

This patch provides a simplified version of the Any intrinsic as well as the All intrinsic
that can be used for inlining or simpler use cases. These changes are targeting exchange2, and
provide a ~9% performance increase.

Reviewed By: Leporacanthicus, vzakhari

Differential Revision: https://reviews.llvm.org/D142977

Added: 
    

Modified: 
    flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
    flang/test/Lower/array-derived.f90
    flang/test/Transforms/simplifyintrinsics.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index de65c487a0348..89e6e0810e577 100644

--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -27,6 +27,7 @@
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/Support/FIRContext.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "flang/Runtime/entry-names.h"
@@ -39,6 +40,10 @@
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include <mlir/IR/Location.h>
+#include <mlir/IR/MLIRContext.h>
+#include <mlir/IR/Value.h>
+#include <mlir/Support/LLVM.h>
 #include <optional>
 
 namespace fir {
@@ -57,7 +62,8 @@ class SimplifyIntrinsicsPass
   using FunctionBodyGeneratorTy =
       llvm::function_ref<void(fir::FirOpBuilder &, mlir::func::FuncOp &)>;
   using GenReductionBodyTy = llvm::function_ref<void(
-      fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, unsigned rank)>;
+      fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp, unsigned rank,
+      mlir::Type elementType)>;
 
 public:
   /// Generate a new function implementing a simplified version
@@ -82,13 +88,17 @@ class SimplifyIntrinsicsPass
   void simplifyIntOrFloatReduction(fir::CallOp call,
                                    const fir::KindMapping &kindMap,
                                    GenReductionBodyTy genBodyFunc);
-  void simplifyLogicalReduction(fir::CallOp call,
-                                const fir::KindMapping &kindMap,
-                                GenReductionBodyTy genBodyFunc);
+  void simplifyLogicalDim0Reduction(fir::CallOp call,
+                                    const fir::KindMapping &kindMap,
+                                    GenReductionBodyTy genBodyFunc);
+  void simplifyLogicalDim1Reduction(fir::CallOp call,
+                                    const fir::KindMapping &kindMap,
+                                    GenReductionBodyTy genBodyFunc);
   void simplifyReductionBody(fir::CallOp call, const fir::KindMapping &kindMap,
                              GenReductionBodyTy genBodyFunc,
                              fir::FirOpBuilder &builder,
-                             const mlir::StringRef &basename);
+                             const mlir::StringRef &basename,
+                             mlir::Type elementType);
 };
 
 } // namespace
@@ -136,22 +146,30 @@ using BodyOpGeneratorTy = llvm::function_ref<mlir::Value(
     mlir::Value)>;
 using InitValGeneratorTy = llvm::function_ref<mlir::Value(
     fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
+using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>(
+    fir::FirOpBuilder &, mlir::Location, mlir::Value)>;
 
 /// Generate the reduction loop into \p funcOp.
 ///
-/// \p elementType is the type of the elements in the input array,
-///    which may be different to the return type.
 /// \p initVal is a function, called to get the initial value for
 ///    the reduction value
 /// \p genBody is called to fill in the actual reduciton operation
 ///    for example add for SUM, MAX for MAXVAL, etc.
 /// \p rank is the rank of the input argument.
-static void genReductionLoop(fir::FirOpBuilder &builder, mlir::Type elementType,
-                             mlir::func::FuncOp &funcOp,
-                             InitValGeneratorTy initVal,
-                             BodyOpGeneratorTy genBody, unsigned rank) {
-  auto loc = mlir::UnknownLoc::get(builder.getContext());
-  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
+/// \p elementType is the type of the elements in the input array,
+///    which may be different to the return type.
+/// \p loopCond is called to generate the condition to continue or
+///    not for IterWhile loops
+/// \p unorderedOrInitialLoopCond contains either a boolean or bool
+///    mlir constant, and controls the initial value for while loops
+///    or if DoLoop is ordered/unordered.
+
+template <typename OP, typename T, int resultIndex>
+static void
+genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
+                 InitValGeneratorTy initVal, ContinueLoopGenTy loopCond,
+                 T unorderedOrInitialLoopCond, BodyOpGeneratorTy genBody,
+                 unsigned rank, mlir::Type elementType, mlir::Location loc) {
 
   mlir::IndexType idxTy = builder.getIndexType();
 
@@ -186,8 +204,7 @@ static void genReductionLoop(fir::FirOpBuilder &builder, mlir::Type elementType,
     mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
     bounds.push_back(loopCount);
   }
-
-  // Create a loop nest consisting of DoLoopOp operations.
+  // Create a loop nest consisting of OP operations.
   // Collect the loops' induction variables into indices array,
   // which will be used in the innermost loop to load the input
   // array's element.
@@ -197,9 +214,9 @@ static void genReductionLoop(fir::FirOpBuilder &builder, mlir::Type elementType,
   for (unsigned i = rank; 0 < i; --i) {
     mlir::Value step = one;
     mlir::Value loopCount = bounds[i - 1];
-    auto loop = builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step,
-                                              /*unordered=*/false,
-                                              /*finalCountValue=*/false, init);
+    auto loop = builder.create<OP>(loc, zeroIdx, loopCount, step,
+                                   unorderedOrInitialLoopCond,
+                                   /*finalCountValue=*/false, init);
     init = loop.getRegionIterArgs()[0];
     indices.push_back(loop.getInductionVar());
     // Set insertion point to the loop body so that the next loop
@@ -210,31 +227,38 @@ static void genReductionLoop(fir::FirOpBuilder &builder, mlir::Type elementType,
   // Reverse the indices such that they are ordered as:
   //   <dim-0-idx, dim-1-idx, ...>
   std::reverse(indices.begin(), indices.end());
-
   // We are in the innermost loop: generate the reduction body.
   mlir::Type eleRefTy = builder.getRefType(elementType);
   mlir::Value addr =
       builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
   mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
-
   mlir::Value reductionVal = genBody(builder, loc, elementType, elem, init);
+  // Generate vector with condition to continue while loop at [0] and result
+  // from current loop at [1] for IterWhileOp loops, just result at [0] for
+  // DoLoopOp loops.
+  llvm::SmallVector<mlir::Value> results = loopCond(builder, loc, reductionVal);
 
   // Unwind the loop nest and insert ResultOp on each level
   // to return the updated value of the reduction to the enclosing
   // loops.
   for (unsigned i = 0; i < rank; ++i) {
-    auto result = builder.create<fir::ResultOp>(loc, reductionVal);
+    auto result = builder.create<fir::ResultOp>(loc, results);
     // Proceed to the outer loop.
-    auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
-    reductionVal = loop.getResult(0);
+    auto loop = mlir::cast<OP>(result->getParentOp());
+    results = loop.getResults();
     // Set insertion point after the loop operation that we have
     // just processed.
     builder.setInsertionPointAfter(loop.getOperation());
   }
-
   // End of loop nest. The insertion point is after the outermost loop.
   // Return the reduction value from the function.
-  builder.create<mlir::func::ReturnOp>(loc, reductionVal);
+  builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]);
+}
+
+static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder,
+                                                  mlir::Location,
+                                                  mlir::Value reductionVal) {
+  return {reductionVal};
 }
 
 /// Generate function body of the simplified version of RTNAME(Sum)
@@ -243,7 +267,8 @@ static void genReductionLoop(fir::FirOpBuilder &builder, mlir::Type elementType,
 /// \p funcOp is expected to be empty on entry to this function.
 /// \p rank specifies the rank of the input argument.
 static void genRuntimeSumBody(fir::FirOpBuilder &builder,
-                              mlir::func::FuncOp &funcOp, unsigned rank) {
+                              mlir::func::FuncOp &funcOp, unsigned rank,
+                              mlir::Type elementType) {
   // function RTNAME(Sum)<T>x<rank>_simplified(arr)
   //   T, dimension(:) :: arr
   //   T sum = 0
@@ -275,13 +300,17 @@ static void genRuntimeSumBody(fir::FirOpBuilder &builder,
     return {};
   };
 
-  mlir::Type elementType = funcOp.getResultTypes()[0];
+  mlir::Location loc = mlir::UnknownLoc::get(builder.getContext());
+  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
 
-  genReductionLoop(builder, elementType, funcOp, zero, genBodyOp, rank);
+  genReductionLoop<fir::DoLoopOp, bool, 0>(builder, funcOp, zero, nopLoopCond,
+                                           false, genBodyOp, rank, elementType,
+                                           loc);
 }
 
 static void genRuntimeMaxvalBody(fir::FirOpBuilder &builder,
-                                 mlir::func::FuncOp &funcOp, unsigned rank) {
+                                 mlir::func::FuncOp &funcOp, unsigned rank,
+                                 mlir::Type elementType) {
   auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
                  mlir::Type elementType) {
     if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
@@ -306,13 +335,17 @@ static void genRuntimeMaxvalBody(fir::FirOpBuilder &builder,
     return {};
   };
 
-  mlir::Type elementType = funcOp.getResultTypes()[0];
+  mlir::Location loc = mlir::UnknownLoc::get(builder.getContext());
+  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
 
-  genReductionLoop(builder, elementType, funcOp, init, genBodyOp, rank);
+  genReductionLoop<fir::DoLoopOp, bool, 0>(builder, funcOp, init, nopLoopCond,
+                                           false, genBodyOp, rank, elementType,
+                                           loc);
 }
 
 static void genRuntimeCountBody(fir::FirOpBuilder &builder,
-                                mlir::func::FuncOp &funcOp, unsigned rank) {
+                                mlir::func::FuncOp &funcOp, unsigned rank,
+                                mlir::Type elementType) {
   auto zero = [](fir::FirOpBuilder builder, mlir::Location loc,
                  mlir::Type elementType) {
     unsigned bits = elementType.getIntOrFloatBitWidth();
@@ -334,9 +367,78 @@ static void genRuntimeCountBody(fir::FirOpBuilder &builder,
     return builder.create<mlir::arith::AddIOp>(loc, select, elem2);
   };
 
-  mlir::Type elementType = builder.getI32Type();
+  // Count always gets I32 for elementType as it converts logical input to
+  // logical<4> before passing to the function.
+  mlir::Location loc = mlir::UnknownLoc::get(builder.getContext());
+  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
 
-  genReductionLoop(builder, elementType, funcOp, zero, genBodyOp, rank);
+  genReductionLoop<fir::DoLoopOp, bool, 0>(builder, funcOp, zero, nopLoopCond,
+                                           false, genBodyOp, rank, elementType,
+                                           loc);
+}
+
+static void genRuntimeAnyBody(fir::FirOpBuilder &builder,
+                              mlir::func::FuncOp &funcOp, unsigned rank,
+                              mlir::Type elementType) {
+  auto zero = [](fir::FirOpBuilder builder, mlir::Location loc,
+                 mlir::Type elementType) {
+    return builder.createIntegerConstant(loc, elementType, 0);
+  };
+
+  auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc,
+                      mlir::Type elementType, mlir::Value elem1,
+                      mlir::Value elem2) -> mlir::Value {
+    auto zero = builder.createIntegerConstant(loc, elementType, 0);
+    return builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::ne, elem1, zero);
+  };
+
+  auto continueCond = [](fir::FirOpBuilder builder, mlir::Location loc,
+                         mlir::Value reductionVal) {
+    auto one1 = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
+    auto eor = builder.create<mlir::arith::XOrIOp>(loc, reductionVal, one1);
+    llvm::SmallVector<mlir::Value> results = {eor, reductionVal};
+    return results;
+  };
+
+  mlir::Location loc = mlir::UnknownLoc::get(builder.getContext());
+  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
+  mlir::Value ok = builder.createBool(loc, true);
+
+  genReductionLoop<fir::IterWhileOp, mlir::Value, 1>(
+      builder, funcOp, zero, continueCond, ok, genBodyOp, rank, elementType,
+      loc);
+}
+
+static void genRuntimeAllBody(fir::FirOpBuilder &builder,
+                              mlir::func::FuncOp &funcOp, unsigned rank,
+                              mlir::Type elementType) {
+  auto one = [](fir::FirOpBuilder builder, mlir::Location loc,
+                mlir::Type elementType) {
+    return builder.createIntegerConstant(loc, elementType, 1);
+  };
+
+  auto genBodyOp = [](fir::FirOpBuilder builder, mlir::Location loc,
+                      mlir::Type elementType, mlir::Value elem1,
+                      mlir::Value elem2) -> mlir::Value {
+    auto zero = builder.createIntegerConstant(loc, elementType, 0);
+    return builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::ne, elem1, zero);
+  };
+
+  auto continueCond = [](fir::FirOpBuilder builder, mlir::Location loc,
+                         mlir::Value reductionVal) {
+    llvm::SmallVector<mlir::Value> results = {reductionVal, reductionVal};
+    return results;
+  };
+
+  mlir::Location loc = mlir::UnknownLoc::get(builder.getContext());
+  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
+  mlir::Value ok = builder.createBool(loc, true);
+
+  genReductionLoop<fir::IterWhileOp, mlir::Value, 1>(
+      builder, funcOp, one, continueCond, ok, genBodyOp, rank, elementType,
+      loc);
 }
 
 /// Generate function type for the simplified version of RTNAME(DotProduct)
@@ -612,10 +714,11 @@ void SimplifyIntrinsicsPass::simplifyIntOrFloatReduction(
        (fmfString.empty() ? mlir::Twine{} : mlir::Twine{"_", fmfString}))
           .str();
 
-  simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName);
+  simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName,
+                        resultType);
 }
 
-void SimplifyIntrinsicsPass::simplifyLogicalReduction(
+void SimplifyIntrinsicsPass::simplifyLogicalDim0Reduction(
     fir::CallOp call, const fir::KindMapping &kindMap,
     GenReductionBodyTy genBodyFunc) {
 
@@ -623,26 +726,79 @@ void SimplifyIntrinsicsPass::simplifyLogicalReduction(
   const mlir::Value &dim = args[3];
   unsigned rank = getDimCount(args[0]);
 
-  // Rank is set to 0 for assumed shape arrays, don't simplify
-  // in these cases
+  // getDimCount returns a rank of 0 for assumed shape arrays, don't simplify in
+  // these cases.
   if (!(isZero(dim) && rank > 0))
     return;
 
+  mlir::Value inputBox = findBoxDef(args[0]);
+  LLVM_DEBUG(llvm::dbgs() << "Boxdef was: " << inputBox << '\n');
+
+  mlir::Type elementType = hlfir::getFortranElementType(inputBox.getType());
   mlir::SymbolRefAttr callee = call.getCalleeAttr();
 
   fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)};
+
+  LLVM_DEBUG(llvm::dbgs() << "In DIM0 simplify" << '\n');
+  // Treating logicals as integers makes things a lot easier
+  fir::LogicalType logicalType = {elementType.dyn_cast<fir::LogicalType>()};
+  LLVM_DEBUG(llvm::dbgs() << "Done logical cast, got: " << logicalType << '\n');
+  fir::KindTy kind = logicalType.getFKind();
+  mlir::Type intElementType =
+      mlir::IntegerType::get(builder.getContext(), kind * 8);
+
+  // Mangle kind into function name as it is not done by default
   std::string funcName =
-      (mlir::Twine{callee.getLeafReference().getValue(), "x"} +
-       mlir::Twine{rank})
+      (mlir::Twine{callee.getLeafReference().getValue(), "Logical"} +
+       mlir::Twine{kind} + "x" + mlir::Twine{rank})
+          .str();
+
+  LLVM_DEBUG(llvm::dbgs() << "end of DIM0" << '\n');
+
+  simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName,
+                        intElementType);
+}
+
+void SimplifyIntrinsicsPass::simplifyLogicalDim1Reduction(
+    fir::CallOp call, const fir::KindMapping &kindMap,
+    GenReductionBodyTy genBodyFunc) {
+
+  mlir::Operation::operand_range args = call.getArgs();
+  mlir::SymbolRefAttr callee = call.getCalleeAttr();
+  mlir::StringRef funcNameBase = callee.getLeafReference().getValue();
+  unsigned rank = getDimCount(args[0]);
+
+  // getDimCount returns a rank of 0 for assumed shape arrays, don't simplify in
+  // these cases. We check for Dim at the end as some logical functions (Any,
+  // All) set dim to 1 instead of 0 when the argument is not present.
+  if (funcNameBase.ends_with("Dim") || !(rank > 0))
+    return;
+
+  mlir::Value inputBox = findBoxDef(args[0]);
+  mlir::Type elementType = hlfir::getFortranElementType(inputBox.getType());
+
+  fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)};
+
+  // Treating logicals as integers makes things a lot easier
+  fir::LogicalType logicalType = {elementType.dyn_cast<fir::LogicalType>()};
+  fir::KindTy kind = logicalType.getFKind();
+  mlir::Type intElementType =
+      mlir::IntegerType::get(builder.getContext(), kind * 8);
+
+  // Mangle kind into function name as it is not done by default
+  std::string funcName =
+      (mlir::Twine{callee.getLeafReference().getValue(), "Logical"} +
+       mlir::Twine{kind} + "x" + mlir::Twine{rank})
           .str();
 
-  simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName);
+  simplifyReductionBody(call, kindMap, genBodyFunc, builder, funcName,
+                        intElementType);
 }
 
 void SimplifyIntrinsicsPass::simplifyReductionBody(
     fir::CallOp call, const fir::KindMapping &kindMap,
     GenReductionBodyTy genBodyFunc, fir::FirOpBuilder &builder,
-    const mlir::StringRef &funcName) {
+    const mlir::StringRef &funcName, mlir::Type elementType) {
 
   mlir::Operation::operand_range args = call.getArgs();
 
@@ -654,9 +810,10 @@ void SimplifyIntrinsicsPass::simplifyReductionBody(
   auto typeGenerator = [&resultType](fir::FirOpBuilder &builder) {
     return genNoneBoxType(builder, resultType);
   };
-  auto bodyGenerator = [&rank, &genBodyFunc](fir::FirOpBuilder &builder,
-                                             mlir::func::FuncOp &funcOp) {
-    genBodyFunc(builder, funcOp, rank);
+  auto bodyGenerator = [&rank, &genBodyFunc,
+                        &elementType](fir::FirOpBuilder &builder,
+                                      mlir::func::FuncOp &funcOp) {
+    genBodyFunc(builder, funcOp, rank, elementType);
   };
   // Mangle the function name with the rank value as "x<rank>".
   mlir::func::FuncOp newFunc =
@@ -761,7 +918,17 @@ void SimplifyIntrinsicsPass::runOnOperation() {
           return;
         }
         if (funcName.startswith(RTNAME_STRING(Count))) {
-          simplifyLogicalReduction(call, kindMap, genRuntimeCountBody);
+          LLVM_DEBUG(llvm::dbgs() << "Count" << '\n');
+          simplifyLogicalDim0Reduction(call, kindMap, genRuntimeCountBody);
+          return;
+        }
+        if (funcName.startswith(RTNAME_STRING(Any))) {
+          LLVM_DEBUG(llvm::dbgs() << "Any" << '\n');
+          simplifyLogicalDim1Reduction(call, kindMap, genRuntimeAnyBody);
+          return;
+        }
+        if (funcName.endswith(RTNAME_STRING(All))) {
+          simplifyLogicalDim1Reduction(call, kindMap, genRuntimeAllBody);
           return;
         }
       }

diff  --git a/flang/test/Lower/array-derived.f90 b/flang/test/Lower/array-derived.f90
index 00df7beef9afd..1ad8a53ef92b8 100644
--- a/flang/test/Lower/array-derived.f90
+++ b/flang/test/Lower/array-derived.f90
@@ -29,7 +29,7 @@ function c1(e, c)
     ! CHECK: %[[slice0:.*]] = fir.slice %c1{{.*}}, %[[ext0]]#1, %c1{{.*}} path %[[fldn]] : (index, index, index, !fir.field) -> !fir.slice<1>
     ! CHECK-DAG: = fir.array_coor %[[arg1]] [%[[slice1]]] %[[index:.*]] : (!fir.box<!fir.array<?x!fir.type<_QMcsTr{n:i32,d:i32}>>>, !fir.slice<1>, index) -> !fir.ref<i32>
     ! CHECK-DAG: = fir.array_coor %[[arg0]] [%[[slice0]]] %[[index]] : (!fir.box<!fir.array<?x!fir.type<_QMcsTr{n:i32,d:i32}>>>, !fir.slice<1>, index) -> !fir.ref<i32>
-    ! CHECK: = fir.call @_FortranAAll(
+    ! CHECK: = fir.call @_FortranAAllLogical4x1_simplified(
     c1 = all(c%n == e%n)
   end function c1
 

diff  --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 282fafcd49186..d21cf19a72862 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -1132,13 +1132,13 @@ fir.global linkonce @_QQcl.2E2F746573746661696C2E66393000 constant : !fir.char<1
 // CHECK:           %[[A_BOX_LOGICAL:.*]] = fir.embox %{{.*}}(%[[SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
 // CHECK:           %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_LOGICAL]] : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
 // CHECK-NOT:       fir.call @_FortranACount({{.*}})
-// CHECK:           %[[RES:.*]] = fir.call @_FortranACountx1_simplified(%[[A_BOX_NONE]]) fastmath<contract> : (!fir.box<none>) -> i64
+// CHECK:           %[[RES:.*]] = fir.call @_FortranACountLogical4x1_simplified(%[[A_BOX_NONE]]) fastmath<contract> : (!fir.box<none>) -> i64
 // CHECK-NOT:       fir.call @_FortranACount({{.*}})
 // CHECK:           return %{{.*}} : i32
 // CHECK:         }
 // CHECK:         func.func private @_FortranACount(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i64 attributes {fir.runtime}
 
-// CHECK-LABEL:   func.func private @_FortranACountx1_simplified(
+// CHECK-LABEL:   func.func private @_FortranACountLogical4x1_simplified(
 // CHECK-SAME:                                                            %[[ARR:.*]]: !fir.box<none>) -> i64 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
 // CHECK:           %[[C_INDEX0:.*]] = arith.constant 0 : index
 // CHECK:           %[[ARR_BOX_I32:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
@@ -1211,9 +1211,9 @@ func.func private @_FortranACountDim(!fir.ref<!fir.box<none>>, !fir.box<none>, i
 
 // CHECK-LABEL:   func.func @_QMtestPcount_generate_mask(
 // CHECK-SAME:                                           %[[A:.*]]: !fir.ref<!fir.array<10x10x!fir.logical<4>>> {fir.bindc_name = "mask"}) -> !fir.array<10xi32> {
-// CHECK-NOT        fir.call @_FortranACountDim_simplified({{.*}})
+// CHECK-NOT        fir.call @_FortranACountDimLogical4_simplified({{.*}})
 // CHECK:           %[[RES:.*]] = fir.call @_FortranACountDim({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32) -> none
-// CHECK-NOT        fir.call @_FortranACountDim_simplified({{.*}})
+// CHECK-NOT        fir.call @_FortranACountDimLogical4_simplified({{.*}})
 
 // -----
 // Ensure count isn't simplified for unknown dimension arrays
@@ -1236,6 +1236,345 @@ func.func private @_FortranACount(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i64
 
 // CHECK-LABEL:   func.func @_QPmc(
 // CHECK-SAME:                     %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?x!fir.logical<4>>> {fir.bindc_name = "m"}) -> i32 {
-// CHECK-NOT        fir.call @_FortranACount_simplified({{.*}})
+// CHECK-NOT:       fir.call @_FortranACountLogical4_simplified({{.*}})
 // CHECK:           %[[RES:.*]] = fir.call @_FortranACount({{.*}}) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i64
-// CHECK-NOT        fir.call @_FortranACount_simplified({{.*}})
+// CHECK-NOT:       fir.call @_FortranACountLogical4_simplified({{.*}})
+
+// -----
+// Ensure Any is simplified in correct usage
+
+func.func @_QPtestAny_NoDimArg(%arg0: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.logical<4> {
+  %c10 = arith.constant 10 : index
+  %0 = fir.alloca !fir.logical<4> {bindc_name = "testAny_NoDimArg", uniq_name = "_QFtestAny_NoDimArgEtestAny_NoDimArg"}
+  %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+  %c1 = arith.constant 1 : index
+  %3 = fir.address_of(@_QQcl.04ab56883945fd2c21a3b6d132f0bb37) : !fir.ref<!fir.char<1,48>>
+  %c3_i32 = arith.constant 3 : i32
+  %4 = fir.convert %2 : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+  %5 = fir.convert %3 : (!fir.ref<!fir.char<1,48>>) -> !fir.ref<i8>
+  %6 = fir.convert %c1 : (index) -> i32
+  %7 = fir.call @_FortranAAny(%4, %5, %c3_i32, %6) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1
+  %8 = fir.convert %7 : (i1) -> !fir.logical<4>
+  fir.store %8 to %0 : !fir.ref<!fir.logical<4>>
+  %9 = fir.load %0 : !fir.ref<!fir.logical<4>>
+  return %9 : !fir.logical<4>
+}
+func.func private @_FortranAAny(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1 attributes {fir.runtime}
+
+// CHECK-LABEL:   func.func @_QPtestAny_NoDimArg(
+// CHECK-SAME:                          %[[ARR:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.logical<4> {
+// CHECK:           %[[SIZE:.*]] = arith.constant 10 : index
+// CHECK:           %[[SHAPE:.*]] = fir.shape %[[SIZE]] : (index) -> !fir.shape<1>
+// CHECK:           %[[A_BOX_LOGICAL:.*]] = fir.embox %[[ARR]](%[[SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+// CHECK:           %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_LOGICAL]] : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+// CHECK:           %[[RES:.*]] = fir.call @_FortranAAnyLogical4x1_simplified(%[[A_BOX_NONE]]) fastmath<contract> : (!fir.box<none>) -> i1
+// CHECK:         }
+
+// CHECK-LABEL:   func.func private @_FortranAAnyLogical4x1_simplified(
+// CHECK-SAME:                                                 %[[ARR:.*]]: !fir.box<none>) -> i1 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[INIT_COND:.*]] = arith.constant true
+// CHECK:           %[[C_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[A_BOX_I32:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[C_INDEX1:.*]] = arith.constant 1 : index
+// CHECK:           %[[DIM_IDX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS:.*]]:3 = fir.box_dims %[[A_BOX_I32]], %[[DIM_IDX0]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK:           %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
+// CHECK:           %[[RES:.*]]:2 = fir.iterate_while (%[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]]) and (%[[OK:.*]] = %[[INIT_COND]]) iter_args(%[[INIT:.*]] = %[[FALSE]]) -> (i1) {
+// CHECK:             %[[ITEM:.*]] = fir.coordinate_of %[[A_BOX_I32]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:             %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<i32>
+// CHECK:             %[[I32_0:.*]] = arith.constant 0 : i32
+// CHECK:             %[[CMP:.*]] = arith.cmpi ne, %[[ITEM_VAL]], %[[I32_0]] : i32
+// CHECK:             %[[I1_1:.*]] = arith.constant true
+// CHECK:             %[[CONTINUE:.*]] = arith.xori %[[CMP]], %[[I1_1]] : i1
+// CHECK:             fir.result %[[CONTINUE]], %[[CMP]] : i1, i1
+// CHECK:           }
+// CHECK:           return %[[RES:.*]]#1 : i1
+// CHECK:         }
+
+// -----
+// Ensure Any is simplified correctly for different kind logical
+
+func.func @_QPtestAny_NoDimArgLogical8(%arg0: !fir.ref<!fir.array<10x!fir.logical<8>>> {fir.bindc_name = "a"}) -> !fir.logical<8> {
+  %c10 = arith.constant 10 : index
+  %0 = fir.alloca !fir.logical<8> {bindc_name = "testAny_NoDimArgLogical8", uniq_name = "_QFtestAny_NoDimArgLogical8EtestAny_NoDimArgLogical8"}
+  %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10x!fir.logical<8>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<8>>>
+  %c1 = arith.constant 1 : index
+  %3 = fir.address_of(@_QQcl.04ab56883945fd2c21a3b6d132f0bb37) : !fir.ref<!fir.char<1,48>>
+  %c3_i32 = arith.constant 3 : i32
+  %4 = fir.convert %2 : (!fir.box<!fir.array<10x!fir.logical<8>>>) -> !fir.box<none>
+  %5 = fir.convert %3 : (!fir.ref<!fir.char<1,48>>) -> !fir.ref<i8>
+  %6 = fir.convert %c1 : (index) -> i32
+  %7 = fir.call @_FortranAAny(%4, %5, %c3_i32, %6) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1
+  %8 = fir.convert %7 : (i1) -> !fir.logical<8>
+  fir.store %8 to %0 : !fir.ref<!fir.logical<8>>
+  %9 = fir.load %0 : !fir.ref<!fir.logical<8>>
+  return %9 : !fir.logical<8>
+}
+func.func private @_FortranAAny(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1 attributes {fir.runtime}
+
+// CHECK-LABEL:   func.func @_QPtestAny_NoDimArgLogical8(
+// CHECK-SAME:                          %[[ARR:.*]]: !fir.ref<!fir.array<10x!fir.logical<8>>> {fir.bindc_name = "a"}) -> !fir.logical<8> {
+// CHECK:           %[[SIZE:.*]] = arith.constant 10 : index
+// CHECK:           %[[SHAPE:.*]] = fir.shape %[[SIZE]] : (index) -> !fir.shape<1>
+// CHECK:           %[[A_BOX_LOGICAL:.*]] = fir.embox %[[ARR]](%[[SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<8>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<8>>>
+// CHECK:           %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_LOGICAL]] : (!fir.box<!fir.array<10x!fir.logical<8>>>) -> !fir.box<none>
+// CHECK:           %[[RES:.*]] = fir.call @_FortranAAnyLogical8x1_simplified(%[[A_BOX_NONE]]) fastmath<contract> : (!fir.box<none>) -> i1
+// CHECK:         }
+
+// CHECK-LABEL:   func.func private @_FortranAAnyLogical8x1_simplified(
+// CHECK-SAME:                                                 %[[ARR:.*]]: !fir.box<none>) -> i1 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[INIT_COND:.*]] = arith.constant true
+// CHECK:           %[[C_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[A_BOX_I64:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi64>>
+// CHECK:           %[[FALSE:.*]] = arith.constant false
+// CHECK:           %[[C_INDEX1:.*]] = arith.constant 1 : index
+// CHECK:           %[[DIM_IDX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS:.*]]:3 = fir.box_dims %[[A_BOX_I64]], %[[DIM_IDX0]] : (!fir.box<!fir.array<?xi64>>, index) -> (index, index, index)
+// CHECK:           %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
+// CHECK:           %[[RES:.*]]:2 = fir.iterate_while (%[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]]) and (%[[OK:.*]] = %[[INIT_COND]]) iter_args(%[[INIT:.*]] = %[[FALSE]]) -> (i1) {
+// CHECK:             %[[ITEM:.*]] = fir.coordinate_of %[[A_BOX_I64]], %[[ITER]] : (!fir.box<!fir.array<?xi64>>, index) -> !fir.ref<i64>
+// CHECK:             %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<i64>
+// CHECK:             %[[I64_0:.*]] = arith.constant 0 : i64
+// CHECK:             %[[CMP:.*]] = arith.cmpi ne, %[[ITEM_VAL]], %[[I64_0]] : i64
+// CHECK:             %[[I1_1:.*]] = arith.constant true
+// CHECK:             %[[CONTINUE:.*]] = arith.xori %[[CMP]], %[[I1_1]] : i1
+// CHECK:             fir.result %[[CONTINUE]], %[[CMP]] : i1, i1
+// CHECK:           }
+// CHECK:           return %[[RES:.*]]#1 : i1
+// CHECK:         }
+
+// -----
+// Ensure Any is not simplified when call ends in 'Dim'
+
+func.func @_QPtestAny_DimArg(%arg0: !fir.ref<!fir.array<10x10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.array<10x!fir.logical<4>> {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+  %c10 = arith.constant 10 : index
+  %c10_0 = arith.constant 10 : index
+  %c10_1 = arith.constant 10 : index
+  %1 = fir.alloca !fir.array<10x!fir.logical<4>> {bindc_name = "testAny_DimArg", uniq_name = "_QFtestAny_DimArgEtestAny_DimArg"}
+  %2 = fir.shape %c10_1 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.array<10x!fir.logical<4>>
+  %c2_i32 = arith.constant 2 : i32
+  %4 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2>
+  %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10x10x!fir.logical<4>>>, !fir.shape<2>) -> !fir.box<!fir.array<10x10x!fir.logical<4>>>
+  %6 = fir.zero_bits !fir.heap<!fir.array<?x!fir.logical<4>>>
+  %c0 = arith.constant 0 : index
+  %7 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %8 = fir.embox %6(%7) : (!fir.heap<!fir.array<?x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+  fir.store %8 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
+  %9 = fir.address_of(@_QQcl.04ab56883945fd2c21a3b6d132f0bb37) : !fir.ref<!fir.char<1,48>>
+  %c3_i32 = arith.constant 3 : i32
+  %10 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>) -> !fir.ref<!fir.box<none>>
+  %11 = fir.convert %5 : (!fir.box<!fir.array<10x10x!fir.logical<4>>>) -> !fir.box<none>
+  %12 = fir.convert %9 : (!fir.ref<!fir.char<1,48>>) -> !fir.ref<i8>
+  %13 = fir.call @_FortranAAnyDim(%10, %11, %c2_i32, %12, %c3_i32) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> none
+  %14 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
+  %c0_2 = arith.constant 0 : index
+  %15:3 = fir.box_dims %14, %c0_2 : (!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>, index) -> (index, index, index)
+  %16 = fir.box_addr %14 : (!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>) -> !fir.heap<!fir.array<?x!fir.logical<4>>>
+  %17 = fir.shape_shift %15#0, %15#1 : (index, index) -> !fir.shapeshift<1>
+  %18 = fir.array_load %16(%17) : (!fir.heap<!fir.array<?x!fir.logical<4>>>, !fir.shapeshift<1>) -> !fir.array<?x!fir.logical<4>>
+  %c1 = arith.constant 1 : index
+  %c0_3 = arith.constant 0 : index
+  %19 = arith.subi %c10_1, %c1 : index
+  %20 = fir.do_loop %arg1 = %c0_3 to %19 step %c1 unordered iter_args(%arg2 = %3) -> (!fir.array<10x!fir.logical<4>>) {
+    %22 = fir.array_fetch %18, %arg1 : (!fir.array<?x!fir.logical<4>>, index) -> !fir.logical<4>
+    %23 = fir.array_update %arg2, %22, %arg1 : (!fir.array<10x!fir.logical<4>>, !fir.logical<4>, index) -> !fir.array<10x!fir.logical<4>>
+    fir.result %23 : !fir.array<10x!fir.logical<4>>
+  }
+  fir.array_merge_store %3, %20 to %1 : !fir.array<10x!fir.logical<4>>, !fir.array<10x!fir.logical<4>>, !fir.ref<!fir.array<10x!fir.logical<4>>>
+  fir.freemem %16 : !fir.heap<!fir.array<?x!fir.logical<4>>>
+  %21 = fir.load %1 : !fir.ref<!fir.array<10x!fir.logical<4>>>
+  return %21 : !fir.array<10x!fir.logical<4>>
+}
+func.func private @_FortranAAnyDim(!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> none attributes {fir.runtime}
+
+// CHECK-LABEL:   func.func @_QPtestAny_DimArg(
+// CHECK-SAME:                          %[[ARR:.*]]: !fir.ref<!fir.array<10x10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.array<10x!fir.logical<4>> {
+// CHECK-NOT:       fir.call @_FortranAAnyDimLogical4x1_simplified({{.*}})
+// CHECK:           fir.call @_FortranAAnyDim({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> none
+// CHECK-NOT:       fir.call @_FortranAAnyDimLogical4x1_simplified({{.*}})
+
+// -----
+// Ensure Any is not simplified for unknown dimension arrays
+
+func.func @_QPtestAny_UnknownDim(%arg0: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.logical<4> {
+  %0 = fir.alloca !fir.logical<4> {bindc_name = "testAny_UnknownDim", uniq_name = "_QFtestAny_UnknownDimEtestAny_UnknownDim"}
+  %c1 = arith.constant 1 : index
+  %1 = fir.address_of(@_QQcl.04ab56883945fd2c21a3b6d132f0bb37) : !fir.ref<!fir.char<1,48>>
+  %c3_i32 = arith.constant 3 : i32
+  %2 = fir.convert %arg0 : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> !fir.box<none>
+  %3 = fir.convert %1 : (!fir.ref<!fir.char<1,48>>) -> !fir.ref<i8>
+  %4 = fir.convert %c1 : (index) -> i32
+  %5 = fir.call @_FortranAAny(%2, %3, %c3_i32, %4) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1
+  %6 = fir.convert %5 : (i1) -> !fir.logical<4>
+  fir.store %6 to %0 : !fir.ref<!fir.logical<4>>
+  %7 = fir.load %0 : !fir.ref<!fir.logical<4>>
+  return %7 : !fir.logical<4>
+}
+func.func private @_FortranAAny(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1 attributes {fir.runtime}
+
+// CHECK-LABEL:   func.func @_QPtestAny_UnknownDim(
+// CHECK-SAME:                          %[[VAL_0:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.logical<4> {
+// CHECK-NOT:       fir.call @_FortranAAnyLogical4x1_simplified({{.*}})
+// CHECK:           fir.call @_FortranAAny({{.*}}) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1
+// CHECK-NOT:       fir.call @_FortranAAnyLogical4x1_simplified({{.*}})
+
+// -----
+// Ensure All is simplified in correct usage
+
+func.func @_QPtestAll_NoDimArg(%arg0: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.logical<4> {
+  %c10 = arith.constant 10 : index
+  %0 = fir.alloca !fir.logical<4> {bindc_name = "testAll_NoDimArg", uniq_name = "_QFtestAll_NoDimArgEtestAll_NoDimArg"}
+  %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+  %c1 = arith.constant 1 : index
+  %3 = fir.address_of(@_QQcl.04ab56883945fd2c21a3b6d132f0bb37) : !fir.ref<!fir.char<1,48>>
+  %c3_i32 = arith.constant 3 : i32
+  %4 = fir.convert %2 : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+  %5 = fir.convert %3 : (!fir.ref<!fir.char<1,48>>) -> !fir.ref<i8>
+  %6 = fir.convert %c1 : (index) -> i32
+  %7 = fir.call @_FortranAAll(%4, %5, %c3_i32, %6) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1
+  %8 = fir.convert %7 : (i1) -> !fir.logical<4>
+  fir.store %8 to %0 : !fir.ref<!fir.logical<4>>
+  %9 = fir.load %0 : !fir.ref<!fir.logical<4>>
+  return %9 : !fir.logical<4>
+}
+func.func private @_FortranAAll(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1 attributes {fir.runtime}
+
+// CHECK-LABEL:   func.func @_QPtestAll_NoDimArg(
+// CHECK-SAME:                          %[[ARR:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.logical<4> {
+// CHECK:           %[[SIZE:.*]] = arith.constant 10 : index
+// CHECK:           %[[SHAPE:.*]] = fir.shape %[[SIZE]] : (index) -> !fir.shape<1>
+// CHECK:           %[[A_BOX_LOGICAL:.*]] = fir.embox %[[ARR]](%[[SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+// CHECK:           %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_LOGICAL]] : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+// CHECK:           %[[RES:.*]] = fir.call @_FortranAAllLogical4x1_simplified(%[[A_BOX_NONE]]) fastmath<contract> : (!fir.box<none>) -> i1
+// CHECK:         }
+
+// CHECK-LABEL:   func.func private @_FortranAAllLogical4x1_simplified(
+// CHECK-SAME:                                                 %[[ARR:.*]]: !fir.box<none>) -> i1 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[INIT_COND:.*]] = arith.constant true
+// CHECK:           %[[C_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[A_BOX_I32:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[TRUE:.*]] = arith.constant true
+// CHECK:           %[[C_INDEX1:.*]] = arith.constant 1 : index
+// CHECK:           %[[DIM_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS:.*]]:3 = fir.box_dims %[[A_BOX_I32]], %[[DIM_INDEX0]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK:           %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
+// CHECK:           %[[RES:.*]]:2 = fir.iterate_while (%[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]]) and (%[[OK:.*]] = %[[INIT_COND]]) iter_args(%[[INIT:.*]] = %[[TRUE]]) -> (i1) {
+// CHECK:             %[[ITEM:.*]] = fir.coordinate_of %[[A_BOX_I32]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:             %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<i32>
+// CHECK:             %[[I32_0:.*]] = arith.constant 0 : i32
+// CHECK:             %[[CMP_AND_CONTINUE:.*]] = arith.cmpi ne, %[[ITEM_VAL]], %[[I32_0]] : i32
+// CHECK:             fir.result %[[CMP_AND_CONTINUE]], %[[CMP_AND_CONTINUE]] : i1, i1
+// CHECK:           }
+// CHECK:           return %[[RES:.*]]#1 : i1
+// CHECK:         }
+
+// -----
+// Ensure All is correctly simplified for different kind logical
+
+
+func.func @_QPtestAll_NoDimArgLogical1(%arg0: !fir.ref<!fir.array<10x!fir.logical<1>>> {fir.bindc_name = "a"}) -> !fir.logical<1> {
+  %c10 = arith.constant 10 : index
+  %0 = fir.alloca !fir.logical<1> {bindc_name = "testAll_NoDimArgLogical1", uniq_name = "_QFtestAll_NoDimArgLogical1EtestAll_NoDimArgLogical1"}
+  %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %2 = fir.embox %arg0(%1) : (!fir.ref<!fir.array<10x!fir.logical<1>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<1>>>
+  %c1 = arith.constant 1 : index
+  %3 = fir.address_of(@_QQcl.04ab56883945fd2c21a3b6d132f0bb37) : !fir.ref<!fir.char<1,48>>
+  %c3_i32 = arith.constant 3 : i32
+  %4 = fir.convert %2 : (!fir.box<!fir.array<10x!fir.logical<1>>>) -> !fir.box<none>
+  %5 = fir.convert %3 : (!fir.ref<!fir.char<1,48>>) -> !fir.ref<i8>
+  %6 = fir.convert %c1 : (index) -> i32
+  %7 = fir.call @_FortranAAll(%4, %5, %c3_i32, %6) fastmath<contract> : (!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1
+  %8 = fir.convert %7 : (i1) -> !fir.logical<1>
+  fir.store %8 to %0 : !fir.ref<!fir.logical<1>>
+  %9 = fir.load %0 : !fir.ref<!fir.logical<1>>
+  return %9 : !fir.logical<1>
+}
+func.func private @_FortranAAll(!fir.box<none>, !fir.ref<i8>, i32, i32) -> i1 attributes {fir.runtime}
+
+// CHECK-LABEL:   func.func @_QPtestAll_NoDimArgLogical1(
+// CHECK-SAME:                          %[[ARR:.*]]: !fir.ref<!fir.array<10x!fir.logical<1>>> {fir.bindc_name = "a"}) -> !fir.logical<1> {
+// CHECK:           %[[SIZE:.*]] = arith.constant 10 : index
+// CHECK:           %[[SHAPE:.*]] = fir.shape %[[SIZE]] : (index) -> !fir.shape<1>
+// CHECK:           %[[A_BOX_LOGICAL:.*]] = fir.embox %[[ARR]](%[[SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<1>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<1>>>
+// CHECK:           %[[A_BOX_NONE:.*]] = fir.convert %[[A_BOX_LOGICAL]] : (!fir.box<!fir.array<10x!fir.logical<1>>>) -> !fir.box<none>
+// CHECK:           %[[RES:.*]] = fir.call @_FortranAAllLogical1x1_simplified(%[[A_BOX_NONE]]) fastmath<contract> : (!fir.box<none>) -> i1
+// CHECK:         }
+
+// CHECK-LABEL:   func.func private @_FortranAAllLogical1x1_simplified(
+// CHECK-SAME:                                                 %[[ARR:.*]]: !fir.box<none>) -> i1 attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[INIT_COND:.*]] = arith.constant true
+// CHECK:           %[[C_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[A_BOX_I8:.*]] = fir.convert %[[ARR]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi8>>
+// CHECK:           %[[TRUE:.*]] = arith.constant true
+// CHECK:           %[[C_INDEX1:.*]] = arith.constant 1 : index
+// CHECK:           %[[DIM_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS:.*]]:3 = fir.box_dims %[[A_BOX_I8]], %[[DIM_INDEX0]] : (!fir.box<!fir.array<?xi8>>, index) -> (index, index, index)
+// CHECK:           %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
+// CHECK:           %[[RES:.*]]:2 = fir.iterate_while (%[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]]) and (%[[OK:.*]] = %[[INIT_COND]]) iter_args(%[[INIT:.*]] = %[[TRUE]]) -> (i1) {
+// CHECK:             %[[ITEM:.*]] = fir.coordinate_of %[[A_BOX_I8]], %[[ITER]] : (!fir.box<!fir.array<?xi8>>, index) -> !fir.ref<i8>
+// CHECK:             %[[ITEM_VAL:.*]] = fir.load %[[ITEM]] : !fir.ref<i8>
+// CHECK:             %[[I8_0:.*]] = arith.constant 0 : i8
+// CHECK:             %[[CMP_AND_CONTINUE:.*]] = arith.cmpi ne, %[[ITEM_VAL]], %[[I8_0]] : i8
+// CHECK:             fir.result %[[CMP_AND_CONTINUE]], %[[CMP_AND_CONTINUE]] : i1, i1
+// CHECK:           }
+// CHECK:           return %[[RES:.*]]#1 : i1
+// CHECK:         }
+
+
+// -----
+//  Ensure All is not simplified when call ends in 'Dim'
+
+func.func @_QPtestAll_DimArg(%arg0: !fir.ref<!fir.array<10x10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.array<10x!fir.logical<4>> {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+  %c10 = arith.constant 10 : index
+  %c10_0 = arith.constant 10 : index
+  %c10_1 = arith.constant 10 : index
+  %1 = fir.alloca !fir.array<10x!fir.logical<4>> {bindc_name = "testAll_DimArg", uniq_name = "_QFtestAll_DimArgEtestAll_DimArg"}
+  %2 = fir.shape %c10_1 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.array<10x!fir.logical<4>>
+  %c1_i32 = arith.constant 1 : i32
+  %4 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2>
+  %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10x10x!fir.logical<4>>>, !fir.shape<2>) -> !fir.box<!fir.array<10x10x!fir.logical<4>>>
+  %6 = fir.zero_bits !fir.heap<!fir.array<?x!fir.logical<4>>>
+  %c0 = arith.constant 0 : index
+  %7 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %8 = fir.embox %6(%7) : (!fir.heap<!fir.array<?x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+  fir.store %8 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
+  %9 = fir.address_of(@_QQcl.04ab56883945fd2c21a3b6d132f0bb37) : !fir.ref<!fir.char<1,48>>
+  %c3_i32 = arith.constant 3 : i32
+  %10 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>) -> !fir.ref<!fir.box<none>>
+  %11 = fir.convert %5 : (!fir.box<!fir.array<10x10x!fir.logical<4>>>) -> !fir.box<none>
+  %12 = fir.convert %9 : (!fir.ref<!fir.char<1,48>>) -> !fir.ref<i8>
+  %13 = fir.call @_FortranAAllDim(%10, %11, %c1_i32, %12, %c3_i32) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> none
+  %14 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
+  %c0_2 = arith.constant 0 : index
+  %15:3 = fir.box_dims %14, %c0_2 : (!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>, index) -> (index, index, index)
+  %16 = fir.box_addr %14 : (!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>) -> !fir.heap<!fir.array<?x!fir.logical<4>>>
+  %17 = fir.shape_shift %15#0, %15#1 : (index, index) -> !fir.shapeshift<1>
+  %18 = fir.array_load %16(%17) : (!fir.heap<!fir.array<?x!fir.logical<4>>>, !fir.shapeshift<1>) -> !fir.array<?x!fir.logical<4>>
+  %c1 = arith.constant 1 : index
+  %c0_3 = arith.constant 0 : index
+  %19 = arith.subi %c10_1, %c1 : index
+  %20 = fir.do_loop %arg1 = %c0_3 to %19 step %c1 unordered iter_args(%arg2 = %3) -> (!fir.array<10x!fir.logical<4>>) {
+    %22 = fir.array_fetch %18, %arg1 : (!fir.array<?x!fir.logical<4>>, index) -> !fir.logical<4>
+    %23 = fir.array_update %arg2, %22, %arg1 : (!fir.array<10x!fir.logical<4>>, !fir.logical<4>, index) -> !fir.array<10x!fir.logical<4>>
+    fir.result %23 : !fir.array<10x!fir.logical<4>>
+  }
+  fir.array_merge_store %3, %20 to %1 : !fir.array<10x!fir.logical<4>>, !fir.array<10x!fir.logical<4>>, !fir.ref<!fir.array<10x!fir.logical<4>>>
+  fir.freemem %16 : !fir.heap<!fir.array<?x!fir.logical<4>>>
+  %21 = fir.load %1 : !fir.ref<!fir.array<10x!fir.logical<4>>>
+  return %21 : !fir.array<10x!fir.logical<4>>
+}
+func.func private @_FortranAAllDim(!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> none attributes {fir.runtime}
+
+// CHECK-LABEL:   func.func @_QPtestAll_DimArg(
+// CHECK-SAME:                          %[[ARR:.*]]: !fir.ref<!fir.array<10x10x!fir.logical<4>>> {fir.bindc_name = "a"}) -> !fir.array<10x!fir.logical<4>> {
+// CHECK-NOT:       fir.call @_FortranAAllDimLogical4x1_simplified({{.*}})
+// CHECK:           fir.call @_FortranAAllDim({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> none
+// CHECK-NOT:       fir.call @_FortranAAllDimLogical4x1_simplified({{.*}})