[flang-commits] [flang] 59b18b5 - [flang] Avoid unnecessary temporaries in ArrayValueCopy.

Mon Jan 23 12:33:54 PST 2023

Author: Slava Zakharin
Date: 2023-01-23T12:33:37-08:00
New Revision: 59b18b5c91ec655189c73a123cb5204062601671

URL: https://github.com/llvm/llvm-project/commit/59b18b5c91ec655189c73a123cb5204062601671
DIFF: https://github.com/llvm/llvm-project/commit/59b18b5c91ec655189c73a123cb5204062601671.diff

LOG: [flang] Avoid unnecessary temporaries in ArrayValueCopy.

Assume no conflict between pointer arrays and arrays without the target
attribute, if the fact of an array not having the target attribute
can be reliably computed.

This change speeds up SPEC CPU2017/527.cam from 2.5k seconds to 880 seconds
on Icelake, and makes further performance investigation easier.

Differential Revision: https://reviews.llvm.org/D142273

Added: 
    flang/test/Fir/array-value-copy-cam4.fir

Modified: 
    flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
    flang/include/flang/Optimizer/Transforms/Passes.h
    flang/include/flang/Optimizer/Transforms/Passes.td
    flang/include/flang/Tools/CLOptions.inc
    flang/lib/Optimizer/Dialect/FIROps.cpp
    flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
    flang/test/Fir/array-copies-pointers.fir
    flang/test/Fir/array-modify.fir
    flang/test/Fir/array-value-copy-2.fir
    flang/test/Fir/array-value-copy-3.fir
    flang/test/Fir/array-value-copy-4.fir

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
index 17cc99491cb6b..f67ca9cfcba1e 100644

--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -106,7 +106,7 @@ inline bool isInternalPorcedure(mlir::func::FuncOp func) {
 
 /// Tell if \p value is:
 ///   - a function argument that has attribute \p attributeName
-///   - or, the result of fir.alloca/fir.allocamem op that has attribute \p
+///   - or, the result of fir.alloca/fir.allocmem op that has attribute \p
 ///     attributeName
 ///   - or, the result of a fir.address_of of a fir.global that has attribute \p
 ///     attributeName
@@ -114,6 +114,19 @@ inline bool isInternalPorcedure(mlir::func::FuncOp func) {
 ///     previous cases.
 bool valueHasFirAttribute(mlir::Value value, llvm::StringRef attributeName);
 
+/// A more conservative version of valueHasFirAttribute().
+/// If `value` is one of the operation/function-argument cases listed
+/// for valueHasFirAttribute():
+///   * if any of the `attributeNames` attributes is set, then the function
+///     will return true.
+///   * otherwise, it will return false.
+///
+/// Otherwise, the function will return true indicating that the attributes
+/// may actually be set but we were not able to reach the point of definition
+/// to confirm that.
+bool valueMayHaveFirAttributes(mlir::Value value,
+                               llvm::ArrayRef<llvm::StringRef> attributeNames);
+
 /// Scan the arguments of a FuncOp to determine if any arguments have the
 /// attribute `attr` placed on them. This can be used to determine if the
 /// function has any host associations, for example.

diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index dee514bc444ac..54a5d958d7315 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -47,7 +47,8 @@ namespace fir {
 std::unique_ptr<mlir::Pass> createAbstractResultOnFuncOptPass();
 std::unique_ptr<mlir::Pass> createAbstractResultOnGlobalOptPass();
 std::unique_ptr<mlir::Pass> createAffineDemotionPass();
-std::unique_ptr<mlir::Pass> createArrayValueCopyPass();
+std::unique_ptr<mlir::Pass>
+createArrayValueCopyPass(fir::ArrayValueCopyOptions options = {});
 std::unique_ptr<mlir::Pass> createFirToCfgPass();
 std::unique_ptr<mlir::Pass> createCharacterConversionPass();
 std::unique_ptr<mlir::Pass> createExternalNameConversionPass();

diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 6aec8b330573c..85a412ab046d6 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -117,6 +117,12 @@ def ArrayValueCopy : Pass<"array-value-copy", "::mlir::func::FuncOp"> {
   }];
   let constructor = "::fir::createArrayValueCopyPass()";
   let dependentDialects = [ "fir::FIROpsDialect" ];
+  let options = [
+    Option<"optimizeConflicts", "optimize-conflicts", "bool",
+           /*default=*/"false",
+           "do more detailed conflict analysis to reduce the number "
+           "of temporaries">
+  ];
 }
 
 def CharacterConversion : Pass<"character-conversion"> {

diff  --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 3857ea3e7285b..f499c11f54777 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -104,9 +104,12 @@ inline void addCfgConversionPass(mlir::PassManager &pm) {
       pm, disableCfgConversion, fir::createFirToCfgPass);
 }
 
-inline void addAVC(mlir::PassManager &pm) {
+inline void addAVC(
+    mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) {
+  ArrayValueCopyOptions options;
+  options.optimizeConflicts = optLevel.isOptimizingForSpeed();
   addNestedPassConditionally<mlir::func::FuncOp>(
-      pm, disableFirAvc, fir::createArrayValueCopyPass);
+      pm, disableFirAvc, [&]() { return createArrayValueCopyPass(options); });
 }
 
 inline void addMemoryAllocationOpt(mlir::PassManager &pm) {
@@ -169,7 +172,7 @@ inline void createDefaultFIROptimizerPassPipeline(
   mlir::GreedyRewriteConfig config;
   config.enableRegionSimplification = false;
   pm.addPass(mlir::createCSEPass());
-  fir::addAVC(pm);
+  fir::addAVC(pm, optLevel);
   pm.addNestedPass<mlir::func::FuncOp>(fir::createCharacterConversionPass());
   pm.addPass(mlir::createCanonicalizerPass(config));
   pm.addPass(fir::createSimplifyRegionLitePass());

diff  --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index dcedff6aae1cb..023e49f39ddfb 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -3540,8 +3540,48 @@ bool fir::hasHostAssociationArgument(mlir::func::FuncOp func) {
   return false;
 }
 
-bool fir::valueHasFirAttribute(mlir::Value value,
-                               llvm::StringRef attributeName) {
+// Test if value's definition has the specified set of
+// attributeNames. The value's definition is one of the operations
+// that are able to carry the Fortran variable attributes, e.g.
+// fir.alloca or fir.allocmem. Function arguments may also represent
+// value definitions and carry relevant attributes.
+//
+// If it is not possible to reach the limited set of definition
+// entities from the given value, then the function will return
+// std::nullopt. Otherwise, the definition is known and the return
+// value is computed as:
+//   * if checkAny is true, then the function will return true
+//     iff any of the attributeNames attributes is set on the definition.
+//   * if checkAny is false, then the function will return true
+//     iff all of the attributeNames attributes are set on the definition.
+static std::optional<bool>
+valueCheckFirAttributes(mlir::Value value,
+                        llvm::ArrayRef<llvm::StringRef> attributeNames,
+                        bool checkAny) {
+  auto testAttributeSets = [&](llvm::ArrayRef<mlir::NamedAttribute> setAttrs,
+                               llvm::ArrayRef<llvm::StringRef> checkAttrs) {
+    if (checkAny) {
+      // Return true iff any of checkAttrs attributes is present
+      // in setAttrs set.
+      for (llvm::StringRef checkAttrName : checkAttrs)
+        if (llvm::any_of(setAttrs, [&](mlir::NamedAttribute setAttr) {
+              return setAttr.getName() == checkAttrName;
+            }))
+          return true;
+
+      return false;
+    }
+
+    // Return true iff all attributes from checkAttrs are present
+    // in setAttrs set.
+    for (mlir::StringRef checkAttrName : checkAttrs)
+      if (llvm::none_of(setAttrs, [&](mlir::NamedAttribute setAttr) {
+            return setAttr.getName() == checkAttrName;
+          }))
+        return false;
+
+    return true;
+  };
   // If this is a fir.box that was loaded, the fir attributes will be on the
   // related fir.ref<fir.box> creation.
   if (value.getType().isa<fir::BoxType>())
@@ -3553,32 +3593,50 @@ bool fir::valueHasFirAttribute(mlir::Value value,
     if (blockArg.getOwner() && blockArg.getOwner()->isEntryBlock())
       if (auto funcOp = mlir::dyn_cast<mlir::func::FuncOp>(
               blockArg.getOwner()->getParentOp()))
-        if (funcOp.getArgAttr(blockArg.getArgNumber(), attributeName))
-          return true;
-    return false;
+        return testAttributeSets(
+            mlir::cast<mlir::FunctionOpInterface>(*funcOp).getArgAttrs(
+                blockArg.getArgNumber()),
+            attributeNames);
+
+    // If it is not a function argument, the attributes are unknown.
+    return std::nullopt;
   }
 
   if (auto definingOp = value.getDefiningOp()) {
     // If this is an allocated value, look at the allocation attributes.
     if (mlir::isa<fir::AllocMemOp>(definingOp) ||
-        mlir::isa<AllocaOp>(definingOp))
-      return definingOp->hasAttr(attributeName);
+        mlir::isa<fir::AllocaOp>(definingOp))
+      return testAttributeSets(definingOp->getAttrs(), attributeNames);
     // If this is an imported global, look at AddrOfOp and GlobalOp attributes.
     // Both operations are looked at because use/host associated variable (the
     // AddrOfOp) can have ASYNCHRONOUS/VOLATILE attributes even if the ultimate
     // entity (the globalOp) does not have them.
     if (auto addressOfOp = mlir::dyn_cast<fir::AddrOfOp>(definingOp)) {
-      if (addressOfOp->hasAttr(attributeName))
+      if (testAttributeSets(addressOfOp->getAttrs(), attributeNames))
         return true;
       if (auto module = definingOp->getParentOfType<mlir::ModuleOp>())
         if (auto globalOp =
                 module.lookupSymbol<fir::GlobalOp>(addressOfOp.getSymbol()))
-          return globalOp->hasAttr(attributeName);
+          return testAttributeSets(globalOp->getAttrs(), attributeNames);
     }
   }
   // TODO: Construct associated entities attributes. Decide where the fir
   // attributes must be placed/looked for in this case.
-  return false;
+  return std::nullopt;
+}
+
+bool fir::valueMayHaveFirAttributes(
+    mlir::Value value, llvm::ArrayRef<llvm::StringRef> attributeNames) {
+  std::optional<bool> mayHaveAttr =
+      valueCheckFirAttributes(value, attributeNames, /*checkAny=*/true);
+  return mayHaveAttr.value_or(true);
+}
+
+bool fir::valueHasFirAttribute(mlir::Value value,
+                               llvm::StringRef attributeName) {
+  std::optional<bool> mayHaveAttr =
+      valueCheckFirAttributes(value, {attributeName}, /*checkAny=*/false);
+  return mayHaveAttr.value_or(false);
 }
 
 bool fir::anyFuncArgsHaveAttr(mlir::func::FuncOp func, llvm::StringRef attr) {

diff  --git a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
index 3ae9db6985750..c0563588bfc30 100644
--- a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
+++ b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
@@ -65,16 +65,18 @@ namespace {
 ///
 /// If none of the array values overlap in storage and the accesses are not
 /// loop-carried, then the arrays are conflict-free and no copies are required.
-class ArrayCopyAnalysis {
+class ArrayCopyAnalysisBase {
 public:
-  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysis)
-
   using ConflictSetT = llvm::SmallPtrSet<mlir::Operation *, 16>;
   using UseSetT = llvm::SmallPtrSet<mlir::OpOperand *, 8>;
   using LoadMapSetsT = llvm::DenseMap<mlir::Operation *, UseSetT>;
   using AmendAccessSetT = llvm::SmallPtrSet<mlir::Operation *, 4>;
 
-  ArrayCopyAnalysis(mlir::Operation *op) : operation{op} { construct(op); }
+  ArrayCopyAnalysisBase(mlir::Operation *op, bool optimized)
+      : operation{op}, optimizeConflicts(optimized) {
+    construct(op);
+  }
+  virtual ~ArrayCopyAnalysisBase() = default;
 
   mlir::Operation *getOperation() const { return operation; }
 
@@ -117,6 +119,27 @@ class ArrayCopyAnalysis {
   LoadMapSetsT loadMapSets;
   // Set of array_access ops associated with array_amend ops.
   AmendAccessSetT amendAccesses;
+  bool optimizeConflicts;
+};
+
+// Optimized array copy analysis that takes into account Fortran
+// variable attributes to prove that no conflict is possible
+// and reduce the number of temporary arrays.
+class ArrayCopyAnalysisOptimized : public ArrayCopyAnalysisBase {
+public:
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysisOptimized)
+
+  ArrayCopyAnalysisOptimized(mlir::Operation *op)
+      : ArrayCopyAnalysisBase(op, /*optimized=*/true) {}
+};
+
+// Unoptimized array copy analysis used at O0.
+class ArrayCopyAnalysis : public ArrayCopyAnalysisBase {
+public:
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysis)
+
+  ArrayCopyAnalysis(mlir::Operation *op)
+      : ArrayCopyAnalysisBase(op, /*optimized=*/false) {}
 };
 } // namespace
 
@@ -332,7 +355,7 @@ class ReachCollector {
 
 /// Find all the array operations that access the array value that is loaded by
 /// the array load operation, `load`.
-void ArrayCopyAnalysis::arrayMentions(
+void ArrayCopyAnalysisBase::arrayMentions(
     llvm::SmallVectorImpl<mlir::Operation *> &mentions, ArrayLoadOp load) {
   mentions.clear();
   auto lmIter = loadMapSets.find(load);
@@ -541,8 +564,10 @@ static bool mutuallyExclusiveSliceRange(ArrayLoadOp ld, ArrayMergeStoreOp st) {
 /// Is there a conflict between the array value that was updated and to be
 /// stored to `st` and the set of arrays loaded (`reach`) and used to compute
 /// the updated value?
+/// If `optimize` is true, use the variable attributes to prove that
+/// there is no conflict.
 static bool conflictOnLoad(llvm::ArrayRef<mlir::Operation *> reach,
-                           ArrayMergeStoreOp st) {
+                           ArrayMergeStoreOp st, bool optimize) {
   mlir::Value load;
   mlir::Value addr = st.getMemref();
   const bool storeHasPointerType = hasPointerType(addr.getType());
@@ -560,18 +585,30 @@ static bool conflictOnLoad(llvm::ArrayRef<mlir::Operation *> reach,
           return true;
         }
         load = ld;
-      } else if ((hasPointerType(ldTy) || storeHasPointerType)) {
-        // TODO: Use target attribute to restrict this case further.
-        // TODO: Check if types can also allow ruling out some cases. For now,
-        // the fact that equivalences is using pointer attribute to enforce
-        // aliasing is preventing any attempt to do so, and in general, it may
-        // be wrong to use this if any of the types is a complex or a derived
-        // for which it is possible to create a pointer to a part with a
-        // different type than the whole, although this deserve some more
-        // investigation because existing compiler behavior seem to diverge
-        // here.
+      } else if (storeHasPointerType) {
+        if (optimize && !hasPointerType(ldTy) &&
+            !valueMayHaveFirAttributes(
+                ld.getMemref(),
+                {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
+          continue;
+
+        return true;
+      } else if (hasPointerType(ldTy)) {
+        if (optimize && !storeHasPointerType &&
+            !valueMayHaveFirAttributes(
+                addr, {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
+          continue;
+
         return true;
       }
+      // TODO: Check if types can also allow ruling out some cases. For now,
+      // the fact that equivalences is using pointer attribute to enforce
+      // aliasing is preventing any attempt to do so, and in general, it may
+      // be wrong to use this if any of the types is a complex or a derived
+      // for which it is possible to create a pointer to a part with a
+      // different type than the whole, although this deserve some more
+      // investigation because existing compiler behavior seem to diverge
+      // here.
     }
   return false;
 }
@@ -674,8 +711,8 @@ amendingAccess(llvm::ArrayRef<mlir::Operation *> mentions) {
 // Are any conflicts present? The conflicts detected here are described above.
 static bool conflictDetected(llvm::ArrayRef<mlir::Operation *> reach,
                              llvm::ArrayRef<mlir::Operation *> mentions,
-                             ArrayMergeStoreOp st) {
-  return conflictOnLoad(reach, st) || conflictOnMerge(mentions);
+                             ArrayMergeStoreOp st, bool optimize) {
+  return conflictOnLoad(reach, st, optimize) || conflictOnMerge(mentions);
 }
 
 // Assume that any call to a function that uses host-associations will be
@@ -696,7 +733,7 @@ conservativeCallConflict(llvm::ArrayRef<mlir::Operation *> reaches) {
 
 /// Constructor of the array copy analysis.
 /// This performs the analysis and saves the intermediate results.
-void ArrayCopyAnalysis::construct(mlir::Operation *topLevelOp) {
+void ArrayCopyAnalysisBase::construct(mlir::Operation *topLevelOp) {
   topLevelOp->walk([&](Operation *op) {
     if (auto st = mlir::dyn_cast<fir::ArrayMergeStoreOp>(op)) {
       llvm::SmallVector<mlir::Operation *> values;
@@ -705,7 +742,7 @@ void ArrayCopyAnalysis::construct(mlir::Operation *topLevelOp) {
       llvm::SmallVector<mlir::Operation *> mentions;
       arrayMentions(mentions,
                     mlir::cast<ArrayLoadOp>(st.getOriginal().getDefiningOp()));
-      bool conflict = conflictDetected(values, mentions, st);
+      bool conflict = conflictDetected(values, mentions, st, optimizeConflicts);
       bool refConflict = conflictOnReference(mentions);
       if (callConflict || conflict || refConflict) {
         LLVM_DEBUG(llvm::dbgs()
@@ -1086,7 +1123,7 @@ class ArrayUpdateConversionBase : public mlir::OpRewritePattern<ArrayOp> {
 public:
   // TODO: Implement copy/swap semantics?
   explicit ArrayUpdateConversionBase(mlir::MLIRContext *ctx,
-                                     const ArrayCopyAnalysis &a,
+                                     const ArrayCopyAnalysisBase &a,
                                      const OperationUseMapT &m)
       : mlir::OpRewritePattern<ArrayOp>{ctx}, analysis{a}, useMap{m} {}
 
@@ -1192,14 +1229,14 @@ class ArrayUpdateConversionBase : public mlir::OpRewritePattern<ArrayOp> {
   }
 
 protected:
-  const ArrayCopyAnalysis &analysis;
+  const ArrayCopyAnalysisBase &analysis;
   const OperationUseMapT &useMap;
 };
 
 class ArrayUpdateConversion : public ArrayUpdateConversionBase<ArrayUpdateOp> {
 public:
   explicit ArrayUpdateConversion(mlir::MLIRContext *ctx,
-                                 const ArrayCopyAnalysis &a,
+                                 const ArrayCopyAnalysisBase &a,
                                  const OperationUseMapT &m)
       : ArrayUpdateConversionBase{ctx, a, m} {}
 
@@ -1227,7 +1264,7 @@ class ArrayUpdateConversion : public ArrayUpdateConversionBase<ArrayUpdateOp> {
 class ArrayModifyConversion : public ArrayUpdateConversionBase<ArrayModifyOp> {
 public:
   explicit ArrayModifyConversion(mlir::MLIRContext *ctx,
-                                 const ArrayCopyAnalysis &a,
+                                 const ArrayCopyAnalysisBase &a,
                                  const OperationUseMapT &m)
       : ArrayUpdateConversionBase{ctx, a, m} {}
 
@@ -1280,7 +1317,7 @@ class ArrayFetchConversion : public mlir::OpRewritePattern<ArrayFetchOp> {
 class ArrayAccessConversion : public ArrayUpdateConversionBase<ArrayAccessOp> {
 public:
   explicit ArrayAccessConversion(mlir::MLIRContext *ctx,
-                                 const ArrayCopyAnalysis &a,
+                                 const ArrayCopyAnalysisBase &a,
                                  const OperationUseMapT &m)
       : ArrayUpdateConversionBase{ctx, a, m} {}
 
@@ -1332,6 +1369,10 @@ class ArrayAmendConversion : public mlir::OpRewritePattern<ArrayAmendOp> {
 class ArrayValueCopyConverter
     : public fir::impl::ArrayValueCopyBase<ArrayValueCopyConverter> {
 public:
+  ArrayValueCopyConverter() = default;
+  ArrayValueCopyConverter(const fir::ArrayValueCopyOptions &options)
+      : Base(options) {}
+
   void runOnOperation() override {
     auto func = getOperation();
     LLVM_DEBUG(llvm::dbgs() << "\n\narray-value-copy pass on function '"
@@ -1339,14 +1380,19 @@ class ArrayValueCopyConverter
     auto *context = &getContext();
 
     // Perform the conflict analysis.
-    const auto &analysis = getAnalysis<ArrayCopyAnalysis>();
-    const auto &useMap = analysis.getUseMap();
+    const ArrayCopyAnalysisBase *analysis;
+    if (optimizeConflicts)
+      analysis = &getAnalysis<ArrayCopyAnalysisOptimized>();
+    else
+      analysis = &getAnalysis<ArrayCopyAnalysis>();
+
+    const auto &useMap = analysis->getUseMap();
 
     mlir::RewritePatternSet patterns1(context);
     patterns1.insert<ArrayFetchConversion>(context, useMap);
-    patterns1.insert<ArrayUpdateConversion>(context, analysis, useMap);
-    patterns1.insert<ArrayModifyConversion>(context, analysis, useMap);
-    patterns1.insert<ArrayAccessConversion>(context, analysis, useMap);
+    patterns1.insert<ArrayUpdateConversion>(context, *analysis, useMap);
+    patterns1.insert<ArrayModifyConversion>(context, *analysis, useMap);
+    patterns1.insert<ArrayAccessConversion>(context, *analysis, useMap);
     patterns1.insert<ArrayAmendConversion>(context);
     mlir::ConversionTarget target(*context);
     target
@@ -1376,6 +1422,7 @@ class ArrayValueCopyConverter
 };
 } // namespace
 
-std::unique_ptr<mlir::Pass> fir::createArrayValueCopyPass() {
-  return std::make_unique<ArrayValueCopyConverter>();
+std::unique_ptr<mlir::Pass>
+fir::createArrayValueCopyPass(fir::ArrayValueCopyOptions options) {
+  return std::make_unique<ArrayValueCopyConverter>(options);
 }

diff  --git a/flang/test/Fir/array-copies-pointers.fir b/flang/test/Fir/array-copies-pointers.fir
index 3dc42dcbc91b2..490c1c1055e18 100644
--- a/flang/test/Fir/array-copies-pointers.fir
+++ b/flang/test/Fir/array-copies-pointers.fir
@@ -1,17 +1,45 @@
 // Test array-copy-value pass (copy elision) with array assignment
 // involving Fortran pointers. Focus in only on wether copy ellision
 // is made or not.
-// RUN: fir-opt %s --array-value-copy -split-input-file | FileCheck %s
+// RUN: fir-opt %s --array-value-copy -split-input-file | FileCheck --check-prefixes=ALL,NOOPT %s
+// RUN: fir-opt %s --array-value-copy="optimize-conflicts=true" -split-input-file | FileCheck --check-prefixes=ALL,OPT %s
 
 // Test `pointer(:) = array(:)`
-// TODO: array should have target attribute.
-// CHECK-LABEL: func @maybe_overlap
-// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
-// CHECK: fir.do_loop
-// CHECK: fir.do_loop
-// CHECK: fir.do_loop
-// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
-func.func @maybe_overlap(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {
+// ALL-LABEL: func @maybe_overlap
+// ALL: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
+func.func @maybe_overlap(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1 : !fir.ref<!fir.array<100xf32>> {fir.target}) {
+  %c100 = arith.constant 100 : index
+  %c99 = arith.constant 99 : index
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.alloca f32
+  %1 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
+  %3 = fir.array_load %arg1(%1) : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
+  %4 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %2) -> (!fir.array<100xf32>) {
+    %5 = fir.array_fetch %3, %arg2 : (!fir.array<100xf32>, index) -> f32
+    %6 = fir.array_update %arg3, %5, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
+    fir.result %6 : !fir.array<100xf32>
+  }
+  fir.array_merge_store %2, %4 to %arg0 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ptr<!fir.array<100xf32>>
+  return
+}
+
+// -----
+
+// Test `pointer(:) = array(:)`
+// ALL-LABEL: func @no_overlap1
+// OPT-NOT: fir.allocmem
+// NOOPT: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
+// NOOPT: fir.do_loop
+// NOOPT: fir.do_loop
+// NOOPT: fir.do_loop
+// NOOPT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
+func.func @no_overlap1(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1 : !fir.ref<!fir.array<100xf32>>) {
   %c100 = arith.constant 100 : index
   %c99 = arith.constant 99 : index
   %c1 = arith.constant 1 : index
@@ -32,12 +60,12 @@ func.func @maybe_overlap(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!
 // -----
 
 // Test `pointer(:) = pointer(:)`
-// CHECK-LABEL: func @no_overlap
-// CHECK-NOT: fir.allocmem
-// CHECK:     fir.do_loop
-// CHECK:       fir.array_coor
-// CHECK:       fir.array_coor
-// CHECK:       fir.store
+// ALL-LABEL: func @no_overlap
+// ALL-NOT: fir.allocmem
+// ALL:     fir.do_loop
+// ALL:       fir.array_coor
+// ALL:       fir.array_coor
+// ALL:       fir.store
 func.func @no_overlap(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {
   %c100 = arith.constant 100 : index
   %c99 = arith.constant 99 : index
@@ -58,14 +86,41 @@ func.func @no_overlap(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir
 // -----
 
 // Test `array(:) = pointer(:)`
-// TODO: array should have target attribute.
-// CHECK-LABEL: func @maybe_overlap_2
-// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
-// CHECK: fir.do_loop
-// CHECK: fir.do_loop
-// CHECK: fir.do_loop
-// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
-func.func @maybe_overlap_2(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {
+// ALL-LABEL: func @maybe_overlap_2
+// ALL: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
+func.func @maybe_overlap_2(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>> {fir.target}) {
+  %c100 = arith.constant 100 : index
+  %c99 = arith.constant 99 : index
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.alloca f32
+  %1 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
+  %3 = fir.array_load %arg1(%1) : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32>
+  %4 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %3) -> (!fir.array<100xf32>) {
+    %5 = fir.array_fetch %2, %arg2 : (!fir.array<100xf32>, index) -> f32
+    %6 = fir.array_update %arg3, %5, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
+    fir.result %6 : !fir.array<100xf32>
+  }
+  fir.array_merge_store %3, %4 to %arg1 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ref<!fir.array<100xf32>>
+  return
+}
+
+// -----
+
+// Test `array(:) = pointer(:)`
+// ALL-LABEL: func @no_overlap_2
+// OPT-NOT: fir.allocmem
+// NOOPT: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
+// NOOPT: fir.do_loop
+// NOOPT: fir.do_loop
+// NOOPT: fir.do_loop
+// NOOPT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
+func.func @no_overlap_2(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {
   %c100 = arith.constant 100 : index
   %c99 = arith.constant 99 : index
   %c1 = arith.constant 1 : index
@@ -86,12 +141,12 @@ func.func @maybe_overlap_2(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref
 // -----
 
 // Test `pointer1(:) = pointer2(:)`
-// CHECK-LABEL: func @maybe_overlap_3
-// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
-// CHECK: fir.do_loop
-// CHECK: fir.do_loop
-// CHECK: fir.do_loop
-// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
+// ALL-LABEL: func @maybe_overlap_3
+// ALL: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
 func.func @maybe_overlap_3(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ptr<!fir.array<100xf32>>) {
   %c100 = arith.constant 100 : index
   %c99 = arith.constant 99 : index
@@ -114,10 +169,10 @@ func.func @maybe_overlap_3(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ptr
 
 // Test derived_target(:)%i = integer_pointer(:)
 // The integer pointer may be aliasing the derived target component.
-// CHECK-LABEL: func @derived_whose_component_may_be_aliased
-// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<4x!fir.type<some_type{i:i32}>>
-// CHECK-COUNT-3: fir.do_loop
-// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<4x!fir.type<some_type{i:i32}>>>
+// ALL-LABEL: func @derived_whose_component_may_be_aliased
+// ALL: %[[ALLOC:.*]] = fir.allocmem !fir.array<4x!fir.type<some_type{i:i32}>>
+// ALL-COUNT-3: fir.do_loop
+// ALL: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<4x!fir.type<some_type{i:i32}>>>
 func.func @derived_whose_component_may_be_aliased(%arg0: !fir.box<!fir.array<4x!fir.type<some_type{i:i32}>>> {fir.target}, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) {
   %c4 = arith.constant 4 : index
   %0 = fir.field_index i, !fir.type<some_type{i:i32}>
@@ -143,10 +198,10 @@ func.func @derived_whose_component_may_be_aliased(%arg0: !fir.box<!fir.array<4x!
 
 // Test real_target = complex_target(:)%re
 // The real pointer may be aliasing the complex real part.
-// CHECK-LABEL: func @complex_real_aliasing
-// CHECK: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xf32>
-// CHECK-COUNT-3: fir.do_loop
-// CHECK: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?xf32>>
+// ALL-LABEL: func @complex_real_aliasing
+// ALL: %[[ALLOC:.*]] = fir.allocmem !fir.array<?xf32>
+// ALL-COUNT-3: fir.do_loop
+// ALL: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<?xf32>>
 func.func @complex_real_aliasing(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, %arg1: !fir.ref<!fir.array<4x!fir.complex<4>>> {fir.target}) {
   %c4 = arith.constant 4 : index
   %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
@@ -168,3 +223,68 @@ func.func @complex_real_aliasing(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x
   fir.array_merge_store %3, %8 to %0 : !fir.array<?xf32>, !fir.array<?xf32>, !fir.box<!fir.ptr<!fir.array<?xf32>>>
   return
 }
+
+// -----
+
+// Test `array(:) = pointer(:)`, where array is a global with the target attribute.
+// ALL-LABEL: func @maybe_overlap_3
+// ALL: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.do_loop
+// ALL: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
+fir.global @_QMdataEglob target : !fir.array<100xf32> { // global array declared with the target attribute
+  %0 = fir.undefined !fir.array<100xf32> // the initializer is an undefined value
+  fir.has_value %0 : !fir.array<100xf32>
+}
+
+func.func @maybe_overlap_3(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>> {fir.target}) { // NOTE(review): reuses the name of the earlier @maybe_overlap_3 test, and %arg1 is not referenced in the body - confirm both are intended
+  %c100 = arith.constant 100 : index
+  %c99 = arith.constant 99 : index
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.address_of(@_QMdataEglob) : !fir.ref<!fir.array<100xf32>> // assignment destination: the global array
+  %1 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32> // source value, loaded through the pointer argument
+  %3 = fir.array_load %0(%1) : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32> // destination value, loaded from the global
+  %4 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %3) -> (!fir.array<100xf32>) { // copies one element per iteration from %2 into the carried array value
+    %5 = fir.array_fetch %2, %arg2 : (!fir.array<100xf32>, index) -> f32
+    %6 = fir.array_update %arg3, %5, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
+    fir.result %6 : !fir.array<100xf32>
+  }
+  fir.array_merge_store %3, %4 to %0 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ref<!fir.array<100xf32>> // store the updated array value back to the global
+  return
+}
+
+// -----
+
+// Test `array(:) = pointer(:)`, where array is a global without the target attribute.
+// ALL-LABEL: func @no_overlap_3
+// OPT-NOT: fir.allocmem
+// NOOPT: %[[ALLOC:.*]] = fir.allocmem !fir.array<100xf32>
+// NOOPT: fir.do_loop
+// NOOPT: fir.do_loop
+// NOOPT: fir.do_loop
+// NOOPT: fir.freemem %[[ALLOC]] : !fir.heap<!fir.array<100xf32>>
+fir.global @_QMdataEglob : !fir.array<100xf32> { // global array declared without the target attribute
+  %0 = fir.undefined !fir.array<100xf32> // the initializer is an undefined value
+  fir.has_value %0 : !fir.array<100xf32>
+}
+
+func.func @no_overlap_3(%arg0: !fir.ptr<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>> {fir.target}) { // NOTE(review): %arg1 is not referenced in the body - confirm it is intentionally unused
+  %c100 = arith.constant 100 : index
+  %c99 = arith.constant 99 : index
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.address_of(@_QMdataEglob) : !fir.ref<!fir.array<100xf32>> // assignment destination: the global array
+  %1 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %2 = fir.array_load %arg0(%1) : (!fir.ptr<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32> // source value, loaded through the pointer argument
+  %3 = fir.array_load %0(%1) : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>) -> !fir.array<100xf32> // destination value, loaded from the global
+  %4 = fir.do_loop %arg2 = %c0 to %c99 step %c1 unordered iter_args(%arg3 = %3) -> (!fir.array<100xf32>) { // copies one element per iteration from %2 into the carried array value
+    %5 = fir.array_fetch %2, %arg2 : (!fir.array<100xf32>, index) -> f32
+    %6 = fir.array_update %arg3, %5, %arg2 : (!fir.array<100xf32>, f32, index) -> !fir.array<100xf32>
+    fir.result %6 : !fir.array<100xf32>
+  }
+  fir.array_merge_store %3, %4 to %0 : !fir.array<100xf32>, !fir.array<100xf32>, !fir.ref<!fir.array<100xf32>> // store the updated array value back to the global
+  return
+}

diff  --git a/flang/test/Fir/array-modify.fir b/flang/test/Fir/array-modify.fir
index 4aff744c9ab75..09c9516ec57d3 100644
--- a/flang/test/Fir/array-modify.fir
+++ b/flang/test/Fir/array-modify.fir
@@ -1,5 +1,6 @@
 // Test array-copy-value pass (copy elision) with fir.array_modify
 // RUN: fir-opt %s --array-value-copy | FileCheck %s
+// RUN: fir-opt %s --array-value-copy="optimize-conflicts=true" | FileCheck %s
 
 // Test user_defined_assignment(arg0(:), arg1(:))
 func.func @no_overlap(%arg0: !fir.ref<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100xf32>>) {

diff  --git a/flang/test/Fir/array-value-copy-2.fir b/flang/test/Fir/array-value-copy-2.fir
index 5750b41887b23..21b340af10c6b 100644
--- a/flang/test/Fir/array-value-copy-2.fir
+++ b/flang/test/Fir/array-value-copy-2.fir
@@ -1,4 +1,5 @@
 // RUN: fir-opt --array-value-copy --cfg-conversion %s | FileCheck %s
+// RUN: fir-opt --array-value-copy="optimize-conflicts=true" --cfg-conversion %s | FileCheck %s
 
 // CHECK-LABEL: func @_QPslice1(
 // CHECK-NOT: fir.allocmem

diff  --git a/flang/test/Fir/array-value-copy-3.fir b/flang/test/Fir/array-value-copy-3.fir
index 9103aa67c1f0d..2840c3c68d701 100644
--- a/flang/test/Fir/array-value-copy-3.fir
+++ b/flang/test/Fir/array-value-copy-3.fir
@@ -4,6 +4,7 @@
 // that may have been allocated in the end.
 
 // RUN: fir-opt --array-value-copy %s | FileCheck %s
+// RUN: fir-opt --array-value-copy="optimize-conflicts=true" %s | FileCheck %s
 
 
 !t_with_alloc_comp = !fir.type<t{i:!fir.box<!fir.heap<!fir.array<?xi32>>>}>

diff  --git a/flang/test/Fir/array-value-copy-4.fir b/flang/test/Fir/array-value-copy-4.fir
index 762e78491f148..bf9ddc37dd5c2 100644
--- a/flang/test/Fir/array-value-copy-4.fir
+++ b/flang/test/Fir/array-value-copy-4.fir
@@ -3,6 +3,7 @@
 // https://github.com/llvm/llvm-project/issues/59342.
 
 // RUN: fir-opt --array-value-copy %s | FileCheck %s
+// RUN: fir-opt --array-value-copy="optimize-conflicts=true" %s | FileCheck %s
 
 func.func @_QMmodPsub1(%arg0: !fir.box<!fir.array<?x!fir.type<_QMmodTrec1{dat:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>> {fir.bindc_name = "x"}) {
   %0 = fir.alloca !fir.box<!fir.type<_QMmodTrec1{dat:!fir.box<!fir.heap<!fir.array<?xi32>>>}>>

diff  --git a/flang/test/Fir/array-value-copy-cam4.fir b/flang/test/Fir/array-value-copy-cam4.fir
new file mode 100644
index 0000000000000..3b3b0082743ce
--- /dev/null
+++ b/flang/test/Fir/array-value-copy-cam4.fir
@@ -0,0 +1,102 @@
+// RUN: fir-opt --array-value-copy %s | FileCheck --check-prefix=NOOPT %s
+// RUN: fir-opt --array-value-copy="optimize-conflicts=true" %s | FileCheck --check-prefix=OPT %s
+
+// Reproducer from SPEC CPU2017/527.cam4_r:
+// module cam4
+//   type, public :: pbuf_fld
+//      real*8, pointer, dimension(:,:,:,:,:) :: fld_ptr
+//   end type pbuf_fld
+// contains
+//   subroutine test(pbuf, ncol, lchnk, time_index, kvh_idx)
+//     implicit none
+//     interface
+//        subroutine init(kvh)
+//          real*8, intent(out) :: kvh(4,27)
+//        end subroutine init
+//     end interface
+//     type(pbuf_fld), intent(inout), dimension(1000) :: pbuf
+//     real*8 :: kvh(4,27)
+//     integer kvh_idx, ncol, lchnk, time_index
+//     call init(kvh)
+//     pbuf(kvh_idx)%fld_ptr(1,1:ncol,1:27,lchnk,time_index) = kvh(:ncol,:)
+//   end subroutine test
+// end module cam4
+
+// Verify that no temporary array is allocated when the pass runs with
+// optimize-conflicts=true, and that one is still allocated without it:
+// OPT-NOT: fir.allocmem
+// NOOPT: fir.allocmem
+
+module {
+  func.func @_QMcam4Ptest(%arg0: !fir.ref<!fir.array<1000x!fir.type<_QMcam4Tpbuf_fld{fld_ptr:!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>}>>> {fir.bindc_name = "pbuf"}, %arg1: !fir.ref<i32> {fir.bindc_name = "ncol"}, %arg2: !fir.ref<i32> {fir.bindc_name = "lchnk"}, %arg3: !fir.ref<i32> {fir.bindc_name = "time_index"}, %arg4: !fir.ref<i32> {fir.bindc_name = "kvh_idx"}) {
+    %c4 = arith.constant 4 : index
+    %c27 = arith.constant 27 : index
+    %0 = fir.alloca !fir.array<4x27xf64> {bindc_name = "kvh", uniq_name = "_QMcam4FtestEkvh"} // local array kvh
+    fir.call @_QPinit(%0) fastmath<contract> : (!fir.ref<!fir.array<4x27xf64>>) -> ()
+    %1 = fir.load %arg4 : !fir.ref<i32>
+    %2 = fir.convert %1 : (i32) -> i64
+    %c1_i64 = arith.constant 1 : i64
+    %3 = arith.subi %2, %c1_i64 : i64 // kvh_idx - 1, used as the element index into pbuf
+    %4 = fir.coordinate_of %arg0, %3 : (!fir.ref<!fir.array<1000x!fir.type<_QMcam4Tpbuf_fld{fld_ptr:!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>}>>>, i64) -> !fir.ref<!fir.type<_QMcam4Tpbuf_fld{fld_ptr:!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>}>>
+    %5 = fir.field_index fld_ptr, !fir.type<_QMcam4Tpbuf_fld{fld_ptr:!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>}>
+    %6 = fir.coordinate_of %4, %5 : (!fir.ref<!fir.type<_QMcam4Tpbuf_fld{fld_ptr:!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>}>>, !fir.field) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>>
+    %7 = fir.load %6 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>> // box of the fld_ptr pointer component
+    %c0 = arith.constant 0 : index
+    %8:3 = fir.box_dims %7, %c0 : (!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>, index) -> (index, index, index)
+    %c1 = arith.constant 1 : index
+    %9:3 = fir.box_dims %7, %c1 : (!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>, index) -> (index, index, index)
+    %c2 = arith.constant 2 : index
+    %10:3 = fir.box_dims %7, %c2 : (!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>, index) -> (index, index, index)
+    %c3 = arith.constant 3 : index
+    %11:3 = fir.box_dims %7, %c3 : (!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>, index) -> (index, index, index)
+    %12:3 = fir.box_dims %7, %c4 : (!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>, index) -> (index, index, index) // %c4 (also the kvh extent) serves as the dimension index here
+    %13 = fir.undefined index
+    %14 = fir.convert %c1_i64 : (i64) -> index
+    %15 = arith.subi %14, %8#0 : index
+    %16 = fir.load %arg1 : !fir.ref<i32>
+    %17 = fir.convert %16 : (i32) -> i64
+    %18 = fir.convert %17 : (i64) -> index
+    %19 = arith.subi %18, %14 : index
+    %20 = arith.addi %19, %14 : index
+    %21 = arith.divsi %20, %14 : index
+    %22 = arith.cmpi sgt, %21, %c0 : index
+    %23 = arith.select %22, %21, %c0 : index // max(%21, 0)
+    %c27_i64 = arith.constant 27 : i64
+    %24 = fir.convert %c27_i64 : (i64) -> index
+    %25 = arith.subi %24, %14 : index
+    %26 = arith.addi %25, %14 : index
+    %27 = arith.divsi %26, %14 : index
+    %28 = arith.cmpi sgt, %27, %c0 : index
+    %29 = arith.select %28, %27, %c0 : index // max(%27, 0)
+    %30 = fir.load %arg2 : !fir.ref<i32>
+    %31 = fir.convert %30 : (i32) -> i64
+    %32 = fir.convert %31 : (i64) -> index
+    %33 = arith.subi %32, %11#0 : index
+    %34 = fir.load %arg3 : !fir.ref<i32>
+    %35 = fir.convert %34 : (i32) -> i64
+    %36 = fir.convert %35 : (i64) -> index
+    %37 = arith.subi %36, %12#0 : index
+    %38 = fir.shift %8#0, %9#0, %10#0, %11#0, %12#0 : (index, index, index, index, index) -> !fir.shift<5>
+    %39 = fir.slice %c1_i64, %13, %13, %14, %18, %14, %14, %24, %14, %31, %13, %13, %35, %13, %13 : (i64, index, index, index, index, index, index, index, index, i64, index, index, i64, index, index) -> !fir.slice<5>
+    %40 = fir.array_load %7(%38) [%39] : (!fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>, !fir.shift<5>, !fir.slice<5>) -> !fir.array<?x?x?x?x?xf64> // assignment destination: section of the pointer array
+    %41 = arith.addi %c1, %c27 : index
+    %42 = arith.subi %41, %c1 : index
+    %43 = fir.shape %c4, %c27 : (index, index) -> !fir.shape<2>
+    %44 = fir.slice %c1, %18, %14, %c1, %42, %14 : (index, index, index, index, index, index) -> !fir.slice<2>
+    %45 = fir.array_load %0(%43) [%44] : (!fir.ref<!fir.array<4x27xf64>>, !fir.shape<2>, !fir.slice<2>) -> !fir.array<4x27xf64> // assignment source: section of the local kvh array
+    %46 = arith.subi %23, %c1 : index
+    %47 = arith.subi %29, %c1 : index
+    %48 = fir.do_loop %arg5 = %c0 to %47 step %c1 unordered iter_args(%arg6 = %40) -> (!fir.array<?x?x?x?x?xf64>) { // outer loop, bounded by %47
+      %49 = fir.do_loop %arg7 = %c0 to %46 step %c1 unordered iter_args(%arg8 = %arg6) -> (!fir.array<?x?x?x?x?xf64>) { // inner loop, bounded by %46
+        %50 = fir.array_fetch %45, %arg7, %arg5 : (!fir.array<4x27xf64>, index, index) -> f64
+        %51 = fir.array_update %arg8, %50, %15, %arg7, %arg5, %33, %37 : (!fir.array<?x?x?x?x?xf64>, f64, index, index, index, index, index) -> !fir.array<?x?x?x?x?xf64>
+        fir.result %51 : !fir.array<?x?x?x?x?xf64>
+      }
+      fir.result %49 : !fir.array<?x?x?x?x?xf64>
+    }
+    fir.array_merge_store %40, %48 to %7[%39] : !fir.array<?x?x?x?x?xf64>, !fir.array<?x?x?x?x?xf64>, !fir.box<!fir.ptr<!fir.array<?x?x?x?x?xf64>>>, !fir.slice<5> // store the updated section back through the pointer box
+    return
+  }
+  func.func private @_QPinit(!fir.ref<!fir.array<4x27xf64>>)
+  fir.dispatch_table @_QMcam4Tpbuf_fld
+}