[flang-commits] [flang] df5c278 - [flang][FIR] add FIR TBAA pass

Tom Eccles via flang-commits flang-commits at lists.llvm.org
Wed Oct 11 07:36:56 PDT 2023


Author: Tom Eccles
Date: 2023-10-11T14:29:47Z
New Revision: df5c27869c84ed6d6e61f03ac6e7a83e42d7dbd9

URL: https://github.com/llvm/llvm-project/commit/df5c27869c84ed6d6e61f03ac6e7a83e42d7dbd9
DIFF: https://github.com/llvm/llvm-project/commit/df5c27869c84ed6d6e61f03ac6e7a83e42d7dbd9.diff

LOG: [flang][FIR] add FIR TBAA pass

See RFC at
https://discourse.llvm.org/t/rfc-propagate-fir-alias-analysis-information-using-tbaa/73755

This pass adds TBAA tags to all accesses to non-pointer/target dummy
arguments. These TBAA tags tell LLVM that these accesses cannot alias:
allowing better dead code elimination, hoisting out of loops, and
vectorization.

Each function has its own TBAA tree so that accesses between funtions
MayAlias after inlining.

I also included code for adding tags for local allocations and for
global variables. Enabling all three kinds of tag is known to produce a
miscompile and so these are disabled by default. But it isn't much
code and I thought it could be interesting to play with these later if
one is looking at a benchmark which looks like it would benefit from
more alias information. I'm open to removing this code too.

TBAA tags are also added separately by TBAABuilder during CodeGen.
TBAABuilder has to run during CodeGen because it adds tags to box
accesses, many of which are implicit in FIR. This pass cannot (easily)
run in CodeGen because fir::AliasAnalysis has difficulty tracing values
between blocks, and by the time CodeGen runs, structured control flow
has already been lowered.

Coming in follow up patches
  - Change CodeGen/TBAABuilder to use TBAAForest to add tags within the
    same per-function trees as are used here (delayed to a later patch
    to make it easier to revert)
  - Command line argument processing to actually enable the pass

Added: 
    flang/include/flang/Optimizer/Analysis/TBAAForest.h
    flang/lib/Optimizer/Analysis/TBAAForest.cpp
    flang/lib/Optimizer/Transforms/AddAliasTags.cpp
    flang/test/Transforms/tbaa.fir
    flang/test/Transforms/tbaa2.fir

Modified: 
    flang/include/flang/Optimizer/Transforms/Passes.h
    flang/include/flang/Optimizer/Transforms/Passes.td
    flang/lib/Optimizer/Analysis/CMakeLists.txt
    flang/lib/Optimizer/Transforms/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Analysis/TBAAForest.h b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
new file mode 100644
index 000000000000000..a024544e50ef98b
--- /dev/null
+++ b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
@@ -0,0 +1,104 @@
+//===-- TBAAForest.h - A TBAA tree for each function -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
+#define FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "llvm/ADT/DenseMap.h"
+#include <string>
+
+namespace fir {
+
+//===----------------------------------------------------------------------===//
+// TBAATree
+//===----------------------------------------------------------------------===//
+/// Per-function TBAA tree. Each tree contains branches for data (of various
+/// kinds) and descriptor access
+struct TBAATree {
+  //===----------------------------------------------------------------------===//
+  // TBAAForrest::TBAATree::SubtreeState
+  //===----------------------------------------------------------------------===//
+  /// This contains a TBAA subtree based on some parent. New tags can be added
+  /// under the parent using getTag.
+  class SubtreeState {
+    friend TBAATree; // only allow construction by TBAATree
+  public:
+    SubtreeState() = delete;
+    SubtreeState(const SubtreeState &) = delete;
+    SubtreeState(SubtreeState &&) = default;
+
+    mlir::LLVM::TBAATagAttr getTag(llvm::StringRef uniqueId) const;
+
+  private:
+    SubtreeState(mlir::MLIRContext *ctx, std::string name,
+                 mlir::LLVM::TBAANodeAttr grandParent)
+        : parentId{std::move(name)}, context(ctx) {
+      parent = mlir::LLVM::TBAATypeDescriptorAttr::get(
+          context, parentId, mlir::LLVM::TBAAMemberAttr::get(grandParent, 0));
+    }
+
+    const std::string parentId;
+    mlir::MLIRContext *const context;
+    mlir::LLVM::TBAATypeDescriptorAttr parent;
+    llvm::DenseMap<llvm::StringRef, mlir::LLVM::TBAATagAttr> tagDedup;
+  };
+
+  SubtreeState globalDataTree;
+  SubtreeState allocatedDataTree;
+  SubtreeState dummyArgDataTree;
+  mlir::LLVM::TBAATypeDescriptorAttr anyAccessDesc;
+  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc;
+  mlir::LLVM::TBAATypeDescriptorAttr anyDataTypeDesc;
+
+  static TBAATree buildTree(mlir::StringAttr functionName);
+
+private:
+  TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
+           mlir::LLVM::TBAATypeDescriptorAttr dataRoot,
+           mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc);
+};
+
+//===----------------------------------------------------------------------===//
+// TBAAForrest
+//===----------------------------------------------------------------------===//
+/// Collection of TBAATrees, usually indexed by function (so that each function
+/// has a 
diff erent TBAATree)
+class TBAAForrest {
+public:
+  explicit TBAAForrest(bool separatePerFunction = true)
+      : separatePerFunction{separatePerFunction} {}
+
+  inline const TBAATree &operator[](mlir::func::FuncOp func) {
+    return getFuncTree(func.getSymNameAttr());
+  }
+  inline const TBAATree &operator[](mlir::LLVM::LLVMFuncOp func) {
+    return getFuncTree(func.getSymNameAttr());
+  }
+
+private:
+  const TBAATree &getFuncTree(mlir::StringAttr symName) {
+    if (!separatePerFunction)
+      symName = mlir::StringAttr::get(symName.getContext(), "");
+    if (!trees.contains(symName))
+      trees.insert({symName, TBAATree::buildTree(symName)});
+    return trees.at(symName);
+  }
+
+  // Should each function use a 
diff erent tree?
+  const bool separatePerFunction;
+  // TBAA tree per function
+  llvm::DenseMap<mlir::StringAttr, TBAATree> trees;
+};
+
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H

diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 64882c8ec406b0a..30d97be3800c191 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -61,6 +61,7 @@ std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
 std::unique_ptr<mlir::Pass> createStackArraysPass();
+std::unique_ptr<mlir::Pass> createAliasTagsPass();
 std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
 std::unique_ptr<mlir::Pass> createAddDebugFoundationPass();
 std::unique_ptr<mlir::Pass> createLoopVersioningPass();

diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 80da485392007fa..6d211a535b53f70 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -252,6 +252,26 @@ def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> {
   let constructor = "::fir::createStackArraysPass()";
 }
 
+def AddAliasTags : Pass<"fir-add-alias-tags", "mlir::ModuleOp"> {
+  let summary = "Add tbaa tags to operations that implement FirAliasAnalysisOpInterface";
+  let description = [{
+    TBAA (type based alias analysis) is one method to pass pointer alias information
+    from language frontends to LLVM. This pass uses fir::AliasAnalysis to add this
+    information to fir.load and fir.store operations.
+    Additional tags are added during codegen. See fir::TBAABuilder.
+    This needs to be a separate pass so that it happens before structured control
+    flow operations are lowered to branches and basic blocks (this makes tracing
+    the source of values much eaiser). The other TBAA tags need to be applied to
+    box loads and stores which are implicit in FIR and so cannot be annotated
+    until codegen.
+    TODO: this is currently a pass on mlir::ModuleOp to avoid parallelism. In
+    theory, each operation could be considered in prallel, so long as there
+    aren't races adding new tags to the mlir context.
+  }];
+  let dependentDialects = [ "fir::FIROpsDialect" ];
+  let constructor = "::fir::createAliasTagsPass()";
+}
+
 def SimplifyRegionLite : Pass<"simplify-region-lite", "mlir::ModuleOp"> {
   let summary = "Region simplification";
   let description = [{

diff  --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt
index 19dadf72cf4ce14..436d4d3f18969c1 100644
--- a/flang/lib/Optimizer/Analysis/CMakeLists.txt
+++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_flang_library(FIRAnalysis
   AliasAnalysis.cpp
+  TBAAForest.cpp
 
   DEPENDS
   FIRDialect

diff  --git a/flang/lib/Optimizer/Analysis/TBAAForest.cpp b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
new file mode 100644
index 000000000000000..070e2be6700cc11
--- /dev/null
+++ b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
@@ -0,0 +1,60 @@
+//===- TBAAForest.cpp - Per-functon TBAA Trees ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/TBAAForest.h"
+#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
+
+mlir::LLVM::TBAATagAttr
+fir::TBAATree::SubtreeState::getTag(llvm::StringRef uniqueName) const {
+  // mlir::LLVM::TBAATagAttr &tag = tagDedup[uniqueName];
+  // if (tag)
+  //   return tag;
+  std::string id = (parentId + "/" + uniqueName).str();
+  mlir::LLVM::TBAATypeDescriptorAttr type =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          context, id, mlir::LLVM::TBAAMemberAttr::get(parent, 0));
+  return mlir::LLVM::TBAATagAttr::get(type, type, 0);
+  // return tag;
+}
+
+fir::TBAATree fir::TBAATree::buildTree(mlir::StringAttr func) {
+  llvm::StringRef funcName = func.getValue();
+  std::string rootId = ("Flang function root " + funcName).str();
+  mlir::MLIRContext *ctx = func.getContext();
+  mlir::LLVM::TBAARootAttr funcRoot =
+      mlir::LLVM::TBAARootAttr::get(ctx, mlir::StringAttr::get(ctx, rootId));
+
+  static constexpr llvm::StringRef anyAccessTypeDescId = "any access";
+  mlir::LLVM::TBAATypeDescriptorAttr anyAccess =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, anyAccessTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(funcRoot, 0));
+
+  static constexpr llvm::StringRef anyDataAccessTypeDescId = "any data access";
+  mlir::LLVM::TBAATypeDescriptorAttr dataRoot =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, anyDataAccessTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(anyAccess, 0));
+
+  static constexpr llvm::StringRef boxMemberTypeDescId = "descriptor member";
+  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, boxMemberTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(anyAccess, 0));
+
+  return TBAATree{anyAccess, dataRoot, boxMemberTypeDesc};
+}
+
+fir::TBAATree::TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
+                        mlir::LLVM::TBAATypeDescriptorAttr dataRoot,
+                        mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc)
+    : globalDataTree(dataRoot.getContext(), "global data", dataRoot),
+      allocatedDataTree(dataRoot.getContext(), "allocated data", dataRoot),
+      dummyArgDataTree(dataRoot.getContext(), "dummy arg data", dataRoot),
+      anyAccessDesc(anyAccess), boxMemberTypeDesc(boxMemberTypeDesc),
+      anyDataTypeDesc(dataRoot) {}

diff  --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
new file mode 100644
index 000000000000000..25439837acac518
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -0,0 +1,210 @@
+//===- AddAliasTags.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// Adds TBAA alias tags to fir loads and stores, based on information from
+/// fir::AliasAnalysis. More are added later in CodeGen - see fir::TBAABuilder
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Analysis/TBAAForest.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+namespace fir {
+#define GEN_PASS_DEF_ADDALIASTAGS
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "fir-add-alias-tags"
+
+static llvm::cl::opt<bool>
+    enableDummyArgs("dummy-arg-tbaa", llvm::cl::init(true), llvm::cl::Hidden,
+                    llvm::cl::desc("Add TBAA tags to dummy arguments"));
+// These two are **known unsafe** (misscompare in spec2017/wrf_r). They should
+// not be enabled by default.
+// The code is kept so that these may be tried with new benchmarks to see if
+// this is worth fixing in the future.
+static llvm::cl::opt<bool>
+    enableGlobals("globals-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
+                  llvm::cl::desc("Add TBAA tags to global variables. UNSAFE."));
+static llvm::cl::opt<bool> enableLocalAllocs(
+    "local-alloc-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
+    llvm::cl::desc("Add TBAA tags to local allocations. UNSAFE."));
+
+namespace {
+
+/// Shared state per-module
+class PassState {
+public:
+  /// memoised call to fir::AliasAnalysis::getSource
+  inline const fir::AliasAnalysis::Source &getSource(mlir::Value value) {
+    if (!analysisCache.contains(value))
+      analysisCache.insert({value, analysis.getSource(value)});
+    return analysisCache[value];
+  }
+
+  /// get the per-function TBAATree for this function
+  inline const fir::TBAATree &getFuncTree(mlir::func::FuncOp func) {
+    return forrest[func];
+  }
+
+private:
+  fir::AliasAnalysis analysis;
+  llvm::DenseMap<mlir::Value, fir::AliasAnalysis::Source> analysisCache;
+  fir::TBAAForrest forrest;
+};
+
+class AddAliasTagsPass : public fir::impl::AddAliasTagsBase<AddAliasTagsPass> {
+public:
+  void runOnOperation() override;
+
+private:
+  /// The real workhorse of the pass. This is a runOnOperation() which
+  /// operates on fir::FirAliasTagOpInterface, using some extra state
+  void runOnAliasInterface(fir::FirAliasTagOpInterface op, PassState &state);
+};
+
+} // namespace
+
+static fir::DeclareOp getDeclareOp(mlir::Value arg) {
+  for (mlir::Operation *use : arg.getUsers())
+    if (fir::DeclareOp declare = mlir::dyn_cast<fir::DeclareOp>(use))
+      return declare;
+  return nullptr;
+}
+
+/// Get the name of a function argument using the "fir.bindc_name" attribute,
+/// or ""
+static std::string getFuncArgName(mlir::Value arg) {
+  // first try getting the name from the hlfir.declare
+  if (fir::DeclareOp declare = getDeclareOp(arg))
+    return declare.getUniqName().str();
+
+  // get from attribute on function argument
+  // always succeeds because arg is a function argument
+  mlir::BlockArgument blockArg = mlir::cast<mlir::BlockArgument>(arg);
+  assert(blockArg.getOwner() && blockArg.getOwner()->isEntryBlock() &&
+         "arg is a function argument");
+  mlir::FunctionOpInterface func = mlir::dyn_cast<mlir::FunctionOpInterface>(
+      blockArg.getOwner()->getParentOp());
+  mlir::StringAttr attr = func.getArgAttrOfType<mlir::StringAttr>(
+      blockArg.getArgNumber(), "fir.bindc_name");
+  if (!attr)
+    return "";
+  return attr.str();
+}
+
+void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
+                                           PassState &state) {
+  mlir::func::FuncOp func = op->getParentOfType<mlir::func::FuncOp>();
+  if (!func)
+    return;
+
+  llvm::SmallVector<mlir::Value> accessedOperands = op.getAccessedOperands();
+  assert(accessedOperands.size() == 1 &&
+         "load and store only access one address");
+  mlir::Value memref = accessedOperands.front();
+
+  // skip boxes. These get an "any descriptor access" tag in TBAABuilder
+  // (CodeGen). I didn't see any speedup from giving each box a separate TBAA
+  // type.
+  if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(memref.getType())))
+    return;
+  LLVM_DEBUG(llvm::dbgs() << "Analysing " << op << "\n");
+
+  const fir::AliasAnalysis::Source &source = state.getSource(memref);
+  if (source.isTargetOrPointer()) {
+    LLVM_DEBUG(llvm::dbgs().indent(2) << "Skipping TARGET/POINTER\n");
+    // These will get an "any data access" tag in TBAABuilder (CodeGen): causing
+    // them to "MayAlias" with all non-box accesses
+    return;
+  }
+
+  mlir::LLVM::TBAATagAttr tag;
+  // TBAA for dummy arguments
+  if (enableDummyArgs &&
+      source.kind == fir::AliasAnalysis::SourceKind::Argument) {
+    LLVM_DEBUG(llvm::dbgs().indent(2)
+               << "Found reference to dummy argument at " << *op << "\n");
+    std::string name = getFuncArgName(source.u.get<mlir::Value>());
+    if (!name.empty())
+      tag = state.getFuncTree(func).dummyArgDataTree.getTag(name);
+    else
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: couldn't find a name for dummy argument " << *op
+                 << "\n");
+
+    // TBAA for global variables
+  } else if (enableGlobals &&
+             source.kind == fir::AliasAnalysis::SourceKind::Global) {
+    mlir::SymbolRefAttr glbl = source.u.get<mlir::SymbolRefAttr>();
+    const char *name = glbl.getRootReference().data();
+    LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to global " << name
+                                      << " at " << *op << "\n");
+    tag = state.getFuncTree(func).globalDataTree.getTag(name);
+
+    // TBAA for local allocations
+  } else if (enableLocalAllocs &&
+             source.kind == fir::AliasAnalysis::SourceKind::Allocate) {
+    std::optional<llvm::StringRef> name;
+    mlir::Operation *sourceOp = source.u.get<mlir::Value>().getDefiningOp();
+    if (auto alloc = mlir::dyn_cast_or_null<fir::AllocaOp>(sourceOp))
+      name = alloc.getUniqName();
+    else if (auto alloc = mlir::dyn_cast_or_null<fir::AllocMemOp>(sourceOp))
+      name = alloc.getUniqName();
+    if (name) {
+      LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to allocation "
+                                        << name << " at " << *op << "\n");
+      tag = state.getFuncTree(func).allocatedDataTree.getTag(*name);
+    } else {
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: couldn't find a name for allocation " << *op
+                 << "\n");
+    }
+  } else {
+    if (source.kind != fir::AliasAnalysis::SourceKind::Argument &&
+        source.kind != fir::AliasAnalysis::SourceKind::Allocate &&
+        source.kind != fir::AliasAnalysis::SourceKind::Global)
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: unsupported value: " << source << "\n");
+  }
+
+  if (tag)
+    op.setTBAATags(mlir::ArrayAttr::get(&getContext(), tag));
+}
+
+void AddAliasTagsPass::runOnOperation() {
+  LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
+
+  // MLIR forbids storing state in a pass because 
diff erent instances might be
+  // used in 
diff erent threads.
+  // Instead this pass stores state per mlir::ModuleOp (which is what MLIR
+  // thinks the pass operates on), then the real work of the pass is done in
+  // runOnAliasInterface
+  PassState state;
+
+  mlir::ModuleOp mod = getOperation();
+  mod.walk(
+      [&](fir::FirAliasTagOpInterface op) { runOnAliasInterface(op, state); });
+
+  LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
+}
+
+std::unique_ptr<mlir::Pass> fir::createAliasTagsPass() {
+  return std::make_unique<AddAliasTagsPass>();
+}

diff  --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 428c4c2a1e64408..74a093fe74719b6 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_flang_library(FIRTransforms
   AbstractResult.cpp
+  AddAliasTags.cpp
   AffinePromotion.cpp
   AffineDemotion.cpp
   AnnotateConstant.cpp

diff  --git a/flang/test/Transforms/tbaa.fir b/flang/test/Transforms/tbaa.fir
new file mode 100644
index 000000000000000..7825ae60c71e681
--- /dev/null
+++ b/flang/test/Transforms/tbaa.fir
@@ -0,0 +1,175 @@
+// RUN: fir-opt --split-input-file --fir-add-alias-tags %s | FileCheck %s
+
+// subroutine oneArg(a)
+//   integer :: a(:)
+//   a(1) = a(2)
+// end subroutine
+  func.func @_QPonearg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %0 = fir.declare %arg0 {uniq_name = "_QFoneargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.array_coor %1 %c2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %3 = fir.load %2 : !fir.ref<i32>
+    %4 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %3 to %4 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[ONE_ARG_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPonearg">
+// CHECK: #[[ONE_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ONE_ARG_ROOT]], 0>}>
+// CHECK: #[[ONE_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ONE_ARG_ANY_ACCESS]], 0>}>
+// CHECK: #[[ONE_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ONE_ARG_ANY_DATA]], 0>}>
+// CHECK: #[[ONE_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFoneargEa", members = {<#[[ONE_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[ONE_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ONE_ARG_A]], access_type = #[[ONE_ARG_A]], offset = 0>
+
+// CHECK-LABEL:   func.func @_QPonearg(
+// CHECK-SAME:                         %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFoneargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_5]] {tbaa = [#[[ONE_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[VAL_6]] to %[[VAL_7]] {tbaa = [#[[ONE_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine twoArg(a, b)
+//   integer :: a(:), b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPtwoarg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {uniq_name = "_QFtwoargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.declare %arg1 {uniq_name = "_QFtwoargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.rebox %2 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %4 = fir.array_coor %3 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %5 = fir.load %4 : !fir.ref<i32>
+    %6 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %5 to %6 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[TWO_ARG_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtwoarg">
+// CHECK: #[[TWO_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TWO_ARG_ROOT]], 0>}>
+// CHECK: #[[TWO_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TWO_ARG_ANY_ACCESS]], 0>}>
+// CHECK: #[[TWO_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TWO_ARG_ANY_DATA]], 0>}>
+// CHECK: #[[TWO_ARG_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFtwoargEb", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFtwoargEa", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_B]], access_type = #[[TWO_ARG_B]], offset = 0>
+// CHECK: #[[TWO_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_A]], access_type = #[[TWO_ARG_A]], offset = 0>
+
+// CHECK-LABEL:   func.func @_QPtwoarg(
+// CHECK-SAME:                         %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                         %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFtwoargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFtwoargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[TWO_ARG_B_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_9]] {tbaa = [#[[TWO_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine targetArg(a, b)
+//   integer, target :: a(:)
+//   integer :: b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPtargetarg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a", fir.target}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFtargetargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.declare %arg1 {uniq_name = "_QFtargetargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.rebox %2 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %4 = fir.array_coor %3 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %5 = fir.load %4 : !fir.ref<i32>
+    %6 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %5 to %6 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[TARGET_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtargetarg">
+// CHECK: #[[TARGET_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TARGET_ROOT]], 0>}>
+// CHECK: #[[TARGET_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TARGET_ANY_ACCESS]], 0>}>
+// CHECK: #[[TARGET_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TARGET_ANY_DATA]], 0>}>
+// CHECK: #[[TARGET_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFtargetargEb", members = {<#[[TARGET_ANY_ARG]], 0>}>
+// CHECK: #[[TARGET_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TARGET_B]], access_type = #[[TARGET_B]], offset = 0>
+// No entry for "dummy arg data/a" because that pointer should get "any data access" becase it has the TARGET attribute
+
+// CHECK-LABEL:   func.func @_QPtargetarg(
+// CHECK-SAME:                            %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a", fir.target},
+// CHECK-SAME:                            %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<target>, uniq_name = "_QFtargetargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFtargetargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[TARGET_B_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// "any data access" tag is added by TBAABuilder during CodeGen
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine pointerArg(a, b)
+//   integer, pointer :: a(:)
+//   integer :: b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPpointerarg(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerargEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+    %1 = fir.declare %arg1 {uniq_name = "_QFpointerargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.rebox %1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.array_coor %2 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %4 = fir.load %3 : !fir.ref<i32>
+    %5 = fir.load %0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+    %6:3 = fir.box_dims %5, %c0 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+    %7 = fir.shift %6#0 : (index) -> !fir.shift<1>
+    %8 = fir.array_coor %5(%7) %c1 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.shift<1>, index) -> !fir.ref<i32>
+    fir.store %4 to %8 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[POINTER_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPpointerarg">
+// CHECK: #[[POINTER_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[POINTER_ROOT]], 0>}>
+// CHECK: #[[POINTER_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[POINTER_ANY_ACCESS]], 0>}>
+// CHECK: #[[POINTER_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[POINTER_ANY_DATA]], 0>}>
+// CHECK: #[[POINTER_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFpointerargEb", members = {<#[[POINTER_ANY_ARG]], 0>}>
+// CHECK: #[[POINTER_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[POINTER_B]], access_type = #[[POINTER_B]], offset = 0>
+// No entry for "dummy arg data/a" because that pointer should get "any data access" becase it has the POINTER attribute
+
+// CHECK-LABEL:   func.func @_QPpointerarg(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {fir.bindc_name = "a"},
+// CHECK-SAME:                             %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<pointer>, uniq_name = "_QFpointerargEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFpointerargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[POINTER_B_TAG]]]} : !fir.ref<i32>
+// "any descriptor access" tag is added by TBAABuilder during CodeGen
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]] = fir.shift %[[VAL_10]]#0 : (index) -> !fir.shift<1>
+// CHECK:           %[[VAL_12:.*]] = fir.array_coor %[[VAL_9]](%[[VAL_11]]) %[[VAL_3]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.shift<1>, index) -> !fir.ref<i32>
+// "any data access" tag is added by TBAABuilder during CodeGen
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_12]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }

diff  --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir
new file mode 100644
index 000000000000000..84ba281cce7a956
--- /dev/null
+++ b/flang/test/Transforms/tbaa2.fir
@@ -0,0 +1,386 @@
+// Test fir alias analysis pass on a larger real life code example (from the RFC)
+// RUN: fir-opt --fir-add-alias-tags %s | FileCheck %s
+
+  fir.global @_QMmodEa : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
+  }
+  fir.global @_QMmodEb : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
+  }
+  fir.global @_QMmodEdxinv : f32 {
+    %0 = fir.zero_bits f32
+    fir.has_value %0 : f32
+  }
+  fir.global @_QMmodEdyinv : f32 {
+    %0 = fir.zero_bits f32
+    fir.has_value %0 : f32
+  }
+  fir.global @_QMmodExstart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEystart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEystop : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEzstart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEzstop : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+
+// CHECK: #[[ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QMmodPcallee">
+// CHECK: #[[ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
+// CHECK: #[[ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANY_ACCESS]], 0>}>
+// CHECK: #[[ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ANY_DATA]], 0>}>
+// CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeElow", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEz", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEy", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_LOW]], access_type = #[[ARG_LOW]], offset = 0>
+// CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Z]], access_type = #[[ARG_Z]], offset = 0>
+// CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Y]], access_type = #[[ARG_Y]], offset = 0>
+
+  func.func @_QMmodPcallee(%arg0: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "z"}, %arg1: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "y"}, %arg2: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "low"}) {
+    %c2 = arith.constant 2 : index
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c1_i32 = arith.constant 1 : i32
+    %0 = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %1 = fir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %2 = fir.address_of(@_QMmodEb) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %3 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %4 = fir.address_of(@_QMmodEdxinv) : !fir.ref<f32>
+    %5 = fir.declare %4 {uniq_name = "_QMmodEdxinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %6 = fir.address_of(@_QMmodEdyinv) : !fir.ref<f32>
+    %7 = fir.declare %6 {uniq_name = "_QMmodEdyinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %8 = fir.address_of(@_QMmodExstart) : !fir.ref<i32>
+    %9 = fir.declare %8 {uniq_name = "_QMmodExstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %10 = fir.address_of(@_QMmodEystart) : !fir.ref<i32>
+    %11 = fir.declare %10 {uniq_name = "_QMmodEystart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %12 = fir.address_of(@_QMmodEystop) : !fir.ref<i32>
+    %13 = fir.declare %12 {uniq_name = "_QMmodEystop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %14 = fir.address_of(@_QMmodEzstart) : !fir.ref<i32>
+    %15 = fir.declare %14 {uniq_name = "_QMmodEzstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %16 = fir.address_of(@_QMmodEzstop) : !fir.ref<i32>
+    %17 = fir.declare %16 {uniq_name = "_QMmodEzstop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %18 = fir.alloca f32 {bindc_name = "dxold", uniq_name = "_QMmodFcalleeEdxold"}
+    %19 = fir.declare %18 {uniq_name = "_QMmodFcalleeEdxold"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %20 = fir.alloca f32 {bindc_name = "dzinv", uniq_name = "_QMmodFcalleeEdzinv"}
+    %21 = fir.declare %20 {uniq_name = "_QMmodFcalleeEdzinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %22 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmodFcalleeEi"}
+    %23 = fir.declare %22 {uniq_name = "_QMmodFcalleeEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %24 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmodFcalleeEj"}
+    %25 = fir.declare %24 {uniq_name = "_QMmodFcalleeEj"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %26 = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmodFcalleeEk"}
+    %27 = fir.declare %26 {uniq_name = "_QMmodFcalleeEk"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %28 = fir.declare %arg2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodFcalleeElow"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+    %29 = fir.declare %arg1 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QMmodFcalleeEy"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %30 = fir.rebox %29 : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %31 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QMmodFcalleeEz"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %32 = fir.rebox %31 : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %33 = fir.load %15 : !fir.ref<i32>
+    %34 = arith.addi %33, %c1_i32 : i32
+    %35 = fir.convert %34 : (i32) -> index
+    %36 = fir.load %17 : !fir.ref<i32>
+    %37 = fir.convert %36 : (i32) -> index
+    %38 = fir.convert %35 : (index) -> i32
+    %39:2 = fir.do_loop %arg3 = %35 to %37 step %c1 iter_args(%arg4 = %38) -> (index, i32) {
+      fir.store %arg4 to %27 : !fir.ref<i32>
+      %40 = fir.load %11 : !fir.ref<i32>
+      %41 = arith.addi %40, %c1_i32 : i32
+      %42 = fir.convert %41 : (i32) -> index
+      %43 = fir.load %13 : !fir.ref<i32>
+      %44 = fir.convert %43 : (i32) -> index
+      %45 = fir.convert %42 : (index) -> i32
+      %46:2 = fir.do_loop %arg5 = %42 to %44 step %c1 iter_args(%arg6 = %45) -> (index, i32) {
+        fir.store %arg6 to %25 : !fir.ref<i32>
+        %51 = fir.load %9 : !fir.ref<i32>
+        %52 = arith.addi %51, %c1_i32 : i32
+        %53 = fir.convert %52 : (i32) -> index
+        %54 = fir.convert %53 : (index) -> i32
+        %55:2 = fir.do_loop %arg7 = %53 to %c0 step %c1 iter_args(%arg8 = %54) -> (index, i32) {
+          fir.store %arg8 to %23 : !fir.ref<i32>
+          %60 = fir.load %28 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+          %61 = fir.load %23 : !fir.ref<i32>
+          %62 = fir.convert %61 : (i32) -> i64
+          %63 = fir.load %25 : !fir.ref<i32>
+          %64 = fir.convert %63 : (i32) -> i64
+          %65 = fir.load %27 : !fir.ref<i32>
+          %66 = fir.convert %65 : (i32) -> i64
+          %67 = fir.box_addr %60 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+          %68:3 = fir.box_dims %60, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %69:3 = fir.box_dims %60, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %70:3 = fir.box_dims %60, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %71 = fir.shape_shift %68#0, %68#1, %69#0, %69#1, %70#0, %70#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+          %72 = fir.array_coor %67(%71) %62, %64, %66 : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+          %73 = fir.load %72 : !fir.ref<f32>
+          fir.store %73 to %19 : !fir.ref<f32>
+          %74 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+          %75 = fir.load %25 : !fir.ref<i32>
+          %76 = fir.convert %75 : (i32) -> i64
+          %77 = fir.box_addr %74 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+          %78:3 = fir.box_dims %74, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+          %79 = fir.shape_shift %78#0, %78#1 : (index, index) -> !fir.shapeshift<1>
+          %80 = fir.array_coor %77(%79) %76 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+          %81 = fir.load %80 : !fir.ref<f32>
+          %82 = fir.load %28 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+          %83 = fir.load %23 : !fir.ref<i32>
+          %84 = fir.convert %83 : (i32) -> i64
+          %85 = fir.load %27 : !fir.ref<i32>
+          %86 = fir.convert %85 : (i32) -> i64
+          %87 = fir.box_addr %82 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+          %88:3 = fir.box_dims %82, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %89:3 = fir.box_dims %82, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %90:3 = fir.box_dims %82, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %91 = fir.shape_shift %88#0, %88#1, %89#0, %89#1, %90#0, %90#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+          %92 = fir.array_coor %87(%91) %84, %76, %86 : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+          %93 = fir.load %92 : !fir.ref<f32>
+          %94 = arith.mulf %81, %93 fastmath<contract> : f32
+          %95 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+          %96 = fir.box_addr %95 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+          %97:3 = fir.box_dims %95, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+          %98 = fir.shape_shift %97#0, %97#1 : (index, index) -> !fir.shapeshift<1>
+          %99 = fir.array_coor %96(%98) %76 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+          %100 = fir.load %99 : !fir.ref<f32>
+          %101 = fir.array_coor %32 %84, %76, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %102 = fir.load %101 : !fir.ref<f32>
+          %103 = arith.subi %75, %c1_i32 : i32
+          %104 = fir.convert %103 : (i32) -> i64
+          %105 = fir.array_coor %32 %84, %104, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %106 = fir.load %105 : !fir.ref<f32>
+          %107 = arith.subf %102, %106 fastmath<contract> : f32
+          %108 = fir.no_reassoc %107 : f32
+          %109 = fir.load %7 : !fir.ref<f32>
+          %110 = arith.mulf %108, %109 fastmath<contract> : f32
+          %111 = arith.subi %85, %c1_i32 : i32
+          %112 = fir.convert %111 : (i32) -> i64
+          %113 = fir.array_coor %30 %84, %76, %112 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %114 = fir.load %113 : !fir.ref<f32>
+          %115 = fir.array_coor %30 %84, %76, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %116 = fir.load %115 : !fir.ref<f32>
+          %117 = arith.subf %114, %116 fastmath<contract> : f32
+          %118 = fir.no_reassoc %117 : f32
+          %119 = fir.load %21 : !fir.ref<f32>
+          %120 = arith.mulf %118, %119 fastmath<contract> : f32
+          %121 = arith.addf %110, %120 fastmath<contract> : f32
+          %122 = fir.no_reassoc %121 : f32
+          %123 = arith.mulf %100, %122 fastmath<contract> : f32
+          %124 = arith.addf %94, %123 fastmath<contract> : f32
+          fir.store %124 to %92 : !fir.ref<f32>
+          %125 = arith.addi %arg7, %c1 : index
+          %126 = fir.convert %c1 : (index) -> i32
+          %127 = fir.load %23 : !fir.ref<i32>
+          %128 = arith.addi %127, %126 : i32
+          fir.result %125, %128 : index, i32
+        }
+        fir.store %55#1 to %23 : !fir.ref<i32>
+        %56 = arith.addi %arg5, %c1 : index
+        %57 = fir.convert %c1 : (index) -> i32
+        %58 = fir.load %25 : !fir.ref<i32>
+        %59 = arith.addi %58, %57 : i32
+        fir.result %56, %59 : index, i32
+      }
+      fir.store %46#1 to %25 : !fir.ref<i32>
+      %47 = arith.addi %arg3, %c1 : index
+      %48 = fir.convert %c1 : (index) -> i32
+      %49 = fir.load %27 : !fir.ref<i32>
+      %50 = arith.addi %49, %48 : i32
+      fir.result %47, %50 : index, i32
+    }
+    fir.store %39#1 to %27 : !fir.ref<i32>
+    return
+  }
+// CHECK-LABEL:   func.func @_QMmodPcallee(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "z"},
+// CHECK-SAME:                             %[[VAL_1:.*]]: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "y"},
+// CHECK-SAME:                             %[[VAL_2:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "low"}) {
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_8:.*]] = fir.declare %[[VAL_7]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_9:.*]] = fir.address_of(@_QMmodEb) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_10:.*]] = fir.declare %[[VAL_9]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_11:.*]] = fir.address_of(@_QMmodEdxinv) : !fir.ref<f32>
+// CHECK:           %[[VAL_12:.*]] = fir.declare %[[VAL_11]] {uniq_name = "_QMmodEdxinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_13:.*]] = fir.address_of(@_QMmodEdyinv) : !fir.ref<f32>
+// CHECK:           %[[VAL_14:.*]] = fir.declare %[[VAL_13]] {uniq_name = "_QMmodEdyinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_15:.*]] = fir.address_of(@_QMmodExstart) : !fir.ref<i32>
+// CHECK:           %[[VAL_16:.*]] = fir.declare %[[VAL_15]] {uniq_name = "_QMmodExstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_17:.*]] = fir.address_of(@_QMmodEystart) : !fir.ref<i32>
+// CHECK:           %[[VAL_18:.*]] = fir.declare %[[VAL_17]] {uniq_name = "_QMmodEystart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_19:.*]] = fir.address_of(@_QMmodEystop) : !fir.ref<i32>
+// CHECK:           %[[VAL_20:.*]] = fir.declare %[[VAL_19]] {uniq_name = "_QMmodEystop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_21:.*]] = fir.address_of(@_QMmodEzstart) : !fir.ref<i32>
+// CHECK:           %[[VAL_22:.*]] = fir.declare %[[VAL_21]] {uniq_name = "_QMmodEzstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_23:.*]] = fir.address_of(@_QMmodEzstop) : !fir.ref<i32>
+// CHECK:           %[[VAL_24:.*]] = fir.declare %[[VAL_23]] {uniq_name = "_QMmodEzstop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_25:.*]] = fir.alloca f32 {bindc_name = "dxold", uniq_name = "_QMmodFcalleeEdxold"}
+// CHECK:           %[[VAL_26:.*]] = fir.declare %[[VAL_25]] {uniq_name = "_QMmodFcalleeEdxold"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_27:.*]] = fir.alloca f32 {bindc_name = "dzinv", uniq_name = "_QMmodFcalleeEdzinv"}
+// CHECK:           %[[VAL_28:.*]] = fir.declare %[[VAL_27]] {uniq_name = "_QMmodFcalleeEdzinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_29:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmodFcalleeEi"}
+// CHECK:           %[[VAL_30:.*]] = fir.declare %[[VAL_29]] {uniq_name = "_QMmodFcalleeEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_31:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmodFcalleeEj"}
+// CHECK:           %[[VAL_32:.*]] = fir.declare %[[VAL_31]] {uniq_name = "_QMmodFcalleeEj"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_33:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmodFcalleeEk"}
+// CHECK:           %[[VAL_34:.*]] = fir.declare %[[VAL_33]] {uniq_name = "_QMmodFcalleeEk"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_35:.*]] = fir.declare %[[VAL_2]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodFcalleeElow"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// CHECK:           %[[VAL_36:.*]] = fir.declare %[[VAL_1]] {fortran_attrs = #{{.*}}<intent_in>, uniq_name = "_QMmodFcalleeEy"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_37:.*]] = fir.rebox %[[VAL_36]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_38:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<intent_in>, uniq_name = "_QMmodFcalleeEz"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_39:.*]] = fir.rebox %[[VAL_38]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// TODO: read from global assumed to always alias
+// CHECK:           %[[VAL_40:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+// CHECK:           %[[VAL_41:.*]] = arith.addi %[[VAL_40]], %[[VAL_6]] : i32
+// CHECK:           %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (i32) -> index
+// TODO: read from global assumed to always alias
+// CHECK:           %[[VAL_43:.*]] = fir.load %[[VAL_24]] : !fir.ref<i32>
+// CHECK:           %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i32) -> index
+// CHECK:           %[[VAL_45:.*]] = fir.convert %[[VAL_42]] : (index) -> i32
+// CHECK:           %[[VAL_46:.*]]:2 = fir.do_loop %[[VAL_47:.*]] = %[[VAL_42]] to %[[VAL_44]] step %[[VAL_5]] iter_args(%[[VAL_48:.*]] = %[[VAL_45]]) -> (index, i32) {
+// CHECK:             fir.store %[[VAL_48]] to %[[VAL_34]] : !fir.ref<i32>
+// TODO: read from global assumed to always alias
+// CHECK:             %[[VAL_49:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
+// CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_6]] : i32
+// CHECK:             %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i32) -> index
+// TODO: read from global assumed to always alias
+// CHECK:             %[[VAL_52:.*]] = fir.load %[[VAL_20]] : !fir.ref<i32>
+// CHECK:             %[[VAL_53:.*]] = fir.convert %[[VAL_52]] : (i32) -> index
+// CHECK:             %[[VAL_54:.*]] = fir.convert %[[VAL_51]] : (index) -> i32
+// CHECK:             %[[VAL_55:.*]]:2 = fir.do_loop %[[VAL_56:.*]] = %[[VAL_51]] to %[[VAL_53]] step %[[VAL_5]] iter_args(%[[VAL_57:.*]] = %[[VAL_54]]) -> (index, i32) {
+// CHECK:               fir.store %[[VAL_57]] to %[[VAL_32]] : !fir.ref<i32>
+// TODO: read from global assumed to always alias
+// CHECK:               %[[VAL_58:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
+// CHECK:               %[[VAL_59:.*]] = arith.addi %[[VAL_58]], %[[VAL_6]] : i32
+// CHECK:               %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (i32) -> index
+// CHECK:               %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (index) -> i32
+// CHECK:               %[[VAL_62:.*]]:2 = fir.do_loop %[[VAL_63:.*]] = %[[VAL_60]] to %[[VAL_4]] step %[[VAL_5]] iter_args(%[[VAL_64:.*]] = %[[VAL_61]]) -> (index, i32) {
+// TODO: local allocation assumed to always alias
+// CHECK:                 fir.store %[[VAL_64]] to %[[VAL_30]] : !fir.ref<i32>
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_65:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_66:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_67:.*]] = fir.convert %[[VAL_66]] : (i32) -> i64
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_68:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_69:.*]] = fir.convert %[[VAL_68]] : (i32) -> i64
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_70:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_71:.*]] = fir.convert %[[VAL_70]] : (i32) -> i64
+// CHECK:                 %[[VAL_72:.*]] = fir.box_addr %[[VAL_65]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+// CHECK:                 %[[VAL_73:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_74:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_5]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_75:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_76:.*]] = fir.shape_shift %[[VAL_73]]#0, %[[VAL_73]]#1, %[[VAL_74]]#0, %[[VAL_74]]#1, %[[VAL_75]]#0, %[[VAL_75]]#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+// CHECK:                 %[[VAL_77:.*]] = fir.array_coor %[[VAL_72]](%[[VAL_76]]) %[[VAL_67]], %[[VAL_69]], %[[VAL_71]] : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_78:.*]] = fir.load %[[VAL_77]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 fir.store %[[VAL_78]] to %[[VAL_26]] : !fir.ref<f32>
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_79:.*]] = fir.load %[[VAL_8]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_80:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_81:.*]] = fir.convert %[[VAL_80]] : (i32) -> i64
+// CHECK:                 %[[VAL_82:.*]] = fir.box_addr %[[VAL_79]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK:                 %[[VAL_83:.*]]:3 = fir.box_dims %[[VAL_79]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_84:.*]] = fir.shape_shift %[[VAL_83]]#0, %[[VAL_83]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK:                 %[[VAL_85:.*]] = fir.array_coor %[[VAL_82]](%[[VAL_84]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+// load from global variable
+// CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_85]] : !fir.ref<f32>
+// load from box
+// CHECK:                 %[[VAL_87:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// load from local allocation
+// CHECK:                 %[[VAL_88:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_89:.*]] = fir.convert %[[VAL_88]] : (i32) -> i64
+// load from local allocation
+// CHECK:                 %[[VAL_90:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_91:.*]] = fir.convert %[[VAL_90]] : (i32) -> i64
+// CHECK:                 %[[VAL_92:.*]] = fir.box_addr %[[VAL_87]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+// CHECK:                 %[[VAL_93:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_94:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_5]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_95:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_96:.*]] = fir.shape_shift %[[VAL_93]]#0, %[[VAL_93]]#1, %[[VAL_94]]#0, %[[VAL_94]]#1, %[[VAL_95]]#0, %[[VAL_95]]#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+// CHECK:                 %[[VAL_97:.*]] = fir.array_coor %[[VAL_92]](%[[VAL_96]]) %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_98:.*]] = fir.load %[[VAL_97]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_99:.*]] = arith.mulf %[[VAL_86]], %[[VAL_98]] fastmath<contract> : f32
+// load from box
+// CHECK:                 %[[VAL_100:.*]] = fir.load %[[VAL_10]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:                 %[[VAL_101:.*]] = fir.box_addr %[[VAL_100]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK:                 %[[VAL_102:.*]]:3 = fir.box_dims %[[VAL_100]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_103:.*]] = fir.shape_shift %[[VAL_102]]#0, %[[VAL_102]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK:                 %[[VAL_104:.*]] = fir.array_coor %[[VAL_101]](%[[VAL_103]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+// load from global variable
+// CHECK:                 %[[VAL_105:.*]] = fir.load %[[VAL_104]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_106:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_107:.*]] = fir.load %[[VAL_106]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_108:.*]] = arith.subi %[[VAL_80]], %[[VAL_6]] : i32
+// CHECK:                 %[[VAL_109:.*]] = fir.convert %[[VAL_108]] : (i32) -> i64
+// CHECK:                 %[[VAL_110:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_109]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_111:.*]] = fir.load %[[VAL_110]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_112:.*]] = arith.subf %[[VAL_107]], %[[VAL_111]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_113:.*]] = fir.no_reassoc %[[VAL_112]] : f32
+// load from global variable
+// CHECK:                 %[[VAL_114:.*]] = fir.load %[[VAL_14]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_115:.*]] = arith.mulf %[[VAL_113]], %[[VAL_114]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_116:.*]] = arith.subi %[[VAL_90]], %[[VAL_6]] : i32
+// CHECK:                 %[[VAL_117:.*]] = fir.convert %[[VAL_116]] : (i32) -> i64
+// CHECK:                 %[[VAL_118:.*]] = fir.array_coor %[[VAL_37]] %[[VAL_89]], %[[VAL_81]], %[[VAL_117]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_119:.*]] = fir.load %[[VAL_118]] {tbaa = [#[[ARG_Y_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_120:.*]] = fir.array_coor %[[VAL_37]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_121:.*]] = fir.load %[[VAL_120]] {tbaa = [#[[ARG_Y_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_122:.*]] = arith.subf %[[VAL_119]], %[[VAL_121]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_123:.*]] = fir.no_reassoc %[[VAL_122]] : f32
+// load from local allocation
+// CHECK:                 %[[VAL_124:.*]] = fir.load %[[VAL_28]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_125:.*]] = arith.mulf %[[VAL_123]], %[[VAL_124]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_126:.*]] = arith.addf %[[VAL_115]], %[[VAL_125]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_127:.*]] = fir.no_reassoc %[[VAL_126]] : f32
+// CHECK:                 %[[VAL_128:.*]] = arith.mulf %[[VAL_105]], %[[VAL_127]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_129:.*]] = arith.addf %[[VAL_99]], %[[VAL_128]] fastmath<contract> : f32
+// CHECK:                 fir.store %[[VAL_129]] to %[[VAL_97]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_130:.*]] = arith.addi %[[VAL_63]], %[[VAL_5]] : index
+// CHECK:                 %[[VAL_131:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// load from local allocation
+// CHECK:                 %[[VAL_132:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_133:.*]] = arith.addi %[[VAL_132]], %[[VAL_131]] : i32
+// CHECK:                 fir.result %[[VAL_130]], %[[VAL_133]] : index, i32
+// CHECK:               }
+// store to local allocation
+// CHECK:               fir.store %[[VAL_134:.*]]#1 to %[[VAL_30]] : !fir.ref<i32>
+// CHECK:               %[[VAL_135:.*]] = arith.addi %[[VAL_56]], %[[VAL_5]] : index
+// CHECK:               %[[VAL_136:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// local allocation:
+// CHECK:               %[[VAL_137:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:               %[[VAL_138:.*]] = arith.addi %[[VAL_137]], %[[VAL_136]] : i32
+// CHECK:               fir.result %[[VAL_135]], %[[VAL_138]] : index, i32
+// CHECK:             }
+// local allocation:
+// CHECK:             fir.store %[[VAL_139:.*]]#1 to %[[VAL_32]] : !fir.ref<i32>
+// CHECK:             %[[VAL_140:.*]] = arith.addi %[[VAL_47]], %[[VAL_5]] : index
+// CHECK:             %[[VAL_141:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// local allocation:
+// CHECK:             %[[VAL_142:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:             %[[VAL_143:.*]] = arith.addi %[[VAL_142]], %[[VAL_141]] : i32
+// CHECK:             fir.result %[[VAL_140]], %[[VAL_143]] : index, i32
+// CHECK:           }
+// local allocation:
+// CHECK:           fir.store %[[VAL_144:.*]]#1 to %[[VAL_34]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }


        


More information about the flang-commits mailing list