[flang-commits] [flang] [flang] use TBAA Forest in TBAABuilder (PR #68437)

Tom Eccles via flang-commits flang-commits at lists.llvm.org
Fri Oct 6 11:20:33 PDT 2023


https://github.com/tblah created https://github.com/llvm/llvm-project/pull/68437

See previous reviews at https://github.com/llvm/llvm-project/pull/68414 and https://github.com/llvm/llvm-project/pull/68317

This is important to ensure that tags end up in the same trees that were
created in the FIR TBAA pass. If they are in different trees then
everything in one tree will be assumed to MayAlias with everything in the
other tree. This leads to poor performance.

@vzakhari requested that the old (not-per-function) trees are
maintained so I left the old test intact.

>From c13a5dc6d0509e329c8735c26361cd3abfdde5a1 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 12 Sep 2023 15:40:30 +0000
Subject: [PATCH 1/4] [flang][FIR] add FirAliasAnalysisOpInterface

This interface allows (HL)FIR passes to add TBAA information to fir.load
and fir.store. If present, these TBAA tags take precedence over those
added during CodeGen.

We can't reuse mlir::LLVMIR::AliasAnalysisOpInterface because that uses
the mlir::LLVMIR namespace so it tries to define methods for fir
operations in the wrong namespace. But I did re-use the tbaa tag type to
minimise boilerplate code.

The new builders are to preserve the old interface without the tbaa tag.
---
 .../flang/Optimizer/Dialect/CMakeLists.txt    |  4 ++
 .../include/flang/Optimizer/Dialect/FIROps.h  |  1 +
 .../include/flang/Optimizer/Dialect/FIROps.td | 17 ++++--
 .../Dialect/FirAliasAnalysisOpInterface.h     | 27 +++++++++
 .../Dialect/FirAliasAnalysisOpInterface.td    | 59 +++++++++++++++++++
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 15 ++++-
 flang/lib/Optimizer/Dialect/CMakeLists.txt    |  1 +
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 17 +++++-
 .../Dialect/FirAliasAnalysisOpInterface.cpp   | 31 ++++++++++
 flang/test/Fir/tbaa-codegen.fir               | 47 +++++++++++++++
 10 files changed, 210 insertions(+), 9 deletions(-)
 create mode 100644 flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
 create mode 100644 flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
 create mode 100644 flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
 create mode 100644 flang/test/Fir/tbaa-codegen.fir

diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
index d657e3f16690377..15c835aad9bc7d2 100644
--- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
@@ -18,6 +18,10 @@ set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td)
 mlir_tablegen(FortranVariableInterface.h.inc -gen-op-interface-decls)
 mlir_tablegen(FortranVariableInterface.cpp.inc -gen-op-interface-defs)
 
+set(LLVM_TARGET_DEFINITIONS FirAliasAnalysisOpInterface.td)
+mlir_tablegen(FirAliasAnalaysOpInterface.h.inc -gen-op-interface-decls)
+mlir_tablegen(FirAliasAnalysisOpInterface.cpp.inc -gen-op-interface-defs)
+
 set(LLVM_TARGET_DEFINITIONS CanonicalizationPatterns.td)
 mlir_tablegen(CanonicalizationPatterns.inc -gen-rewriters)
 add_public_tablegen_target(CanonicalizationPatternsIncGen)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h
index 8f03dc5cf795225..bab35bac5c81f4b 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.h
@@ -11,6 +11,7 @@
 
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
 #include "flang/Optimizer/Dialect/FortranVariableInterface.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index a57add9f731979d..fae9b92662f3559 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -16,10 +16,12 @@
 
 include "mlir/Dialect/Arith/IR/ArithBase.td"
 include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
+include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
 include "flang/Optimizer/Dialect/FIRDialect.td"
 include "flang/Optimizer/Dialect/FIRTypes.td"
 include "flang/Optimizer/Dialect/FIRAttr.td"
 include "flang/Optimizer/Dialect/FortranVariableInterface.td"
+include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td"
 include "mlir/IR/BuiltinAttributes.td"
 
 // Base class for FIR operations.
@@ -258,7 +260,7 @@ def fir_FreeMemOp : fir_Op<"freemem", [MemoryEffects<[MemFree]>]> {
   let assemblyFormat = "$heapref attr-dict `:` qualified(type($heapref))";
 }
 
-def fir_LoadOp : fir_OneResultOp<"load", []> {
+def fir_LoadOp : fir_OneResultOp<"load", [FirAliasAnalysisOpInterface]> {
   let summary = "load a value from a memory reference";
   let description = [{
     Load a value from a memory reference into an ssa-value (virtual register).
@@ -274,9 +276,11 @@ def fir_LoadOp : fir_OneResultOp<"load", []> {
     or null.
   }];
 
-  let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$memref);
+  let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$memref,
+                  OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
 
-  let builders = [OpBuilder<(ins "mlir::Value":$refVal)>];
+  let builders = [OpBuilder<(ins "mlir::Value":$refVal)>,
+                  OpBuilder<(ins "mlir::Type":$resTy, "mlir::Value":$refVal)>];
 
   let hasCustomAssemblyFormat = 1;
 
@@ -285,7 +289,7 @@ def fir_LoadOp : fir_OneResultOp<"load", []> {
   }];
 }
 
-def fir_StoreOp : fir_Op<"store", []> {
+def fir_StoreOp : fir_Op<"store", [FirAliasAnalysisOpInterface]> {
   let summary = "store an SSA-value to a memory location";
 
   let description = [{
@@ -305,7 +309,10 @@ def fir_StoreOp : fir_Op<"store", []> {
   }];
 
   let arguments = (ins AnyType:$value,
-                   Arg<AnyReferenceLike, "", [MemWrite]>:$memref);
+                   Arg<AnyReferenceLike, "", [MemWrite]>:$memref,
+                   OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
+
+  let builders = [OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$memref)>];
 
   let hasCustomAssemblyFormat = 1;
   let hasVerifier = 1;
diff --git a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
new file mode 100644
index 000000000000000..c07bf648eab454a
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
@@ -0,0 +1,27 @@
+//===- FirAliasAnalysisInterface.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an interface for adding alias analysis information to
+// loads and stores
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
+#define FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
+
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Support/LogicalResult.h"
+
+namespace fir::detail {
+mlir::LogicalResult verifyFirAliasAnalysisOpInterface(mlir::Operation *op);
+} // namespace fir::detail
+
+#include "flang/Optimizer/Dialect/FirAliasAnalaysOpInterface.h.inc"
+
+#endif // FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
diff --git a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
new file mode 100644
index 000000000000000..1d3e49d383f63a4
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
@@ -0,0 +1,59 @@
+//===-- FirAliasAnalysisOpInterface.td ---------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "mlir/IR/Interfaces.td"
+
+def FirAliasAnalysisOpInterface : OpInterface<"FirAliasAnalysisOpInterface"> {
+  let description = [{
+    An interface for memory operations that can carry alias analysis metadata.
+    It provides setters and getters for the operation's alias analysis
+    attributes. The default implementations of the interface methods expect
+    the operation to have an attribute of type ArrayAttr named tbaa.
+    Unlike the mlir::LLVM::AliasAnalysisOpInterface, this only supports tbaa.
+  }];
+
+  let cppNamespace = "::fir";
+  let verify = [{ return detail::verifyFirAliasAnalysisOpInterface($_op); }];
+
+  let methods = [
+    InterfaceMethod<
+      /*desc=*/        "Returns the tbaa attribute or nullptr",
+      /*returnType=*/  "mlir::ArrayAttr",
+      /*methodName=*/  "getTBAATagsOrNull",
+      /*args=*/        (ins),
+      /*methodBody=*/  [{}],
+      /*defaultImpl=*/ [{
+        auto op = mlir::cast<ConcreteOp>(this->getOperation());
+        return op.getTbaaAttr();
+      }]
+      >,
+    InterfaceMethod<
+      /*desc=*/        "Sets the tbaa attribute",
+      /*returnType=*/  "void",
+      /*methodName=*/  "setTBAATags",
+      /*args=*/        (ins "const mlir::ArrayAttr":$attr),
+      /*methodBody=*/  [{}],
+      /*defaultImpl=*/ [{
+        auto op = mlir::cast<ConcreteOp>(this->getOperation());
+        op.setTbaaAttr(attr);
+      }]
+      >,
+    InterfaceMethod<
+      /*desc=*/        "Returns a list of all pointer operands accessed by the "
+                       "operation",
+      /*returnType=*/  "::llvm::SmallVector<::mlir::Value>",
+      /*methodName=*/  "getAccessedOperands",
+      /*args=*/        (ins),
+      /*methodBody=*/  [{}],
+      /*defaultImpl=*/ [{
+        auto op = mlir::cast<ConcreteOp>(this->getOperation());
+        return {op.getMemref()};
+      }]
+      >
+  ];
+}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index d1b7f3de93b4647..f2ce123124895e0 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3085,7 +3085,10 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
       auto boxValue = rewriter.create<mlir::LLVM::LoadOp>(
           loc, boxPtrTy.cast<mlir::LLVM::LLVMPointerType>().getElementType(),
           inputBoxStorage);
-      attachTBAATag(boxValue, boxTy, boxTy, nullptr);
+      if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
+        boxValue.setTBAATags(*optionalTag);
+      else
+        attachTBAATag(boxValue, boxTy, boxTy, nullptr);
       auto newBoxStorage =
           genAllocaWithType(loc, boxPtrTy, defaultAlign, rewriter);
       auto storeOp =
@@ -3096,7 +3099,10 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
       mlir::Type loadTy = convertType(load.getType());
       auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
           load.getLoc(), loadTy, adaptor.getOperands(), load->getAttrs());
-      attachTBAATag(loadOp, load.getType(), load.getType(), nullptr);
+      if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
+        loadOp.setTBAATags(*optionalTag);
+      else
+        attachTBAATag(loadOp, load.getType(), load.getType(), nullptr);
       rewriter.replaceOp(load, loadOp.getResult());
     }
     return mlir::success();
@@ -3340,7 +3346,10 @@ struct StoreOpConversion : public FIROpConversion<fir::StoreOp> {
       newStoreOp = rewriter.create<mlir::LLVM::StoreOp>(
           loc, adaptor.getOperands()[0], adaptor.getOperands()[1]);
     }
-    attachTBAATag(newStoreOp, storeTy, storeTy, nullptr);
+    if (std::optional<mlir::ArrayAttr> optionalTag = store.getTbaa())
+      newStoreOp.setTBAATags(*optionalTag);
+    else
+      attachTBAATag(newStoreOp, storeTy, storeTy, nullptr);
     rewriter.eraseOp(store);
     return mlir::success();
   }
diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt
index fe5edb54a78e9e5..6beabcdb4e25d76 100644
--- a/flang/lib/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(FIRDialect
   FIROps.cpp
   FIRType.cpp
   FortranVariableInterface.cpp
+  FirAliasAnalysisOpInterface.cpp
   Inliner.cpp
 
   DEPENDS
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 962b87acd5a8050..2f08cd1b8111531 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2016,8 +2016,18 @@ void fir::LoadOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
     mlir::emitError(result.location, "not a memory reference type");
     return;
   }
+  build(builder, result, eleTy, refVal);
+}
+
+void fir::LoadOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+                        mlir::Type resTy, mlir::Value refVal) {
+
+  if (!refVal) {
+    mlir::emitError(result.location, "LoadOp has null argument");
+    return;
+  }
   result.addOperands(refVal);
-  result.addTypes(eleTy);
+  result.addTypes(resTy);
 }
 
 mlir::ParseResult fir::LoadOp::getElementOf(mlir::Type &ele, mlir::Type ref) {
@@ -3288,6 +3298,11 @@ mlir::LogicalResult fir::StoreOp::verify() {
   return mlir::success();
 }
 
+void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+                         mlir::Value value, mlir::Value memref) {
+  build(builder, result, value, memref, {});
+}
+
 //===----------------------------------------------------------------------===//
 // StringLitOp
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp b/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
new file mode 100644
index 000000000000000..63686b1d0e7ebb6
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
@@ -0,0 +1,31 @@
+//===-- FirAliasAnalysisOpInterface.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp.inc"
+
+mlir::LogicalResult
+fir::detail::verifyFirAliasAnalysisOpInterface(mlir::Operation *op) {
+  auto iface = mlir::cast<FirAliasAnalysisOpInterface>(op);
+
+  mlir::ArrayAttr tags = iface.getTBAATagsOrNull();
+  if (!tags)
+    return mlir::success();
+
+  for (mlir::Attribute iter : tags)
+    if (!mlir::isa<mlir::LLVM::TBAATagAttr>(iter))
+      return op->emitOpError("expected op to return array of ")
+             << mlir::LLVM::TBAATagAttr::getMnemonic() << " attributes";
+  return mlir::success();
+}
diff --git a/flang/test/Fir/tbaa-codegen.fir b/flang/test/Fir/tbaa-codegen.fir
new file mode 100644
index 000000000000000..386fe42eaaba9a2
--- /dev/null
+++ b/flang/test/Fir/tbaa-codegen.fir
@@ -0,0 +1,47 @@
+// test that tbaa attributes can be added to fir.load and fir.store
+// and that these attributes are propagated to LLVMIR
+
+// RUN: tco %s | FileCheck %s
+
+// subroutine simple(a)
+//   integer, intent(inout) :: a(:)
+//   a(1) = a(2)
+// end subroutine
+#tbaa_root = #llvm.tbaa_root<id = "Flang function root _QPsimple">
+#tbaa_type_desc = #llvm.tbaa_type_desc<id = "any access", members = {<#tbaa_root, 0>}>
+#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "any data access", members = {<#tbaa_type_desc, 0>}>
+#tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#tbaa_type_desc1, 0>}>
+#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#tbaa_type_desc2, 0>}>
+#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc3, access_type = #tbaa_type_desc3, offset = 0>
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "aarch64-unknown-linux-gnu"} {
+  func.func @_QPsimple(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFfuncEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.array_coor %1 %c2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %3 = fir.load %2 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+    %4 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %3 to %4 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+    return
+  }
+}
+
+// CHECK-LABEL: define void @_QPsimple(
+// CHECK-SAME:      ptr %[[ARG0:.*]]) {
+// [...]
+// load  a(2):
+// CHECK:  %[[VAL20:.*]] = getelementptr i8, ptr %{{.*}}, i64 %{{.*}}
+// CHECK:  %[[A2:.*]] = load i32, ptr %[[VAL20]], align 4, !tbaa ![[A_ACCESS_TAG:.*]]
+// [...]
+// store a(2) to a(1):
+// CHECK:  %[[A1:.*]] = getelementptr i8, ptr %{{.*}}, i64 %{{.*}}
+// CHECK:  store i32 %[[A2]], ptr %[[A1]], align 4, !tbaa ![[A_ACCESS_TAG]]
+// CHECK:  ret void
+// CHECK: }
+// CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
+// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[DUMMY_ARG_TYPE:.*]], i64 0}
+// CHECK: ![[DUMMY_ARG_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[ANY_ACCESS_TYPE]] = !{!"any access", ![[ROOT:.*]], i64 0}
+// CHECK: ![[ROOT]] = !{!"Flang function root _QPsimple"}
\ No newline at end of file

>From 15648e0f9af0471d327f07f6a089441e7c50c1b9 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 5 Oct 2023 17:10:10 +0000
Subject: [PATCH 2/4] [flang][FIR] Add dependency from FIRDialect to
 LLVMDialect

For TBBAA attribute types.
---
 flang/include/flang/Optimizer/Dialect/FIRDialect.td | 4 +++-
 flang/lib/Optimizer/Dialect/FIRDialect.cpp          | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIRDialect.td b/flang/include/flang/Optimizer/Dialect/FIRDialect.td
index d0735bbeb2d3d88..b366b6d40e4e213 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRDialect.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRDialect.td
@@ -30,7 +30,9 @@ def fir_Dialect : Dialect {
   let dependentDialects = [
     // Arith dialect provides FastMathFlagsAttr
     // supported by some FIR operations.
-    "arith::ArithDialect"
+    "arith::ArithDialect",
+    // TBAA Tag types
+    "LLVM::LLVMDialect"
   ];
 }
 
diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp
index c2377f112be8473..997a6c90ada314a 100644
--- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp
@@ -14,6 +14,7 @@
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Transforms/InliningUtils.h"
 
 using namespace fir;
@@ -58,6 +59,7 @@ struct FIRInlinerInterface : public mlir::DialectInlinerInterface {
 
 fir::FIROpsDialect::FIROpsDialect(mlir::MLIRContext *ctx)
     : mlir::Dialect("fir", ctx, mlir::TypeID::get<FIROpsDialect>()) {
+  getContext()->loadDialect<mlir::LLVM::LLVMDialect>();
   registerTypes();
   registerAttributes();
   addOperations<

>From 4f262cfef8ea5d8b3a8d45b93a8843963ccb63bd Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 13 Sep 2023 11:25:01 +0000
Subject: [PATCH 3/4] [flang][FIR] add FIR TBAA pass

See RFC at
https://discourse.llvm.org/t/rfc-propagate-fir-alias-analysis-information-using-tbaa/73755

This pass adds TBAA tags to all accesses to non-pointer/target dummy
arguments. These TBAA tags tell LLVM that these accesses cannot alias:
allowing better dead code elimination, hoisting out of loops, and
vectorization.

Each function has its own TBAA tree so that accesses between funtions
MayAlias after inlining.

I also included code for adding tags for local allocations and for
global variables. Enabling all three kinds of tag is known to produce a
miscompile and so these are disabled by default. But it isn't much
code and I thought it could be interesting to play with these later if
one is looking at a benchmark which looks like it would benefit from
more alias information. I'm open to removing this code too.

TBAA tags are also added separately by TBAABuilder during CodeGen.
TBAABuilder has to run during CodeGen because it adds tags to box
accesses, many of which are implicit in FIR. This pass cannot (easily)
run in CodeGen because fir::AliasAnalysis has difficulty tracing values
between blocks, and by the time CodeGen runs, structured control flow
has already been lowered.

Coming in follow up patches
  - Change CodeGen/TBAABuilder to use TBAAForest to add tags within the
    same per-function trees as are used here (delayed to a later patch
    to make it easier to revert)
  - Command line argument processing to actually enable the pass
---
 .../flang/Optimizer/Analysis/TBAAForest.h     | 104 +++++
 .../flang/Optimizer/Transforms/Passes.h       |   1 +
 .../flang/Optimizer/Transforms/Passes.td      |  20 +
 flang/lib/Optimizer/Analysis/CMakeLists.txt   |   1 +
 flang/lib/Optimizer/Analysis/TBAAForest.cpp   |  60 +++
 .../lib/Optimizer/Transforms/AddAliasTags.cpp | 198 +++++++++
 flang/lib/Optimizer/Transforms/CMakeLists.txt |   1 +
 flang/test/Transforms/tbaa.fir                | 175 ++++++++
 flang/test/Transforms/tbaa2.fir               | 386 ++++++++++++++++++
 9 files changed, 946 insertions(+)
 create mode 100644 flang/include/flang/Optimizer/Analysis/TBAAForest.h
 create mode 100644 flang/lib/Optimizer/Analysis/TBAAForest.cpp
 create mode 100644 flang/lib/Optimizer/Transforms/AddAliasTags.cpp
 create mode 100644 flang/test/Transforms/tbaa.fir
 create mode 100644 flang/test/Transforms/tbaa2.fir

diff --git a/flang/include/flang/Optimizer/Analysis/TBAAForest.h b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
new file mode 100644
index 000000000000000..3d11e6112bacce9
--- /dev/null
+++ b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
@@ -0,0 +1,104 @@
+//===-- TBAAForest.h - A TBAA tree for each function -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
+#define FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "llvm/ADT/DenseMap.h"
+#include <string>
+
+namespace fir {
+
+//===----------------------------------------------------------------------===//
+// TBAATree
+//===----------------------------------------------------------------------===//
+/// Per-function TBAA tree. Each tree contins branches for data (of various
+/// kinds) and descriptor access
+struct TBAATree {
+  //===----------------------------------------------------------------------===//
+  // TBAAForrest::TBAATree::SubtreeState
+  //===----------------------------------------------------------------------===//
+  /// This contains a TBAA subtree based on some parent. New tags can be added
+  /// under the parent using getTag.
+  class SubtreeState {
+    friend TBAATree; // only allow construction by TBAATree
+  public:
+    SubtreeState() = delete;
+    SubtreeState(const SubtreeState &) = delete;
+    SubtreeState(SubtreeState &&) = default;
+
+    mlir::LLVM::TBAATagAttr getTag(llvm::StringRef uniqueId) const;
+
+  private:
+    SubtreeState(mlir::MLIRContext *ctx, std::string name,
+                 mlir::LLVM::TBAANodeAttr grandParent)
+        : parentId{std::move(name)}, context(ctx) {
+      parent = mlir::LLVM::TBAATypeDescriptorAttr::get(
+          context, parentId, mlir::LLVM::TBAAMemberAttr::get(grandParent, 0));
+    }
+
+    const std::string parentId;
+    mlir::MLIRContext *const context;
+    mlir::LLVM::TBAATypeDescriptorAttr parent;
+    llvm::DenseMap<llvm::StringRef, mlir::LLVM::TBAATagAttr> tagDedup;
+  };
+
+  SubtreeState globalDataTree;
+  SubtreeState allocatedDataTree;
+  SubtreeState dummyArgDataTree;
+  mlir::LLVM::TBAATypeDescriptorAttr anyAccessDesc;
+  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc;
+  mlir::LLVM::TBAATypeDescriptorAttr anyDataTypeDesc;
+
+  static TBAATree buildTree(mlir::StringAttr functionName);
+
+private:
+  TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
+           mlir::LLVM::TBAATypeDescriptorAttr dataRoot,
+           mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc);
+};
+
+//===----------------------------------------------------------------------===//
+// TBAAForrest
+//===----------------------------------------------------------------------===//
+/// Colletion of TBAATrees, usually indexed by function (so that each function
+/// has a different TBAATree)
+class TBAAForrest {
+public:
+  explicit TBAAForrest(bool separatePerFunction = true)
+      : separatePerFunction{separatePerFunction} {}
+
+  inline const TBAATree &operator[](mlir::func::FuncOp func) {
+    return getFuncTree(func.getSymNameAttr());
+  }
+  inline const TBAATree &operator[](mlir::LLVM::LLVMFuncOp func) {
+    return getFuncTree(func.getSymNameAttr());
+  }
+
+private:
+  const TBAATree &getFuncTree(mlir::StringAttr symName) {
+    if (!separatePerFunction)
+      symName = mlir::StringAttr::get(symName.getContext(), "");
+    if (!trees.contains(symName))
+      trees.insert({symName, TBAATree::buildTree(symName)});
+    return trees.at(symName);
+  }
+
+  // Should each function use a different tree?
+  const bool separatePerFunction;
+  // TBAA tree per function
+  llvm::DenseMap<mlir::StringAttr, TBAATree> trees;
+};
+
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 64882c8ec406b0a..30d97be3800c191 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -61,6 +61,7 @@ std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
 std::unique_ptr<mlir::Pass> createStackArraysPass();
+std::unique_ptr<mlir::Pass> createAliasTagsPass();
 std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
 std::unique_ptr<mlir::Pass> createAddDebugFoundationPass();
 std::unique_ptr<mlir::Pass> createLoopVersioningPass();
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 80da485392007fa..7aa08b0616de99d 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -252,6 +252,26 @@ def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> {
   let constructor = "::fir::createStackArraysPass()";
 }
 
+def AddAliasTags : Pass<"fir-alias-analysis", "mlir::ModuleOp"> {
+  let summary = "Add tbaa tags to operations that implement FirAliasAnalysisOpInterface";
+  let description = [{
+    TBAA (type based alias analysis) is one method to pass pointer alias information
+    from language frontends to LLVM. This pass uses fir::AliasAnalysis to add this
+    information to fir.load and fir.store operations.
+    Additional tags are added during codegen. See fir::TBAABuilder.
+    This needs to be a separate pass so that it happens before structured control
+    flow operations are lowered to branches and basic blocks (this makes tracing
+    the source of values much eaiser). The other TBAA tags need to be applied to
+    box loads and stores which are implicit in FIR and so cannot be annotated
+    until codegen.
+    TODO: this is currently a pass on mlir::ModuleOp to avoid parallelism. In
+    theory, each operation could be considered in prallel, so long as there
+    aren't races adding new tags to the mlir context.
+  }];
+  let dependentDialects = [ "fir::FIROpsDialect" ];
+  let constructor = "::fir::createAliasTagsPass()";
+}
+
 def SimplifyRegionLite : Pass<"simplify-region-lite", "mlir::ModuleOp"> {
   let summary = "Region simplification";
   let description = [{
diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt
index 19dadf72cf4ce14..436d4d3f18969c1 100644
--- a/flang/lib/Optimizer/Analysis/CMakeLists.txt
+++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_flang_library(FIRAnalysis
   AliasAnalysis.cpp
+  TBAAForest.cpp
 
   DEPENDS
   FIRDialect
diff --git a/flang/lib/Optimizer/Analysis/TBAAForest.cpp b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
new file mode 100644
index 000000000000000..070e2be6700cc11
--- /dev/null
+++ b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
@@ -0,0 +1,60 @@
+//===- TBAAForest.cpp - Per-functon TBAA Trees ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/TBAAForest.h"
+#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
+
+mlir::LLVM::TBAATagAttr
+fir::TBAATree::SubtreeState::getTag(llvm::StringRef uniqueName) const {
+  // mlir::LLVM::TBAATagAttr &tag = tagDedup[uniqueName];
+  // if (tag)
+  //   return tag;
+  std::string id = (parentId + "/" + uniqueName).str();
+  mlir::LLVM::TBAATypeDescriptorAttr type =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          context, id, mlir::LLVM::TBAAMemberAttr::get(parent, 0));
+  return mlir::LLVM::TBAATagAttr::get(type, type, 0);
+  // return tag;
+}
+
+fir::TBAATree fir::TBAATree::buildTree(mlir::StringAttr func) {
+  llvm::StringRef funcName = func.getValue();
+  std::string rootId = ("Flang function root " + funcName).str();
+  mlir::MLIRContext *ctx = func.getContext();
+  mlir::LLVM::TBAARootAttr funcRoot =
+      mlir::LLVM::TBAARootAttr::get(ctx, mlir::StringAttr::get(ctx, rootId));
+
+  static constexpr llvm::StringRef anyAccessTypeDescId = "any access";
+  mlir::LLVM::TBAATypeDescriptorAttr anyAccess =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, anyAccessTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(funcRoot, 0));
+
+  static constexpr llvm::StringRef anyDataAccessTypeDescId = "any data access";
+  mlir::LLVM::TBAATypeDescriptorAttr dataRoot =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, anyDataAccessTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(anyAccess, 0));
+
+  static constexpr llvm::StringRef boxMemberTypeDescId = "descriptor member";
+  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, boxMemberTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(anyAccess, 0));
+
+  return TBAATree{anyAccess, dataRoot, boxMemberTypeDesc};
+}
+
+fir::TBAATree::TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
+                        mlir::LLVM::TBAATypeDescriptorAttr dataRoot,
+                        mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc)
+    : globalDataTree(dataRoot.getContext(), "global data", dataRoot),
+      allocatedDataTree(dataRoot.getContext(), "allocated data", dataRoot),
+      dummyArgDataTree(dataRoot.getContext(), "dummy arg data", dataRoot),
+      anyAccessDesc(anyAccess), boxMemberTypeDesc(boxMemberTypeDesc),
+      anyDataTypeDesc(dataRoot) {}
diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
new file mode 100644
index 000000000000000..1129ae87246b7f8
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -0,0 +1,198 @@
+//===- AddAliasTags.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// Adds TBAA alias tags to fir loads and stores, based on information from
+/// fir::AliasAnalysis. More are added later in CodeGen - see fir::TBAABuilder
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Analysis/TBAAForest.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+namespace fir {
+#define GEN_PASS_DEF_ADDALIASTAGS
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "fir-add-alias-tags"
+
+static llvm::cl::opt<bool>
+    enableDummyArgs("dummy-arg-tbaa", llvm::cl::init(true), llvm::cl::Hidden,
+                    llvm::cl::desc("Add TBAA tags to dummy arguments"));
+// These two are **known unsafe** (misscompare in spec2017/wrf_r). They should
+// not be enabled by default.
+// The code is kept so that these may be tried with new benchmarks to see if
+// this is worth fixing in the future.
+static llvm::cl::opt<bool>
+    enableGlobals("globals-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
+                  llvm::cl::desc("Add TBAA tags to dummy arguments. UNSAFE."));
+static llvm::cl::opt<bool> enableLocalAllocs(
+    "local-alloc-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
+    llvm::cl::desc("Add TBAA tags to dummy arguments. UNSAFE."));
+
+namespace {
+
+/// Shared state per-module
+class PassState {
+public:
+  /// memoised call to fir::AliasAnalysis::getSource
+  inline const fir::AliasAnalysis::Source &getSource(mlir::Value value) {
+    if (!analysisCache.contains(value))
+      analysisCache.insert({value, analysis.getSource(value)});
+    return analysisCache[value];
+  }
+
+  /// get the per-function TBAATree for this function
+  inline const fir::TBAATree &getFuncTree(mlir::func::FuncOp func) {
+    return forrest[func];
+  }
+
+private:
+  fir::AliasAnalysis analysis;
+  llvm::DenseMap<mlir::Value, fir::AliasAnalysis::Source> analysisCache;
+  fir::TBAAForrest forrest;
+};
+
+class AddAliasTagsPass : public fir::impl::AddAliasTagsBase<AddAliasTagsPass> {
+public:
+  void runOnOperation() override;
+
+private:
+  /// The real workhorse of the pass. This is a runOnOperation() which
+  /// operates on fir::FirAliasAnalysisOpInterface, using some extra state
+  void runOnAliasInterface(fir::FirAliasAnalysisOpInterface op,
+                           PassState &state);
+};
+
+} // namespace
+
+/// Get the name of a function argument using the "fir.bindc_name" attribute,
+/// or ""
+static std::string getFuncArgName(mlir::Value arg) {
+  // always succeeds because arg is a function argument
+  mlir::BlockArgument blockArg = mlir::cast<mlir::BlockArgument>(arg);
+  assert(blockArg.getOwner() && blockArg.getOwner()->isEntryBlock() &&
+         "arg is a function argument");
+  mlir::FunctionOpInterface func =
+      mlir::cast<mlir::FunctionOpInterface>(blockArg.getOwner()->getParentOp());
+  mlir::StringAttr attr = func.getArgAttrOfType<mlir::StringAttr>(
+      blockArg.getArgNumber(), "fir.bindc_name");
+  if (!attr)
+    return "";
+  return attr.str();
+}
+
+void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasAnalysisOpInterface op,
+                                           PassState &state) {
+  mlir::func::FuncOp func = op->getParentOfType<mlir::func::FuncOp>();
+
+  llvm::SmallVector<mlir::Value> accessedOperands = op.getAccessedOperands();
+  assert(accessedOperands.size() == 1 &&
+         "load and store only access one address");
+  mlir::Value memref = accessedOperands.front();
+
+  // skip boxes. These get an "any descriptor access" tag in TBAABuilder
+  // (CodeGen). I didn't see any speedup from giving each box a separate TBAA
+  // type.
+  if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(memref.getType())))
+    return;
+  LLVM_DEBUG(llvm::dbgs() << "Analysing " << op << "\n");
+
+  const fir::AliasAnalysis::Source &source = state.getSource(memref);
+  if (source.isTargetOrPointer()) {
+    LLVM_DEBUG(llvm::dbgs().indent(2) << "Skipping TARGET/POINTER\n");
+    // These will get an "any data access" tag in TBAABuilder (CodeGen): causing
+    // them to "MayAlias" with all non-box accesses
+    return;
+  }
+
+  mlir::LLVM::TBAATagAttr tag;
+  // TBAA for dummy arguments
+  if (enableDummyArgs &&
+      source.kind == fir::AliasAnalysis::SourceKind::Argument) {
+    LLVM_DEBUG(llvm::dbgs().indent(2)
+               << "Found reference to dummy argument at " << *op << "\n");
+    std::string name = getFuncArgName(source.u.get<mlir::Value>());
+    if (!name.empty())
+      tag = state.getFuncTree(func).dummyArgDataTree.getTag(name);
+    else
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: couldn't find a name for dummy argument " << *op
+                 << "\n");
+
+    // TBAA for global variables
+  } else if (enableGlobals &&
+             source.kind == fir::AliasAnalysis::SourceKind::Global) {
+    mlir::SymbolRefAttr glbl = source.u.get<mlir::SymbolRefAttr>();
+    const char *name = glbl.getRootReference().data();
+    LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to global " << name
+                                      << " at " << *op << "\n");
+    tag = state.getFuncTree(func).globalDataTree.getTag(name);
+
+    // TBAA for local allocations
+  } else if (enableLocalAllocs &&
+             source.kind == fir::AliasAnalysis::SourceKind::Allocate) {
+    std::optional<llvm::StringRef> name;
+    mlir::Operation *sourceOp = source.u.get<mlir::Value>().getDefiningOp();
+    if (auto alloc = mlir::dyn_cast_or_null<fir::AllocaOp>(sourceOp))
+      name = alloc.getUniqName();
+    else if (auto alloc = mlir::dyn_cast_or_null<fir::AllocMemOp>(sourceOp))
+      name = alloc.getUniqName();
+    if (name) {
+      LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to allocation "
+                                        << name << " at " << *op << "\n");
+      tag = state.getFuncTree(func).allocatedDataTree.getTag(*name);
+    } else {
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: couldn't find a name for allocation " << *op
+                 << "\n");
+    }
+  } else {
+    if (source.kind != fir::AliasAnalysis::SourceKind::Argument &&
+        source.kind != fir::AliasAnalysis::SourceKind::Allocate &&
+        source.kind != fir::AliasAnalysis::SourceKind::Global)
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: unsupported value: " << source << "\n");
+  }
+
+  if (tag)
+    op.setTBAATags(mlir::ArrayAttr::get(&getContext(), tag));
+}
+
+void AddAliasTagsPass::runOnOperation() {
+  LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
+
+  // MLIR forbids storing state in a pass because different instances might be
+  // used in different threads.
+  // Instead this pass stores state per mlir::ModuleOp (which is what MLIR
+  // thinks the pass operates on), then the real work of the pass is done in
+  // runOnAliasInterface
+  PassState state;
+
+  mlir::ModuleOp mod = getOperation();
+  mod.walk([&](fir::FirAliasAnalysisOpInterface op) {
+    runOnAliasInterface(op, state);
+  });
+
+  LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
+}
+
+std::unique_ptr<mlir::Pass> fir::createAliasTagsPass() {
+  return std::make_unique<AddAliasTagsPass>();
+}
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 428c4c2a1e64408..74a093fe74719b6 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_flang_library(FIRTransforms
   AbstractResult.cpp
+  AddAliasTags.cpp
   AffinePromotion.cpp
   AffineDemotion.cpp
   AnnotateConstant.cpp
diff --git a/flang/test/Transforms/tbaa.fir b/flang/test/Transforms/tbaa.fir
new file mode 100644
index 000000000000000..4519acffd1f91b1
--- /dev/null
+++ b/flang/test/Transforms/tbaa.fir
@@ -0,0 +1,175 @@
+// RUN: fir-opt --split-input-file --fir-alias-analysis %s | FileCheck %s
+
+// subroutine oneArg(a)
+//   integer :: a(:)
+//   a(1) = a(2)
+// end subroutine
+  func.func @_QPonearg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %0 = fir.declare %arg0 {uniq_name = "_QFoneargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.array_coor %1 %c2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %3 = fir.load %2 : !fir.ref<i32>
+    %4 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %3 to %4 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[ONE_ARG_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPonearg">
+// CHECK: #[[ONE_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ONE_ARG_ROOT]], 0>}>
+// CHECK: #[[ONE_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ONE_ARG_ANY_ACCESS]], 0>}>
+// CHECK: #[[ONE_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ONE_ARG_ANY_DATA]], 0>}>
+// CHECK: #[[ONE_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#[[ONE_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[ONE_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ONE_ARG_A]], access_type = #[[ONE_ARG_A]], offset = 0>
+
+// CHECK-LABEL:   func.func @_QPonearg(
+// CHECK-SAME:                         %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFoneargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_5]] {tbaa = [#[[ONE_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[VAL_6]] to %[[VAL_7]] {tbaa = [#[[ONE_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine twoArg(a, b)
+//   integer :: a(:), b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPtwoarg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {uniq_name = "_QFtwoargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.declare %arg1 {uniq_name = "_QFtwoargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.rebox %2 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %4 = fir.array_coor %3 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %5 = fir.load %4 : !fir.ref<i32>
+    %6 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %5 to %6 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[TWO_ARG_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtwoarg">
+// CHECK: #[[TWO_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TWO_ARG_ROOT]], 0>}>
+// CHECK: #[[TWO_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TWO_ARG_ANY_ACCESS]], 0>}>
+// CHECK: #[[TWO_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TWO_ARG_ANY_DATA]], 0>}>
+// CHECK: #[[TWO_ARG_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_B]], access_type = #[[TWO_ARG_B]], offset = 0>
+// CHECK: #[[TWO_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_A]], access_type = #[[TWO_ARG_A]], offset = 0>
+
+// CHECK-LABEL:   func.func @_QPtwoarg(
+// CHECK-SAME:                         %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                         %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFtwoargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFtwoargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[TWO_ARG_B_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_9]] {tbaa = [#[[TWO_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine targetArg(a, b)
+//   integer, target :: a(:)
+//   integer :: b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPtargetarg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a", fir.target}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFtargetargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.declare %arg1 {uniq_name = "_QFtargetargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.rebox %2 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %4 = fir.array_coor %3 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %5 = fir.load %4 : !fir.ref<i32>
+    %6 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %5 to %6 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[TARGET_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtargetarg">
+// CHECK: #[[TARGET_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TARGET_ROOT]], 0>}>
+// CHECK: #[[TARGET_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TARGET_ANY_ACCESS]], 0>}>
+// CHECK: #[[TARGET_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TARGET_ANY_DATA]], 0>}>
+// CHECK: #[[TARGET_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[TARGET_ANY_ARG]], 0>}>
+// CHECK: #[[TARGET_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TARGET_B]], access_type = #[[TARGET_B]], offset = 0>
+// No entry for "dummy arg data/a" because that pointer should get "any data access" becase it has the TARGET attribute
+
+// CHECK-LABEL:   func.func @_QPtargetarg(
+// CHECK-SAME:                            %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a", fir.target},
+// CHECK-SAME:                            %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<target>, uniq_name = "_QFtargetargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFtargetargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[TARGET_B_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// "any data access" tag is added by TBAABuilder during CodeGen
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine pointerArg(a, b)
+//   integer, pointer :: a(:)
+//   integer :: b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPpointerarg(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerargEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+    %1 = fir.declare %arg1 {uniq_name = "_QFpointerargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.rebox %1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.array_coor %2 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %4 = fir.load %3 : !fir.ref<i32>
+    %5 = fir.load %0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+    %6:3 = fir.box_dims %5, %c0 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+    %7 = fir.shift %6#0 : (index) -> !fir.shift<1>
+    %8 = fir.array_coor %5(%7) %c1 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.shift<1>, index) -> !fir.ref<i32>
+    fir.store %4 to %8 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[POINTER_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPpointerarg">
+// CHECK: #[[POINTER_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[POINTER_ROOT]], 0>}>
+// CHECK: #[[POINTER_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[POINTER_ANY_ACCESS]], 0>}>
+// CHECK: #[[POINTER_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[POINTER_ANY_DATA]], 0>}>
+// CHECK: #[[POINTER_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[POINTER_ANY_ARG]], 0>}>
+// CHECK: #[[POINTER_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[POINTER_B]], access_type = #[[POINTER_B]], offset = 0>
+// No entry for "dummy arg data/a" because that pointer should get "any data access" becase it has the POINTER attribute
+
+// CHECK-LABEL:   func.func @_QPpointerarg(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {fir.bindc_name = "a"},
+// CHECK-SAME:                             %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<pointer>, uniq_name = "_QFpointerargEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFpointerargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[POINTER_B_TAG]]]} : !fir.ref<i32>
+// "any descriptor access" tag is added by TBAABuilder during CodeGen
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]] = fir.shift %[[VAL_10]]#0 : (index) -> !fir.shift<1>
+// CHECK:           %[[VAL_12:.*]] = fir.array_coor %[[VAL_9]](%[[VAL_11]]) %[[VAL_3]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.shift<1>, index) -> !fir.ref<i32>
+// "any data access" tag is added by TBAABuilder during CodeGen
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_12]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
\ No newline at end of file
diff --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir
new file mode 100644
index 000000000000000..5aba03f292fef74
--- /dev/null
+++ b/flang/test/Transforms/tbaa2.fir
@@ -0,0 +1,386 @@
+// Test fir alias analysis pass on a larger real life code example (from the RFC)
+// RUN: fir-opt --fir-alias-analysis %s | FileCheck %s
+
+  fir.global @_QMmodEa : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
+  }
+  fir.global @_QMmodEb : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
+  }
+  fir.global @_QMmodEdxinv : f32 {
+    %0 = fir.zero_bits f32
+    fir.has_value %0 : f32
+  }
+  fir.global @_QMmodEdyinv : f32 {
+    %0 = fir.zero_bits f32
+    fir.has_value %0 : f32
+  }
+  fir.global @_QMmodExstart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEystart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEystop : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEzstart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEzstop : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+
+// CHECK: #[[ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QMmodPcallee">
+// CHECK: #[[ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
+// CHECK: #[[ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANY_ACCESS]], 0>}>
+// CHECK: #[[ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ANY_DATA]], 0>}>
+// CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/low", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/z", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/y", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_LOW]], access_type = #[[ARG_LOW]], offset = 0>
+// CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Z]], access_type = #[[ARG_Z]], offset = 0>
+// CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Y]], access_type = #[[ARG_Y]], offset = 0>
+
+  func.func @_QMmodPcallee(%arg0: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "z"}, %arg1: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "y"}, %arg2: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "low"}) {
+    %c2 = arith.constant 2 : index
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c1_i32 = arith.constant 1 : i32
+    %0 = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %1 = fir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %2 = fir.address_of(@_QMmodEb) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %3 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %4 = fir.address_of(@_QMmodEdxinv) : !fir.ref<f32>
+    %5 = fir.declare %4 {uniq_name = "_QMmodEdxinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %6 = fir.address_of(@_QMmodEdyinv) : !fir.ref<f32>
+    %7 = fir.declare %6 {uniq_name = "_QMmodEdyinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %8 = fir.address_of(@_QMmodExstart) : !fir.ref<i32>
+    %9 = fir.declare %8 {uniq_name = "_QMmodExstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %10 = fir.address_of(@_QMmodEystart) : !fir.ref<i32>
+    %11 = fir.declare %10 {uniq_name = "_QMmodEystart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %12 = fir.address_of(@_QMmodEystop) : !fir.ref<i32>
+    %13 = fir.declare %12 {uniq_name = "_QMmodEystop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %14 = fir.address_of(@_QMmodEzstart) : !fir.ref<i32>
+    %15 = fir.declare %14 {uniq_name = "_QMmodEzstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %16 = fir.address_of(@_QMmodEzstop) : !fir.ref<i32>
+    %17 = fir.declare %16 {uniq_name = "_QMmodEzstop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %18 = fir.alloca f32 {bindc_name = "dxold", uniq_name = "_QMmodFcalleeEdxold"}
+    %19 = fir.declare %18 {uniq_name = "_QMmodFcalleeEdxold"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %20 = fir.alloca f32 {bindc_name = "dzinv", uniq_name = "_QMmodFcalleeEdzinv"}
+    %21 = fir.declare %20 {uniq_name = "_QMmodFcalleeEdzinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %22 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmodFcalleeEi"}
+    %23 = fir.declare %22 {uniq_name = "_QMmodFcalleeEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %24 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmodFcalleeEj"}
+    %25 = fir.declare %24 {uniq_name = "_QMmodFcalleeEj"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %26 = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmodFcalleeEk"}
+    %27 = fir.declare %26 {uniq_name = "_QMmodFcalleeEk"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %28 = fir.declare %arg2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodFcalleeElow"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+    %29 = fir.declare %arg1 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QMmodFcalleeEy"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %30 = fir.rebox %29 : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %31 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QMmodFcalleeEz"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %32 = fir.rebox %31 : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %33 = fir.load %15 : !fir.ref<i32>
+    %34 = arith.addi %33, %c1_i32 : i32
+    %35 = fir.convert %34 : (i32) -> index
+    %36 = fir.load %17 : !fir.ref<i32>
+    %37 = fir.convert %36 : (i32) -> index
+    %38 = fir.convert %35 : (index) -> i32
+    %39:2 = fir.do_loop %arg3 = %35 to %37 step %c1 iter_args(%arg4 = %38) -> (index, i32) {
+      fir.store %arg4 to %27 : !fir.ref<i32>
+      %40 = fir.load %11 : !fir.ref<i32>
+      %41 = arith.addi %40, %c1_i32 : i32
+      %42 = fir.convert %41 : (i32) -> index
+      %43 = fir.load %13 : !fir.ref<i32>
+      %44 = fir.convert %43 : (i32) -> index
+      %45 = fir.convert %42 : (index) -> i32
+      %46:2 = fir.do_loop %arg5 = %42 to %44 step %c1 iter_args(%arg6 = %45) -> (index, i32) {
+        fir.store %arg6 to %25 : !fir.ref<i32>
+        %51 = fir.load %9 : !fir.ref<i32>
+        %52 = arith.addi %51, %c1_i32 : i32
+        %53 = fir.convert %52 : (i32) -> index
+        %54 = fir.convert %53 : (index) -> i32
+        %55:2 = fir.do_loop %arg7 = %53 to %c0 step %c1 iter_args(%arg8 = %54) -> (index, i32) {
+          fir.store %arg8 to %23 : !fir.ref<i32>
+          %60 = fir.load %28 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+          %61 = fir.load %23 : !fir.ref<i32>
+          %62 = fir.convert %61 : (i32) -> i64
+          %63 = fir.load %25 : !fir.ref<i32>
+          %64 = fir.convert %63 : (i32) -> i64
+          %65 = fir.load %27 : !fir.ref<i32>
+          %66 = fir.convert %65 : (i32) -> i64
+          %67 = fir.box_addr %60 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+          %68:3 = fir.box_dims %60, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %69:3 = fir.box_dims %60, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %70:3 = fir.box_dims %60, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %71 = fir.shape_shift %68#0, %68#1, %69#0, %69#1, %70#0, %70#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+          %72 = fir.array_coor %67(%71) %62, %64, %66 : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+          %73 = fir.load %72 : !fir.ref<f32>
+          fir.store %73 to %19 : !fir.ref<f32>
+          %74 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+          %75 = fir.load %25 : !fir.ref<i32>
+          %76 = fir.convert %75 : (i32) -> i64
+          %77 = fir.box_addr %74 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+          %78:3 = fir.box_dims %74, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+          %79 = fir.shape_shift %78#0, %78#1 : (index, index) -> !fir.shapeshift<1>
+          %80 = fir.array_coor %77(%79) %76 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+          %81 = fir.load %80 : !fir.ref<f32>
+          %82 = fir.load %28 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+          %83 = fir.load %23 : !fir.ref<i32>
+          %84 = fir.convert %83 : (i32) -> i64
+          %85 = fir.load %27 : !fir.ref<i32>
+          %86 = fir.convert %85 : (i32) -> i64
+          %87 = fir.box_addr %82 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+          %88:3 = fir.box_dims %82, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %89:3 = fir.box_dims %82, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %90:3 = fir.box_dims %82, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %91 = fir.shape_shift %88#0, %88#1, %89#0, %89#1, %90#0, %90#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+          %92 = fir.array_coor %87(%91) %84, %76, %86 : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+          %93 = fir.load %92 : !fir.ref<f32>
+          %94 = arith.mulf %81, %93 fastmath<contract> : f32
+          %95 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+          %96 = fir.box_addr %95 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+          %97:3 = fir.box_dims %95, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+          %98 = fir.shape_shift %97#0, %97#1 : (index, index) -> !fir.shapeshift<1>
+          %99 = fir.array_coor %96(%98) %76 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+          %100 = fir.load %99 : !fir.ref<f32>
+          %101 = fir.array_coor %32 %84, %76, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %102 = fir.load %101 : !fir.ref<f32>
+          %103 = arith.subi %75, %c1_i32 : i32
+          %104 = fir.convert %103 : (i32) -> i64
+          %105 = fir.array_coor %32 %84, %104, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %106 = fir.load %105 : !fir.ref<f32>
+          %107 = arith.subf %102, %106 fastmath<contract> : f32
+          %108 = fir.no_reassoc %107 : f32
+          %109 = fir.load %7 : !fir.ref<f32>
+          %110 = arith.mulf %108, %109 fastmath<contract> : f32
+          %111 = arith.subi %85, %c1_i32 : i32
+          %112 = fir.convert %111 : (i32) -> i64
+          %113 = fir.array_coor %30 %84, %76, %112 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %114 = fir.load %113 : !fir.ref<f32>
+          %115 = fir.array_coor %30 %84, %76, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %116 = fir.load %115 : !fir.ref<f32>
+          %117 = arith.subf %114, %116 fastmath<contract> : f32
+          %118 = fir.no_reassoc %117 : f32
+          %119 = fir.load %21 : !fir.ref<f32>
+          %120 = arith.mulf %118, %119 fastmath<contract> : f32
+          %121 = arith.addf %110, %120 fastmath<contract> : f32
+          %122 = fir.no_reassoc %121 : f32
+          %123 = arith.mulf %100, %122 fastmath<contract> : f32
+          %124 = arith.addf %94, %123 fastmath<contract> : f32
+          fir.store %124 to %92 : !fir.ref<f32>
+          %125 = arith.addi %arg7, %c1 : index
+          %126 = fir.convert %c1 : (index) -> i32
+          %127 = fir.load %23 : !fir.ref<i32>
+          %128 = arith.addi %127, %126 : i32
+          fir.result %125, %128 : index, i32
+        }
+        fir.store %55#1 to %23 : !fir.ref<i32>
+        %56 = arith.addi %arg5, %c1 : index
+        %57 = fir.convert %c1 : (index) -> i32
+        %58 = fir.load %25 : !fir.ref<i32>
+        %59 = arith.addi %58, %57 : i32
+        fir.result %56, %59 : index, i32
+      }
+      fir.store %46#1 to %25 : !fir.ref<i32>
+      %47 = arith.addi %arg3, %c1 : index
+      %48 = fir.convert %c1 : (index) -> i32
+      %49 = fir.load %27 : !fir.ref<i32>
+      %50 = arith.addi %49, %48 : i32
+      fir.result %47, %50 : index, i32
+    }
+    fir.store %39#1 to %27 : !fir.ref<i32>
+    return
+  }
+// CHECK-LABEL:   func.func @_QMmodPcallee(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "z"},
+// CHECK-SAME:                             %[[VAL_1:.*]]: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "y"},
+// CHECK-SAME:                             %[[VAL_2:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "low"}) {
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_8:.*]] = fir.declare %[[VAL_7]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_9:.*]] = fir.address_of(@_QMmodEb) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_10:.*]] = fir.declare %[[VAL_9]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_11:.*]] = fir.address_of(@_QMmodEdxinv) : !fir.ref<f32>
+// CHECK:           %[[VAL_12:.*]] = fir.declare %[[VAL_11]] {uniq_name = "_QMmodEdxinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_13:.*]] = fir.address_of(@_QMmodEdyinv) : !fir.ref<f32>
+// CHECK:           %[[VAL_14:.*]] = fir.declare %[[VAL_13]] {uniq_name = "_QMmodEdyinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_15:.*]] = fir.address_of(@_QMmodExstart) : !fir.ref<i32>
+// CHECK:           %[[VAL_16:.*]] = fir.declare %[[VAL_15]] {uniq_name = "_QMmodExstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_17:.*]] = fir.address_of(@_QMmodEystart) : !fir.ref<i32>
+// CHECK:           %[[VAL_18:.*]] = fir.declare %[[VAL_17]] {uniq_name = "_QMmodEystart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_19:.*]] = fir.address_of(@_QMmodEystop) : !fir.ref<i32>
+// CHECK:           %[[VAL_20:.*]] = fir.declare %[[VAL_19]] {uniq_name = "_QMmodEystop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_21:.*]] = fir.address_of(@_QMmodEzstart) : !fir.ref<i32>
+// CHECK:           %[[VAL_22:.*]] = fir.declare %[[VAL_21]] {uniq_name = "_QMmodEzstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_23:.*]] = fir.address_of(@_QMmodEzstop) : !fir.ref<i32>
+// CHECK:           %[[VAL_24:.*]] = fir.declare %[[VAL_23]] {uniq_name = "_QMmodEzstop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_25:.*]] = fir.alloca f32 {bindc_name = "dxold", uniq_name = "_QMmodFcalleeEdxold"}
+// CHECK:           %[[VAL_26:.*]] = fir.declare %[[VAL_25]] {uniq_name = "_QMmodFcalleeEdxold"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_27:.*]] = fir.alloca f32 {bindc_name = "dzinv", uniq_name = "_QMmodFcalleeEdzinv"}
+// CHECK:           %[[VAL_28:.*]] = fir.declare %[[VAL_27]] {uniq_name = "_QMmodFcalleeEdzinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_29:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmodFcalleeEi"}
+// CHECK:           %[[VAL_30:.*]] = fir.declare %[[VAL_29]] {uniq_name = "_QMmodFcalleeEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_31:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmodFcalleeEj"}
+// CHECK:           %[[VAL_32:.*]] = fir.declare %[[VAL_31]] {uniq_name = "_QMmodFcalleeEj"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_33:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmodFcalleeEk"}
+// CHECK:           %[[VAL_34:.*]] = fir.declare %[[VAL_33]] {uniq_name = "_QMmodFcalleeEk"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_35:.*]] = fir.declare %[[VAL_2]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodFcalleeElow"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// CHECK:           %[[VAL_36:.*]] = fir.declare %[[VAL_1]] {fortran_attrs = #{{.*}}<intent_in>, uniq_name = "_QMmodFcalleeEy"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_37:.*]] = fir.rebox %[[VAL_36]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_38:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<intent_in>, uniq_name = "_QMmodFcalleeEz"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_39:.*]] = fir.rebox %[[VAL_38]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// TODO: read from global assumed to always alias
+// CHECK:           %[[VAL_40:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+// CHECK:           %[[VAL_41:.*]] = arith.addi %[[VAL_40]], %[[VAL_6]] : i32
+// CHECK:           %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (i32) -> index
+// TODO: read from global assumed to always alias
+// CHECK:           %[[VAL_43:.*]] = fir.load %[[VAL_24]] : !fir.ref<i32>
+// CHECK:           %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i32) -> index
+// CHECK:           %[[VAL_45:.*]] = fir.convert %[[VAL_42]] : (index) -> i32
+// CHECK:           %[[VAL_46:.*]]:2 = fir.do_loop %[[VAL_47:.*]] = %[[VAL_42]] to %[[VAL_44]] step %[[VAL_5]] iter_args(%[[VAL_48:.*]] = %[[VAL_45]]) -> (index, i32) {
+// CHECK:             fir.store %[[VAL_48]] to %[[VAL_34]] : !fir.ref<i32>
+// TODO: read from global assumed to always alias
+// CHECK:             %[[VAL_49:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
+// CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_6]] : i32
+// CHECK:             %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i32) -> index
+// TODO: read from global assumed to always alias
+// CHECK:             %[[VAL_52:.*]] = fir.load %[[VAL_20]] : !fir.ref<i32>
+// CHECK:             %[[VAL_53:.*]] = fir.convert %[[VAL_52]] : (i32) -> index
+// CHECK:             %[[VAL_54:.*]] = fir.convert %[[VAL_51]] : (index) -> i32
+// CHECK:             %[[VAL_55:.*]]:2 = fir.do_loop %[[VAL_56:.*]] = %[[VAL_51]] to %[[VAL_53]] step %[[VAL_5]] iter_args(%[[VAL_57:.*]] = %[[VAL_54]]) -> (index, i32) {
+// CHECK:               fir.store %[[VAL_57]] to %[[VAL_32]] : !fir.ref<i32>
+// TODO: read from global assumed to always alias
+// CHECK:               %[[VAL_58:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
+// CHECK:               %[[VAL_59:.*]] = arith.addi %[[VAL_58]], %[[VAL_6]] : i32
+// CHECK:               %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (i32) -> index
+// CHECK:               %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (index) -> i32
+// CHECK:               %[[VAL_62:.*]]:2 = fir.do_loop %[[VAL_63:.*]] = %[[VAL_60]] to %[[VAL_4]] step %[[VAL_5]] iter_args(%[[VAL_64:.*]] = %[[VAL_61]]) -> (index, i32) {
+// TODO: local allocation assumed to always alias
+// CHECK:                 fir.store %[[VAL_64]] to %[[VAL_30]] : !fir.ref<i32>
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_65:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_66:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_67:.*]] = fir.convert %[[VAL_66]] : (i32) -> i64
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_68:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_69:.*]] = fir.convert %[[VAL_68]] : (i32) -> i64
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_70:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_71:.*]] = fir.convert %[[VAL_70]] : (i32) -> i64
+// CHECK:                 %[[VAL_72:.*]] = fir.box_addr %[[VAL_65]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+// CHECK:                 %[[VAL_73:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_74:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_5]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_75:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_76:.*]] = fir.shape_shift %[[VAL_73]]#0, %[[VAL_73]]#1, %[[VAL_74]]#0, %[[VAL_74]]#1, %[[VAL_75]]#0, %[[VAL_75]]#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+// CHECK:                 %[[VAL_77:.*]] = fir.array_coor %[[VAL_72]](%[[VAL_76]]) %[[VAL_67]], %[[VAL_69]], %[[VAL_71]] : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_78:.*]] = fir.load %[[VAL_77]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 fir.store %[[VAL_78]] to %[[VAL_26]] : !fir.ref<f32>
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_79:.*]] = fir.load %[[VAL_8]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_80:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_81:.*]] = fir.convert %[[VAL_80]] : (i32) -> i64
+// CHECK:                 %[[VAL_82:.*]] = fir.box_addr %[[VAL_79]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK:                 %[[VAL_83:.*]]:3 = fir.box_dims %[[VAL_79]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_84:.*]] = fir.shape_shift %[[VAL_83]]#0, %[[VAL_83]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK:                 %[[VAL_85:.*]] = fir.array_coor %[[VAL_82]](%[[VAL_84]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+// load from global variable
+// CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_85]] : !fir.ref<f32>
+// load from box
+// CHECK:                 %[[VAL_87:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// load from local allocation
+// CHECK:                 %[[VAL_88:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_89:.*]] = fir.convert %[[VAL_88]] : (i32) -> i64
+// load from local allocation
+// CHECK:                 %[[VAL_90:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_91:.*]] = fir.convert %[[VAL_90]] : (i32) -> i64
+// CHECK:                 %[[VAL_92:.*]] = fir.box_addr %[[VAL_87]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+// CHECK:                 %[[VAL_93:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_94:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_5]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_95:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_96:.*]] = fir.shape_shift %[[VAL_93]]#0, %[[VAL_93]]#1, %[[VAL_94]]#0, %[[VAL_94]]#1, %[[VAL_95]]#0, %[[VAL_95]]#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+// CHECK:                 %[[VAL_97:.*]] = fir.array_coor %[[VAL_92]](%[[VAL_96]]) %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_98:.*]] = fir.load %[[VAL_97]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_99:.*]] = arith.mulf %[[VAL_86]], %[[VAL_98]] fastmath<contract> : f32
+// load from box
+// CHECK:                 %[[VAL_100:.*]] = fir.load %[[VAL_10]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:                 %[[VAL_101:.*]] = fir.box_addr %[[VAL_100]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK:                 %[[VAL_102:.*]]:3 = fir.box_dims %[[VAL_100]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_103:.*]] = fir.shape_shift %[[VAL_102]]#0, %[[VAL_102]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK:                 %[[VAL_104:.*]] = fir.array_coor %[[VAL_101]](%[[VAL_103]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+// load from global variable
+// CHECK:                 %[[VAL_105:.*]] = fir.load %[[VAL_104]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_106:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_107:.*]] = fir.load %[[VAL_106]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_108:.*]] = arith.subi %[[VAL_80]], %[[VAL_6]] : i32
+// CHECK:                 %[[VAL_109:.*]] = fir.convert %[[VAL_108]] : (i32) -> i64
+// CHECK:                 %[[VAL_110:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_109]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_111:.*]] = fir.load %[[VAL_110]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_112:.*]] = arith.subf %[[VAL_107]], %[[VAL_111]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_113:.*]] = fir.no_reassoc %[[VAL_112]] : f32
+// load from global variable
+// CHECK:                 %[[VAL_114:.*]] = fir.load %[[VAL_14]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_115:.*]] = arith.mulf %[[VAL_113]], %[[VAL_114]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_116:.*]] = arith.subi %[[VAL_90]], %[[VAL_6]] : i32
+// CHECK:                 %[[VAL_117:.*]] = fir.convert %[[VAL_116]] : (i32) -> i64
+// CHECK:                 %[[VAL_118:.*]] = fir.array_coor %[[VAL_37]] %[[VAL_89]], %[[VAL_81]], %[[VAL_117]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_119:.*]] = fir.load %[[VAL_118]] {tbaa = [#[[ARG_Y_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_120:.*]] = fir.array_coor %[[VAL_37]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_121:.*]] = fir.load %[[VAL_120]] {tbaa = [#[[ARG_Y_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_122:.*]] = arith.subf %[[VAL_119]], %[[VAL_121]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_123:.*]] = fir.no_reassoc %[[VAL_122]] : f32
+// load from local allocation
+// CHECK:                 %[[VAL_124:.*]] = fir.load %[[VAL_28]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_125:.*]] = arith.mulf %[[VAL_123]], %[[VAL_124]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_126:.*]] = arith.addf %[[VAL_115]], %[[VAL_125]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_127:.*]] = fir.no_reassoc %[[VAL_126]] : f32
+// CHECK:                 %[[VAL_128:.*]] = arith.mulf %[[VAL_105]], %[[VAL_127]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_129:.*]] = arith.addf %[[VAL_99]], %[[VAL_128]] fastmath<contract> : f32
+// CHECK:                 fir.store %[[VAL_129]] to %[[VAL_97]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_130:.*]] = arith.addi %[[VAL_63]], %[[VAL_5]] : index
+// CHECK:                 %[[VAL_131:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// load from local allocation
+// CHECK:                 %[[VAL_132:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_133:.*]] = arith.addi %[[VAL_132]], %[[VAL_131]] : i32
+// CHECK:                 fir.result %[[VAL_130]], %[[VAL_133]] : index, i32
+// CHECK:               }
+// store to local allocation
+// CHECK:               fir.store %[[VAL_134:.*]]#1 to %[[VAL_30]] : !fir.ref<i32>
+// CHECK:               %[[VAL_135:.*]] = arith.addi %[[VAL_56]], %[[VAL_5]] : index
+// CHECK:               %[[VAL_136:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// local allocation:
+// CHECK:               %[[VAL_137:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:               %[[VAL_138:.*]] = arith.addi %[[VAL_137]], %[[VAL_136]] : i32
+// CHECK:               fir.result %[[VAL_135]], %[[VAL_138]] : index, i32
+// CHECK:             }
+// local allocation:
+// CHECK:             fir.store %[[VAL_139:.*]]#1 to %[[VAL_32]] : !fir.ref<i32>
+// CHECK:             %[[VAL_140:.*]] = arith.addi %[[VAL_47]], %[[VAL_5]] : index
+// CHECK:             %[[VAL_141:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// local allocation:
+// CHECK:             %[[VAL_142:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:             %[[VAL_143:.*]] = arith.addi %[[VAL_142]], %[[VAL_141]] : i32
+// CHECK:             fir.result %[[VAL_140]], %[[VAL_143]] : index, i32
+// CHECK:           }
+// local allocation:
+// CHECK:           fir.store %[[VAL_144:.*]]#1 to %[[VAL_34]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
\ No newline at end of file

>From 95b35e1421f4043a213d5628c41d02deb0d27b52 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 14 Sep 2023 15:01:08 +0000
Subject: [PATCH 4/4] [flang] use TBAAForest in TBAABuilder

This is important to ensure that tags end up in the same trees that were
created in the FIR TBAA pass. If they are in different trees then
everything in one tree will be assumed to MayAlias with everything in the
other tree. This leads to poor performance.

@vzakhari requested that the old (not-per-function) trees are
maintained so I left the old test intact.
---
 .../flang/Optimizer/CodeGen/TBAABuilder.h     |  45 +++----
 flang/lib/Optimizer/CodeGen/TBAABuilder.cpp   |  58 ++++-----
 flang/test/Fir/tbaa-codegen.fir               |   6 +-
 flang/test/Fir/tbaa-codegen2.fir              | 114 ++++++++++++++++++
 flang/test/Fir/tbaa.fir                       |  24 ++--
 5 files changed, 175 insertions(+), 72 deletions(-)
 create mode 100644 flang/test/Fir/tbaa-codegen2.fir

diff --git a/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h b/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
index 3aeb124f911867e..2c5795678a91e16 100644
--- a/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
+++ b/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
@@ -13,8 +13,8 @@
 #ifndef FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
 #define FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
 
+#include "flang/Optimizer/Analysis/TBAAForest.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/IR/BuiltinAttributes.h"
 
 namespace fir {
 
@@ -25,9 +25,9 @@ namespace fir {
 //
 // TBAA type information is represented with LLVM::MetadataOp operation
 // with specific symbol name `TBAABuilder::tbaaMetaOpName`. The basic
-// TBAA tree used for Flang consists of the following nodes:
+// TBAA trees used for Flang consists of the following nodes:
 //   llvm.metadata @__flang_tbaa {
-//     llvm.tbaa_root @root_0 {id = "Flang Type TBAA Root"}
+//     llvm.tbaa_root @root_0 {id = "Flang Type TBAA Function Root funcName"}
 //     llvm.tbaa_type_desc @type_desc_1 {id = "any access",
 //                                       members = {<@root_0, 0>}}
 //     llvm.tbaa_type_desc @type_desc_2 {id = "any data access",
@@ -162,6 +162,9 @@ namespace fir {
 // Given the storage association, all non-box accesses are represented
 // with the conservative data access tag:
 //   < `<any data access>`, `<any data access>`, 0 >
+
+// additional tags are added in flang/Optimizer/Transforms/AddAliasTags.cpp
+// (before CodeGen)
 class TBAABuilder {
 public:
   TBAABuilder(mlir::MLIRContext *context, bool applyTBAA);
@@ -184,13 +187,13 @@ class TBAABuilder {
 
   // Returns TBAATagAttr representing access tag:
   //   < <descriptor member>, <descriptor member>, 0 >
-  mlir::LLVM::TBAATagAttr getAnyBoxAccessTag();
+  mlir::LLVM::TBAATagAttr getAnyBoxAccessTag(mlir::LLVM::LLVMFuncOp func);
   // Returns TBAATagAttr representing access tag:
   //   < <any data access>, <any data access>, 0 >
-  mlir::LLVM::TBAATagAttr getAnyDataAccessTag();
+  mlir::LLVM::TBAATagAttr getAnyDataAccessTag(mlir::LLVM::LLVMFuncOp func);
   // Returns TBAATagAttr representing access tag:
   //   < <any access>, <any access>, 0 >
-  mlir::LLVM::TBAATagAttr getAnyAccessTag();
+  mlir::LLVM::TBAATagAttr getAnyAccessTag(mlir::LLVM::LLVMFuncOp func);
 
   // Returns TBAATagAttr representing access tag described by the base and
   // access FIR types and the LLVM::GepOp representing the access in terms of
@@ -198,7 +201,8 @@ class TBAABuilder {
   // fir::BaseBoxType.
   mlir::LLVM::TBAATagAttr getBoxAccessTag(mlir::Type baseFIRType,
                                           mlir::Type accessFIRType,
-                                          mlir::LLVM::GEPOp gep);
+                                          mlir::LLVM::GEPOp gep,
+                                          mlir::LLVM::LLVMFuncOp func);
 
   // Returns TBAATagAttr representing access tag described by the base and
   // access FIR types and the LLVM::GepOp representing the access in terms of
@@ -206,34 +210,13 @@ class TBAABuilder {
   // "data" access, i.e. not an access of any box/descriptor member.
   mlir::LLVM::TBAATagAttr getDataAccessTag(mlir::Type baseFIRType,
                                            mlir::Type accessFIRType,
-                                           mlir::LLVM::GEPOp gep);
+                                           mlir::LLVM::GEPOp gep,
+                                           mlir::LLVM::LLVMFuncOp func);
 
   // Set to true, if TBAA builder is active, otherwise, all public
   // methods are no-ops.
   bool enableTBAA;
 
-  // LLVM::TBAARootAttr identifying Flang's TBAA root.
-  mlir::LLVM::TBAARootAttr flangTBAARoot;
-  // Identity string for Flang's TBAA root.
-  static constexpr llvm::StringRef flangTBAARootId = "Flang Type TBAA Root";
-
-  // LLVM::TBAATypeDescriptorAttr identifying "any access".
-  mlir::LLVM::TBAATypeDescriptorAttr anyAccessTypeDesc;
-  // Identity string for "any access" type descriptor.
-  static constexpr llvm::StringRef anyAccessTypeDescId = "any access";
-
-  // LLVM::TBAATypeDescriptorAttr identifying "any data access" (i.e. non-box
-  // memory access).
-  mlir::LLVM::TBAATypeDescriptorAttr anyDataAccessTypeDesc;
-  // Identity string for "any data access" type descriptor.
-  static constexpr llvm::StringRef anyDataAccessTypeDescId = "any data access";
-
-  // LLVM::TBAATypeDescriptorAttr identifying "descriptor member" access, i.e.
-  // any access within the bounds of a box/descriptor.
-  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc;
-  // Identity string for "descriptor member" type descriptor.
-  static constexpr llvm::StringRef boxMemberTypeDescId = "descriptor member";
-
   // Number of attached TBAA tags (used for debugging).
   unsigned tagAttachmentCounter = 0;
 
@@ -247,6 +230,8 @@ class TBAABuilder {
       std::tuple<mlir::LLVM::TBAANodeAttr, mlir::LLVM::TBAANodeAttr, int64_t>,
       mlir::LLVM::TBAATagAttr>
       tagsMap;
+
+  TBAAForrest trees;
 };
 
 } // namespace fir
diff --git a/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp b/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
index 1a5ae8cf7aac629..9baeb0b27091ad7 100644
--- a/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
+++ b/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
@@ -15,6 +15,9 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
+#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
+#include <mlir/Dialect/LLVMIR/LLVMTypes.h>
 
 #define DEBUG_TYPE "flang-tbaa-builder"
 
@@ -27,6 +30,13 @@ static llvm::cl::opt<bool> disableTBAA(
                    "to override default Flang behavior"),
     llvm::cl::init(false));
 
+// disabling this will play badly with the FIR TBAA pass, leading to worse
+// performance
+static llvm::cl::opt<bool> perFunctionTBAATrees(
+    "per-function-tbaa-trees",
+    llvm::cl::desc("Give each function an independent TBAA tree (default)"),
+    llvm::cl::init(true), llvm::cl::Hidden);
+
 // tagAttachmentLimit is a debugging option that allows limiting
 // the number of TBAA access tag attributes attached to operations.
 // It is set to kTagAttachmentUnlimited by default denoting "no limit".
@@ -39,26 +49,10 @@ static llvm::cl::opt<unsigned>
 namespace fir {
 
 TBAABuilder::TBAABuilder(MLIRContext *context, bool applyTBAA)
-    : enableTBAA(applyTBAA && !disableTBAA) {
+    : enableTBAA(applyTBAA && !disableTBAA),
+      trees(/*separatePerFunction=*/perFunctionTBAATrees) {
   if (!enableTBAA)
     return;
-
-  // Root node.
-  flangTBAARoot =
-      TBAARootAttr::get(context, StringAttr::get(context, flangTBAARootId));
-
-  // Any access node.
-  anyAccessTypeDesc = TBAATypeDescriptorAttr::get(
-      context, anyAccessTypeDescId, TBAAMemberAttr::get(flangTBAARoot, 0));
-
-  // Any data access node.
-  anyDataAccessTypeDesc =
-      TBAATypeDescriptorAttr::get(context, anyDataAccessTypeDescId,
-                                  TBAAMemberAttr::get(anyAccessTypeDesc, 0));
-
-  // Box member access node.
-  boxMemberTypeDesc = TBAATypeDescriptorAttr::get(
-      context, boxMemberTypeDescId, TBAAMemberAttr::get(anyAccessTypeDesc, 0));
 }
 
 TBAATagAttr TBAABuilder::getAccessTag(TBAATypeDescriptorAttr baseTypeDesc,
@@ -73,26 +67,31 @@ TBAATagAttr TBAABuilder::getAccessTag(TBAATypeDescriptorAttr baseTypeDesc,
   return tag;
 }
 
-TBAATagAttr TBAABuilder::getAnyBoxAccessTag() {
+TBAATagAttr TBAABuilder::getAnyBoxAccessTag(mlir::LLVM::LLVMFuncOp func) {
+  TBAATypeDescriptorAttr boxMemberTypeDesc = trees[func].boxMemberTypeDesc;
   return getAccessTag(boxMemberTypeDesc, boxMemberTypeDesc, /*offset=*/0);
 }
 
 TBAATagAttr TBAABuilder::getBoxAccessTag(Type baseFIRType, Type accessFIRType,
-                                         GEPOp gep) {
-  return getAnyBoxAccessTag();
+                                         GEPOp gep,
+                                         mlir::LLVM::LLVMFuncOp func) {
+  return getAnyBoxAccessTag(func);
 }
 
-TBAATagAttr TBAABuilder::getAnyDataAccessTag() {
+TBAATagAttr TBAABuilder::getAnyDataAccessTag(mlir::LLVM::LLVMFuncOp func) {
+  TBAATypeDescriptorAttr anyDataAccessTypeDesc = trees[func].anyDataTypeDesc;
   return getAccessTag(anyDataAccessTypeDesc, anyDataAccessTypeDesc,
                       /*offset=*/0);
 }
 
 TBAATagAttr TBAABuilder::getDataAccessTag(Type baseFIRType, Type accessFIRType,
-                                          GEPOp gep) {
-  return getAnyDataAccessTag();
+                                          GEPOp gep,
+                                          mlir::LLVM::LLVMFuncOp func) {
+  return getAnyDataAccessTag(func);
 }
 
-TBAATagAttr TBAABuilder::getAnyAccessTag() {
+TBAATagAttr TBAABuilder::getAnyAccessTag(mlir::LLVM::LLVMFuncOp func) {
+  TBAATypeDescriptorAttr anyAccessTypeDesc = trees[func].anyAccessDesc;
   return getAccessTag(anyAccessTypeDesc, anyAccessTypeDesc, /*offset=*/0);
 }
 
@@ -101,6 +100,9 @@ void TBAABuilder::attachTBAATag(AliasAnalysisOpInterface op, Type baseFIRType,
   if (!enableTBAA)
     return;
 
+  mlir::LLVM::LLVMFuncOp func = op->getParentOfType<mlir::LLVM::LLVMFuncOp>();
+  assert(func && "func.func should have already been converted to llvm.func");
+
   ++tagAttachmentCounter;
   if (tagAttachmentLimit != kTagAttachmentUnlimited &&
       tagAttachmentCounter > tagAttachmentLimit)
@@ -115,11 +117,11 @@ void TBAABuilder::attachTBAATag(AliasAnalysisOpInterface op, Type baseFIRType,
     // a mix of data members and descriptor members may alias
     // with both data and descriptor accesses.
     // Conservatively set any-access tag if there is any descriptor member.
-    tbaaTagSym = getAnyAccessTag();
+    tbaaTagSym = getAnyAccessTag(func);
   } else if (baseFIRType.isa<fir::BaseBoxType>()) {
-    tbaaTagSym = getBoxAccessTag(baseFIRType, accessFIRType, gep);
+    tbaaTagSym = getBoxAccessTag(baseFIRType, accessFIRType, gep, func);
   } else {
-    tbaaTagSym = getDataAccessTag(baseFIRType, accessFIRType, gep);
+    tbaaTagSym = getDataAccessTag(baseFIRType, accessFIRType, gep, func);
   }
 
   if (!tbaaTagSym)
diff --git a/flang/test/Fir/tbaa-codegen.fir b/flang/test/Fir/tbaa-codegen.fir
index 386fe42eaaba9a2..9ee7a28a6d22250 100644
--- a/flang/test/Fir/tbaa-codegen.fir
+++ b/flang/test/Fir/tbaa-codegen.fir
@@ -39,9 +39,9 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
 // CHECK:  store i32 %[[A2]], ptr %[[A1]], align 4, !tbaa ![[A_ACCESS_TAG]]
 // CHECK:  ret void
 // CHECK: }
+// CHECK: ![[ANY_ACCESS_TYPE:.*]] = !{!"any access", ![[ROOT:.*]], i64 0}
+// CHECK: ![[ROOT]] = !{!"Flang function root _QPsimple"}
 // CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
 // CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[DUMMY_ARG_TYPE:.*]], i64 0}
 // CHECK: ![[DUMMY_ARG_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
-// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE:.*]], i64 0}
-// CHECK: ![[ANY_ACCESS_TYPE]] = !{!"any access", ![[ROOT:.*]], i64 0}
-// CHECK: ![[ROOT]] = !{!"Flang function root _QPsimple"}
\ No newline at end of file
+// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE]], i64 0}
\ No newline at end of file
diff --git a/flang/test/Fir/tbaa-codegen2.fir b/flang/test/Fir/tbaa-codegen2.fir
new file mode 100644
index 000000000000000..6b6849ab8584191
--- /dev/null
+++ b/flang/test/Fir/tbaa-codegen2.fir
@@ -0,0 +1,114 @@
+// test that tbaa attributes can be added to fir.load and fir.store
+// and that these attributes are propagated to LLVMIR and that these
+// interoperrate with tbaa tags added during codegen
+
+// RUN: tco %s | FileCheck %s
+
+// subroutine func(a)
+//   integer, intent(inout) :: a(:)
+//   a = a+1
+//   a(1) = a(2)
+#tbaa_root = #llvm.tbaa_root<id = "Flang function root _QPfunc">
+#tbaa_type_desc = #llvm.tbaa_type_desc<id = "any access", members = {<#tbaa_root, 0>}>
+#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "any data access", members = {<#tbaa_type_desc, 0>}>
+#tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#tbaa_type_desc1, 0>}>
+#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#tbaa_type_desc2, 0>}>
+#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc3, access_type = #tbaa_type_desc3, offset = 0>
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "aarch64-unknown-linux-gnu"} {
+  func.func @_QPfunc(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+    %c3_i32 = arith.constant 3 : i32
+    %c1_i32 = arith.constant 1 : i32
+    %c0 = arith.constant 0 : index
+    %c2 = arith.constant 2 : index
+    %c1 = arith.constant 1 : index
+    %0 = fir.alloca !fir.box<!fir.array<?xi32>>
+    %1 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFfuncEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.rebox %1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+    %4 = fir.shape %3#1 : (index) -> !fir.shape<1>
+    %5 = fir.allocmem !fir.array<?xi32>, %3#1 {bindc_name = ".tmp.array", uniq_name = ""}
+    %6 = fir.declare %5(%4) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xi32>>
+    %7 = fir.embox %6(%4) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+    fir.do_loop %arg1 = %c1 to %3#1 step %c1 unordered {
+      %16 = fir.array_coor %2 %arg1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+      // load with tbaa
+      %17 = fir.load %16 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+      %18 = arith.addi %17, %c1_i32 : i32
+      %19 = fir.array_coor %6(%4) %arg1 : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+      // store without tbaa
+      fir.store %18 to %19 : !fir.ref<i32>
+    }
+    fir.store %2 to %0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+    %8 = fir.address_of(@_QQcl.2F746D702F73696D706C652E66393000) : !fir.ref<!fir.char<1,16>>
+    %9 = fir.convert %0 : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<none>>
+    %10 = fir.convert %7 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+    %11 = fir.convert %8 : (!fir.ref<!fir.char<1,16>>) -> !fir.ref<i8>
+    %12 = fir.call @_FortranAAssign(%9, %10, %11, %c3_i32) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+    fir.freemem %6 : !fir.heap<!fir.array<?xi32>>
+    %13 = fir.array_coor %2 %c2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    // load modified not to have tbaa
+    %14 = fir.load %13 : !fir.ref<i32>
+    %15 = fir.array_coor %2 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    // store with tbaa
+    fir.store %14 to %15 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+    return
+  }
+  func.func private @_FortranAAssign(!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none attributes {fir.runtime}
+  fir.global linkonce @_QQcl.2F746D702F73696D706C652E66393000 constant : !fir.char<1,16> {
+    %0 = fir.string_lit "/tmp/simple.f90\00"(16) : !fir.char<1,16>
+    fir.has_value %0 : !fir.char<1,16>
+  }
+}
+// CHECK-LABEL: define void @_QPfunc(
+// CHECK-SAME:      ptr %[[ARG0:.*]]) {
+// [...]
+// CHECK:  %[[VAL5:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 7, i32 0, i32 0
+// box access:
+// CHECK:  %[[VAL6:.*]] = load i64, ptr %[[VAL5]], align 4, !tbaa ![[BOX_ACCESS_TAG:.*]]
+// CHECK:  %[[VAL7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %0, i32 0, i32 7, i32 0, i32 1
+// box access:
+// CHECK:  %[[VAL8:.*]] = load i64, ptr %[[VAL7]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL9:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 7, i32 0, i32 2
+// box access:
+// CHECK:  %[[VAL10:.*]] = load i64, ptr %[[VAL9]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL11:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 0
+// box access:
+// CHECK:  %[[VAL12:.*]] = load ptr, ptr %[[VAL11]], align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL15:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %14, ptr %[[VAL12]], 0
+// CHECK:  store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[VAL15]], ptr %{{.*}}, align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL16:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 0
+// box access:
+// CHECK:  %[[VAL17:.*]] = load i64, ptr %[[VAL16]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL18:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 1
+// box access:
+// CHECK:  %[[VAL19:.*]] = load i64, ptr %[[VAL18]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL20:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 2
+// box access:
+// CHECK:  %[[VAL21:.*]] = load i64, ptr %[[VAL20]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// [...]
+// box access:
+// CHECK:  store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %{{.*}}, ptr %{{.*}}, align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// [...]
+
+// [...]
+// CHECK:  %[[VAL40:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 0
+// box access:
+// CHECK:  %[[VAL41:.*]] = load ptr, ptr %[[VAL40]], align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL42:.*]] = getelementptr i8, ptr %[[VAL41]], i64 %{{.*}}
+// access to 'a':
+// CHECK:  %[[VAL43:.*]] = load i32, ptr %[[VAL42]], align 4, !tbaa ![[A_ACCESS_TAG:.*]]
+// [...]
+// CHECK:  %[[VAL50:.*]] = getelementptr i32, ptr %{{.*}}, i64 %{{.*}}
+// store to the temporary:
+// CHECK:  store i32 %{{.*}}, ptr %[[VAL50]], align 4, !tbaa ![[DATA_ACCESS_TAG:.*]]
+// [...]
+
+// CHECK: [[BOX_ACCESS_TAG]] = !{![[BOX_ACCESS_TYPE:.*]], ![[BOX_ACCESS_TYPE]], i64 0}
+// CHECK: ![[BOX_ACCESS_TYPE]] = !{!"descriptor member", ![[ANY_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[ANY_ACCESS_TYPE]] = !{!"any access", ![[ROOT_TYPE:.*]], i64 0}
+// CHECK: ![[ROOT_TYPE]] = !{!"Flang function root _QPfunc"}
+// CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
+// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[ARG_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[ARG_ACCESS_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE]], i64 0}
+// CHECK: ![[DATA_ACCESS_TAG]] = !{![[DATA_ACCESS_TYPE]], ![[DATA_ACCESS_TYPE]], i64 0}
diff --git a/flang/test/Fir/tbaa.fir b/flang/test/Fir/tbaa.fir
index eabc9f30127fa3d..d260e4f4aec46a3 100644
--- a/flang/test/Fir/tbaa.fir
+++ b/flang/test/Fir/tbaa.fir
@@ -1,5 +1,7 @@
-// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu apply-tbaa=true" | FileCheck %s
-// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu apply-tbaa=true" | FileCheck %s
+// test without per-function tbaa trees so that this functionality does not bitrot
+// per-function tbaa tbaa-codegen2.fir
+// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu apply-tbaa=true" --per-function-tbaa-trees=false | FileCheck %s
+// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu apply-tbaa=true" --per-function-tbaa-trees=false | FileCheck %s
 
 module {
   func.func @tbaa(%arg0: !fir.class<!fir.array<?xnone>> {fir.bindc_name = "a"}) {
@@ -20,7 +22,7 @@ module {
   }
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[ANYDACC:.*]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
@@ -119,7 +121,7 @@ module {
   }
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -245,7 +247,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<*:f64>>) -> i32 {
   return %0 : i32
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -264,7 +266,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<*:f64>>) -> i1 {
   return %0 : i1
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -285,7 +287,7 @@ func.func @tbaa(%arg0: !fir.box<f32>) -> i32 {
   return %0 : i32
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -304,7 +306,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<*:f64>>) -> i1 {
   return %0 : i1
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -328,7 +330,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<?xi32>>) {
   return
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -356,7 +358,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<?xi32>>) {
 
 // Check that the scalar aggregate load/store with a descriptor member
 // is mapped to any-access.
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[$ANYT:.*]] = #llvm.tbaa_tag<base_type = #[[ANYACC]], access_type = #[[ANYACC]], offset = 0>
 
@@ -373,7 +375,7 @@ func.func @tbaa(%arg0: !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}
 
 // Check that the array aggregate load/store with a descriptor member
 // is mapped to any-access.
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[$ANYT:.*]] = #llvm.tbaa_tag<base_type = #[[ANYACC]], access_type = #[[ANYACC]], offset = 0>
 



More information about the flang-commits mailing list