[flang-commits] [flang] [flang] add tbaa tags to global variables (PR #68727)

Tom Eccles via flang-commits flang-commits at lists.llvm.org
Thu Oct 12 05:22:51 PDT 2023


https://github.com/tblah updated https://github.com/llvm/llvm-project/pull/68727

>From c13a5dc6d0509e329c8735c26361cd3abfdde5a1 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 12 Sep 2023 15:40:30 +0000
Subject: [PATCH 01/16] [flang][FIR] add FirAliasAnalysisOpInterface

This interface allows (HL)FIR passes to add TBAA information to fir.load
and fir.store. If present, these TBAA tags take precedence over those
added during CodeGen.

We can't reuse mlir::LLVM::AliasAnalysisOpInterface because that uses
the mlir::LLVM namespace so it tries to define methods for fir
operations in the wrong namespace. But I did re-use the tbaa tag type to
minimise boilerplate code.

The new builders are to preserve the old interface without the tbaa tag.
---
 .../flang/Optimizer/Dialect/CMakeLists.txt    |  4 ++
 .../include/flang/Optimizer/Dialect/FIROps.h  |  1 +
 .../include/flang/Optimizer/Dialect/FIROps.td | 17 ++++--
 .../Dialect/FirAliasAnalysisOpInterface.h     | 27 +++++++++
 .../Dialect/FirAliasAnalysisOpInterface.td    | 59 +++++++++++++++++++
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 15 ++++-
 flang/lib/Optimizer/Dialect/CMakeLists.txt    |  1 +
 flang/lib/Optimizer/Dialect/FIROps.cpp        | 17 +++++-
 .../Dialect/FirAliasAnalysisOpInterface.cpp   | 31 ++++++++++
 flang/test/Fir/tbaa-codegen.fir               | 47 +++++++++++++++
 10 files changed, 210 insertions(+), 9 deletions(-)
 create mode 100644 flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
 create mode 100644 flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
 create mode 100644 flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
 create mode 100644 flang/test/Fir/tbaa-codegen.fir

diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
index d657e3f16690377..15c835aad9bc7d2 100644
--- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
@@ -18,6 +18,10 @@ set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td)
 mlir_tablegen(FortranVariableInterface.h.inc -gen-op-interface-decls)
 mlir_tablegen(FortranVariableInterface.cpp.inc -gen-op-interface-defs)
 
+set(LLVM_TARGET_DEFINITIONS FirAliasAnalysisOpInterface.td)
+mlir_tablegen(FirAliasAnalaysOpInterface.h.inc -gen-op-interface-decls)
+mlir_tablegen(FirAliasAnalysisOpInterface.cpp.inc -gen-op-interface-defs)
+
 set(LLVM_TARGET_DEFINITIONS CanonicalizationPatterns.td)
 mlir_tablegen(CanonicalizationPatterns.inc -gen-rewriters)
 add_public_tablegen_target(CanonicalizationPatternsIncGen)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h
index 8f03dc5cf795225..bab35bac5c81f4b 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.h
@@ -11,6 +11,7 @@
 
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
 #include "flang/Optimizer/Dialect/FortranVariableInterface.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index a57add9f731979d..fae9b92662f3559 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -16,10 +16,12 @@
 
 include "mlir/Dialect/Arith/IR/ArithBase.td"
 include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td"
+include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td"
 include "flang/Optimizer/Dialect/FIRDialect.td"
 include "flang/Optimizer/Dialect/FIRTypes.td"
 include "flang/Optimizer/Dialect/FIRAttr.td"
 include "flang/Optimizer/Dialect/FortranVariableInterface.td"
+include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td"
 include "mlir/IR/BuiltinAttributes.td"
 
 // Base class for FIR operations.
@@ -258,7 +260,7 @@ def fir_FreeMemOp : fir_Op<"freemem", [MemoryEffects<[MemFree]>]> {
   let assemblyFormat = "$heapref attr-dict `:` qualified(type($heapref))";
 }
 
-def fir_LoadOp : fir_OneResultOp<"load", []> {
+def fir_LoadOp : fir_OneResultOp<"load", [FirAliasAnalysisOpInterface]> {
   let summary = "load a value from a memory reference";
   let description = [{
     Load a value from a memory reference into an ssa-value (virtual register).
@@ -274,9 +276,11 @@ def fir_LoadOp : fir_OneResultOp<"load", []> {
     or null.
   }];
 
-  let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$memref);
+  let arguments = (ins Arg<AnyReferenceLike, "", [MemRead]>:$memref,
+                  OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
 
-  let builders = [OpBuilder<(ins "mlir::Value":$refVal)>];
+  let builders = [OpBuilder<(ins "mlir::Value":$refVal)>,
+                  OpBuilder<(ins "mlir::Type":$resTy, "mlir::Value":$refVal)>];
 
   let hasCustomAssemblyFormat = 1;
 
@@ -285,7 +289,7 @@ def fir_LoadOp : fir_OneResultOp<"load", []> {
   }];
 }
 
-def fir_StoreOp : fir_Op<"store", []> {
+def fir_StoreOp : fir_Op<"store", [FirAliasAnalysisOpInterface]> {
   let summary = "store an SSA-value to a memory location";
 
   let description = [{
@@ -305,7 +309,10 @@ def fir_StoreOp : fir_Op<"store", []> {
   }];
 
   let arguments = (ins AnyType:$value,
-                   Arg<AnyReferenceLike, "", [MemWrite]>:$memref);
+                   Arg<AnyReferenceLike, "", [MemWrite]>:$memref,
+                   OptionalAttr<LLVM_TBAATagArrayAttr>:$tbaa);
+
+  let builders = [OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$memref)>];
 
   let hasCustomAssemblyFormat = 1;
   let hasVerifier = 1;
diff --git a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
new file mode 100644
index 000000000000000..c07bf648eab454a
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
@@ -0,0 +1,27 @@
+//===- FirAliasAnalysisInterface.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an interface for adding alias analysis information to
+// loads and stores
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
+#define FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
+
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Support/LogicalResult.h"
+
+namespace fir::detail {
+mlir::LogicalResult verifyFirAliasAnalysisOpInterface(mlir::Operation *op);
+} // namespace fir::detail
+
+#include "flang/Optimizer/Dialect/FirAliasAnalaysOpInterface.h.inc"
+
+#endif // FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
diff --git a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
new file mode 100644
index 000000000000000..1d3e49d383f63a4
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
@@ -0,0 +1,59 @@
+//===-- FirAliasAnalysisOpInterface.td ---------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+include "mlir/IR/Interfaces.td"
+
+def FirAliasAnalysisOpInterface : OpInterface<"FirAliasAnalysisOpInterface"> {
+  let description = [{
+    An interface for memory operations that can carry alias analysis metadata.
+    It provides setters and getters for the operation's alias analysis
+    attributes. The default implementations of the interface methods expect
+    the operation to have an attribute of type ArrayAttr named tbaa.
+    Unlike the mlir::LLVM::AliasAnalysisOpInterface, this only supports tbaa.
+  }];
+
+  let cppNamespace = "::fir";
+  let verify = [{ return detail::verifyFirAliasAnalysisOpInterface($_op); }];
+
+  let methods = [
+    InterfaceMethod<
+      /*desc=*/        "Returns the tbaa attribute or nullptr",
+      /*returnType=*/  "mlir::ArrayAttr",
+      /*methodName=*/  "getTBAATagsOrNull",
+      /*args=*/        (ins),
+      /*methodBody=*/  [{}],
+      /*defaultImpl=*/ [{
+        auto op = mlir::cast<ConcreteOp>(this->getOperation());
+        return op.getTbaaAttr();
+      }]
+      >,
+    InterfaceMethod<
+      /*desc=*/        "Sets the tbaa attribute",
+      /*returnType=*/  "void",
+      /*methodName=*/  "setTBAATags",
+      /*args=*/        (ins "const mlir::ArrayAttr":$attr),
+      /*methodBody=*/  [{}],
+      /*defaultImpl=*/ [{
+        auto op = mlir::cast<ConcreteOp>(this->getOperation());
+        op.setTbaaAttr(attr);
+      }]
+      >,
+    InterfaceMethod<
+      /*desc=*/        "Returns a list of all pointer operands accessed by the "
+                       "operation",
+      /*returnType=*/  "::llvm::SmallVector<::mlir::Value>",
+      /*methodName=*/  "getAccessedOperands",
+      /*args=*/        (ins),
+      /*methodBody=*/  [{}],
+      /*defaultImpl=*/ [{
+        auto op = mlir::cast<ConcreteOp>(this->getOperation());
+        return {op.getMemref()};
+      }]
+      >
+  ];
+}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index d1b7f3de93b4647..f2ce123124895e0 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3085,7 +3085,10 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
       auto boxValue = rewriter.create<mlir::LLVM::LoadOp>(
           loc, boxPtrTy.cast<mlir::LLVM::LLVMPointerType>().getElementType(),
           inputBoxStorage);
-      attachTBAATag(boxValue, boxTy, boxTy, nullptr);
+      if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
+        boxValue.setTBAATags(*optionalTag);
+      else
+        attachTBAATag(boxValue, boxTy, boxTy, nullptr);
       auto newBoxStorage =
           genAllocaWithType(loc, boxPtrTy, defaultAlign, rewriter);
       auto storeOp =
@@ -3096,7 +3099,10 @@ struct LoadOpConversion : public FIROpConversion<fir::LoadOp> {
       mlir::Type loadTy = convertType(load.getType());
       auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
           load.getLoc(), loadTy, adaptor.getOperands(), load->getAttrs());
-      attachTBAATag(loadOp, load.getType(), load.getType(), nullptr);
+      if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
+        loadOp.setTBAATags(*optionalTag);
+      else
+        attachTBAATag(loadOp, load.getType(), load.getType(), nullptr);
       rewriter.replaceOp(load, loadOp.getResult());
     }
     return mlir::success();
@@ -3340,7 +3346,10 @@ struct StoreOpConversion : public FIROpConversion<fir::StoreOp> {
       newStoreOp = rewriter.create<mlir::LLVM::StoreOp>(
           loc, adaptor.getOperands()[0], adaptor.getOperands()[1]);
     }
-    attachTBAATag(newStoreOp, storeTy, storeTy, nullptr);
+    if (std::optional<mlir::ArrayAttr> optionalTag = store.getTbaa())
+      newStoreOp.setTBAATags(*optionalTag);
+    else
+      attachTBAATag(newStoreOp, storeTy, storeTy, nullptr);
     rewriter.eraseOp(store);
     return mlir::success();
   }
diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt
index fe5edb54a78e9e5..6beabcdb4e25d76 100644
--- a/flang/lib/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt
@@ -6,6 +6,7 @@ add_flang_library(FIRDialect
   FIROps.cpp
   FIRType.cpp
   FortranVariableInterface.cpp
+  FirAliasAnalysisOpInterface.cpp
   Inliner.cpp
 
   DEPENDS
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 962b87acd5a8050..2f08cd1b8111531 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -2016,8 +2016,18 @@ void fir::LoadOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
     mlir::emitError(result.location, "not a memory reference type");
     return;
   }
+  build(builder, result, eleTy, refVal);
+}
+
+void fir::LoadOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+                        mlir::Type resTy, mlir::Value refVal) {
+
+  if (!refVal) {
+    mlir::emitError(result.location, "LoadOp has null argument");
+    return;
+  }
   result.addOperands(refVal);
-  result.addTypes(eleTy);
+  result.addTypes(resTy);
 }
 
 mlir::ParseResult fir::LoadOp::getElementOf(mlir::Type &ele, mlir::Type ref) {
@@ -3288,6 +3298,11 @@ mlir::LogicalResult fir::StoreOp::verify() {
   return mlir::success();
 }
 
+void fir::StoreOp::build(mlir::OpBuilder &builder, mlir::OperationState &result,
+                         mlir::Value value, mlir::Value memref) {
+  build(builder, result, value, memref, {});
+}
+
 //===----------------------------------------------------------------------===//
 // StringLitOp
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp b/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
new file mode 100644
index 000000000000000..63686b1d0e7ebb6
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
@@ -0,0 +1,31 @@
+//===-- FirAliasAnalysisOpInterface.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp.inc"
+
+mlir::LogicalResult
+fir::detail::verifyFirAliasAnalysisOpInterface(mlir::Operation *op) {
+  auto iface = mlir::cast<FirAliasAnalysisOpInterface>(op);
+
+  mlir::ArrayAttr tags = iface.getTBAATagsOrNull();
+  if (!tags)
+    return mlir::success();
+
+  for (mlir::Attribute iter : tags)
+    if (!mlir::isa<mlir::LLVM::TBAATagAttr>(iter))
+      return op->emitOpError("expected op to return array of ")
+             << mlir::LLVM::TBAATagAttr::getMnemonic() << " attributes";
+  return mlir::success();
+}
diff --git a/flang/test/Fir/tbaa-codegen.fir b/flang/test/Fir/tbaa-codegen.fir
new file mode 100644
index 000000000000000..386fe42eaaba9a2
--- /dev/null
+++ b/flang/test/Fir/tbaa-codegen.fir
@@ -0,0 +1,47 @@
+// test that tbaa attributes can be added to fir.load and fir.store
+// and that these attributes are propagated to LLVMIR
+
+// RUN: tco %s | FileCheck %s
+
+// subroutine simple(a)
+//   integer, intent(inout) :: a(:)
+//   a(1) = a(2)
+// end subroutine
+#tbaa_root = #llvm.tbaa_root<id = "Flang function root _QPsimple">
+#tbaa_type_desc = #llvm.tbaa_type_desc<id = "any access", members = {<#tbaa_root, 0>}>
+#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "any data access", members = {<#tbaa_type_desc, 0>}>
+#tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#tbaa_type_desc1, 0>}>
+#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#tbaa_type_desc2, 0>}>
+#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc3, access_type = #tbaa_type_desc3, offset = 0>
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "aarch64-unknown-linux-gnu"} {
+  func.func @_QPsimple(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFfuncEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.array_coor %1 %c2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %3 = fir.load %2 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+    %4 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %3 to %4 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+    return
+  }
+}
+
+// CHECK-LABEL: define void @_QPsimple(
+// CHECK-SAME:      ptr %[[ARG0:.*]]) {
+// [...]
+// load  a(2):
+// CHECK:  %[[VAL20:.*]] = getelementptr i8, ptr %{{.*}}, i64 %{{.*}}
+// CHECK:  %[[A2:.*]] = load i32, ptr %[[VAL20]], align 4, !tbaa ![[A_ACCESS_TAG:.*]]
+// [...]
+// store a(2) to a(1):
+// CHECK:  %[[A1:.*]] = getelementptr i8, ptr %{{.*}}, i64 %{{.*}}
+// CHECK:  store i32 %[[A2]], ptr %[[A1]], align 4, !tbaa ![[A_ACCESS_TAG]]
+// CHECK:  ret void
+// CHECK: }
+// CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
+// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[DUMMY_ARG_TYPE:.*]], i64 0}
+// CHECK: ![[DUMMY_ARG_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[ANY_ACCESS_TYPE]] = !{!"any access", ![[ROOT:.*]], i64 0}
+// CHECK: ![[ROOT]] = !{!"Flang function root _QPsimple"}
\ No newline at end of file

>From 15648e0f9af0471d327f07f6a089441e7c50c1b9 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 5 Oct 2023 17:10:10 +0000
Subject: [PATCH 02/16] [flang][FIR] Add dependency from FIRDialect to
 LLVMDialect

For TBAA attribute types.
---
 flang/include/flang/Optimizer/Dialect/FIRDialect.td | 4 +++-
 flang/lib/Optimizer/Dialect/FIRDialect.cpp          | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIRDialect.td b/flang/include/flang/Optimizer/Dialect/FIRDialect.td
index d0735bbeb2d3d88..b366b6d40e4e213 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRDialect.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRDialect.td
@@ -30,7 +30,9 @@ def fir_Dialect : Dialect {
   let dependentDialects = [
     // Arith dialect provides FastMathFlagsAttr
     // supported by some FIR operations.
-    "arith::ArithDialect"
+    "arith::ArithDialect",
+    // TBAA Tag types
+    "LLVM::LLVMDialect"
   ];
 }
 
diff --git a/flang/lib/Optimizer/Dialect/FIRDialect.cpp b/flang/lib/Optimizer/Dialect/FIRDialect.cpp
index c2377f112be8473..997a6c90ada314a 100644
--- a/flang/lib/Optimizer/Dialect/FIRDialect.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRDialect.cpp
@@ -14,6 +14,7 @@
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Transforms/InliningUtils.h"
 
 using namespace fir;
@@ -58,6 +59,7 @@ struct FIRInlinerInterface : public mlir::DialectInlinerInterface {
 
 fir::FIROpsDialect::FIROpsDialect(mlir::MLIRContext *ctx)
     : mlir::Dialect("fir", ctx, mlir::TypeID::get<FIROpsDialect>()) {
+  getContext()->loadDialect<mlir::LLVM::LLVMDialect>();
   registerTypes();
   registerAttributes();
   addOperations<

>From 1f7ea396f3329469bc36d85c3fdefcc045dc91e9 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 09:31:58 +0000
Subject: [PATCH 03/16] [flang] Rename FirAliasAnalysisOpInterface to
 FirAliasTagOpInterface

---
 flang/include/flang/Optimizer/Dialect/CMakeLists.txt |  6 +++---
 flang/include/flang/Optimizer/Dialect/FIROps.h       |  2 +-
 flang/include/flang/Optimizer/Dialect/FIROps.td      |  6 +++---
 ...nalysisOpInterface.h => FirAliasTagOpInterface.h} | 12 ++++++------
 ...lysisOpInterface.td => FirAliasTagOpInterface.td} |  6 +++---
 flang/lib/Optimizer/Dialect/CMakeLists.txt           |  2 +-
 ...sisOpInterface.cpp => FirAliasTagOpInterface.cpp} | 10 +++++-----
 7 files changed, 22 insertions(+), 22 deletions(-)
 rename flang/include/flang/Optimizer/Dialect/{FirAliasAnalysisOpInterface.h => FirAliasTagOpInterface.h} (61%)
 rename flang/include/flang/Optimizer/Dialect/{FirAliasAnalysisOpInterface.td => FirAliasTagOpInterface.td} (89%)
 rename flang/lib/Optimizer/Dialect/{FirAliasAnalysisOpInterface.cpp => FirAliasTagOpInterface.cpp} (71%)

diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
index 15c835aad9bc7d2..fe9864a26295d22 100644
--- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt
@@ -18,9 +18,9 @@ set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td)
 mlir_tablegen(FortranVariableInterface.h.inc -gen-op-interface-decls)
 mlir_tablegen(FortranVariableInterface.cpp.inc -gen-op-interface-defs)
 
-set(LLVM_TARGET_DEFINITIONS FirAliasAnalysisOpInterface.td)
-mlir_tablegen(FirAliasAnalaysOpInterface.h.inc -gen-op-interface-decls)
-mlir_tablegen(FirAliasAnalysisOpInterface.cpp.inc -gen-op-interface-defs)
+set(LLVM_TARGET_DEFINITIONS FirAliasTagOpInterface.td)
+mlir_tablegen(FirAliasTagOpInterface.h.inc -gen-op-interface-decls)
+mlir_tablegen(FirAliasTagOpInterface.cpp.inc -gen-op-interface-defs)
 
 set(LLVM_TARGET_DEFINITIONS CanonicalizationPatterns.td)
 mlir_tablegen(CanonicalizationPatterns.inc -gen-rewriters)
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h
index bab35bac5c81f4b..87196dbf9b97d2e 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.h
@@ -11,7 +11,7 @@
 
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
-#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
+#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"
 #include "flang/Optimizer/Dialect/FortranVariableInterface.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index fae9b92662f3559..13838f999298808 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -21,7 +21,7 @@ include "flang/Optimizer/Dialect/FIRDialect.td"
 include "flang/Optimizer/Dialect/FIRTypes.td"
 include "flang/Optimizer/Dialect/FIRAttr.td"
 include "flang/Optimizer/Dialect/FortranVariableInterface.td"
-include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td"
+include "flang/Optimizer/Dialect/FirAliasTagOpInterface.td"
 include "mlir/IR/BuiltinAttributes.td"
 
 // Base class for FIR operations.
@@ -260,7 +260,7 @@ def fir_FreeMemOp : fir_Op<"freemem", [MemoryEffects<[MemFree]>]> {
   let assemblyFormat = "$heapref attr-dict `:` qualified(type($heapref))";
 }
 
-def fir_LoadOp : fir_OneResultOp<"load", [FirAliasAnalysisOpInterface]> {
+def fir_LoadOp : fir_OneResultOp<"load", [FirAliasTagOpInterface]> {
   let summary = "load a value from a memory reference";
   let description = [{
     Load a value from a memory reference into an ssa-value (virtual register).
@@ -289,7 +289,7 @@ def fir_LoadOp : fir_OneResultOp<"load", [FirAliasAnalysisOpInterface]> {
   }];
 }
 
-def fir_StoreOp : fir_Op<"store", [FirAliasAnalysisOpInterface]> {
+def fir_StoreOp : fir_Op<"store", [FirAliasTagOpInterface]> {
   let summary = "store an SSA-value to a memory location";
 
   let description = [{
diff --git a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h b/flang/include/flang/Optimizer/Dialect/FirAliasTagOpInterface.h
similarity index 61%
rename from flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
rename to flang/include/flang/Optimizer/Dialect/FirAliasTagOpInterface.h
index c07bf648eab454a..f2d5b39acf3a723 100644
--- a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h
+++ b/flang/include/flang/Optimizer/Dialect/FirAliasTagOpInterface.h
@@ -1,4 +1,4 @@
-//===- FirAliasAnalysisInterface.h ------------------------------*- C++ -*-===//
+//===- FirAliasTagOpInterface.h ---------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,17 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
-#define FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
+#ifndef FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_TAG_OP_INTERFACE_H
+#define FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_TAG_OP_INTERFACE_H
 
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Support/LogicalResult.h"
 
 namespace fir::detail {
-mlir::LogicalResult verifyFirAliasAnalysisOpInterface(mlir::Operation *op);
+mlir::LogicalResult verifyFirAliasTagOpInterface(mlir::Operation *op);
 } // namespace fir::detail
 
-#include "flang/Optimizer/Dialect/FirAliasAnalaysOpInterface.h.inc"
+#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h.inc"
 
-#endif // FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_ANALYSIS_INTERFACE_H
+#endif // FORTRAN_OPTIMIZER_DIALECT_FIR_ALIAS_TAG_OP_INTERFACE_H
diff --git a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td b/flang/include/flang/Optimizer/Dialect/FirAliasTagOpInterface.td
similarity index 89%
rename from flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
rename to flang/include/flang/Optimizer/Dialect/FirAliasTagOpInterface.td
index 1d3e49d383f63a4..9ce720f3b1a5722 100644
--- a/flang/include/flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.td
+++ b/flang/include/flang/Optimizer/Dialect/FirAliasTagOpInterface.td
@@ -1,4 +1,4 @@
-//===-- FirAliasAnalysisOpInterface.td ---------------------*- tablegen -*-===//
+//===-- FirAliasTagOpInterface.td --------------------------*- tablegen -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -8,7 +8,7 @@
 
 include "mlir/IR/Interfaces.td"
 
-def FirAliasAnalysisOpInterface : OpInterface<"FirAliasAnalysisOpInterface"> {
+def FirAliasTagOpInterface : OpInterface<"FirAliasTagOpInterface"> {
   let description = [{
     An interface for memory operations that can carry alias analysis metadata.
     It provides setters and getters for the operation's alias analysis
@@ -18,7 +18,7 @@ def FirAliasAnalysisOpInterface : OpInterface<"FirAliasAnalysisOpInterface"> {
   }];
 
   let cppNamespace = "::fir";
-  let verify = [{ return detail::verifyFirAliasAnalysisOpInterface($_op); }];
+  let verify = [{ return detail::verifyFirAliasTagOpInterface($_op); }];
 
   let methods = [
     InterfaceMethod<
diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt
index 6beabcdb4e25d76..745439b7e1e5e87 100644
--- a/flang/lib/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt
@@ -6,7 +6,7 @@ add_flang_library(FIRDialect
   FIROps.cpp
   FIRType.cpp
   FortranVariableInterface.cpp
-  FirAliasAnalysisOpInterface.cpp
+  FirAliasTagOpInterface.cpp
   Inliner.cpp
 
   DEPENDS
diff --git a/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp b/flang/lib/Optimizer/Dialect/FirAliasTagOpInterface.cpp
similarity index 71%
rename from flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
rename to flang/lib/Optimizer/Dialect/FirAliasTagOpInterface.cpp
index 63686b1d0e7ebb6..648f490f63bf364 100644
--- a/flang/lib/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp
+++ b/flang/lib/Optimizer/Dialect/FirAliasTagOpInterface.cpp
@@ -1,4 +1,4 @@
-//===-- FirAliasAnalysisOpInterface.cpp ----------------------------------===//
+//===-- FirAliasTagOpInterface.cpp ----------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,14 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
+#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 
-#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.cpp.inc"
+#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.cpp.inc"
 
 mlir::LogicalResult
-fir::detail::verifyFirAliasAnalysisOpInterface(mlir::Operation *op) {
-  auto iface = mlir::cast<FirAliasAnalysisOpInterface>(op);
+fir::detail::verifyFirAliasTagOpInterface(mlir::Operation *op) {
+  auto iface = mlir::cast<FirAliasTagOpInterface>(op);
 
   mlir::ArrayAttr tags = iface.getTBAATagsOrNull();
   if (!tags)

>From 30e7d2548985552e931c3661abe78524e7e830a6 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 13 Sep 2023 11:25:01 +0000
Subject: [PATCH 04/16] [flang][FIR] add FIR TBAA pass

See RFC at
https://discourse.llvm.org/t/rfc-propagate-fir-alias-analysis-information-using-tbaa/73755

This pass adds TBAA tags to all accesses to non-pointer/target dummy
arguments. These TBAA tags tell LLVM that these accesses cannot alias:
allowing better dead code elimination, hoisting out of loops, and
vectorization.

Each function has its own TBAA tree so that accesses between functions
MayAlias after inlining.

I also included code for adding tags for local allocations and for
global variables. Enabling all three kinds of tag is known to produce a
miscompile and so these are disabled by default. But it isn't much
code and I thought it could be interesting to play with these later if
one is looking at a benchmark which looks like it would benefit from
more alias information. I'm open to removing this code too.

TBAA tags are also added separately by TBAABuilder during CodeGen.
TBAABuilder has to run during CodeGen because it adds tags to box
accesses, many of which are implicit in FIR. This pass cannot (easily)
run in CodeGen because fir::AliasAnalysis has difficulty tracing values
between blocks, and by the time CodeGen runs, structured control flow
has already been lowered.

Coming in follow-up patches:
  - Change CodeGen/TBAABuilder to use TBAAForest to add tags within the
    same per-function trees as are used here (delayed to a later patch
    to make it easier to revert)
  - Command line argument processing to actually enable the pass
---
 .../flang/Optimizer/Analysis/TBAAForest.h     | 104 +++++
 .../flang/Optimizer/Transforms/Passes.h       |   1 +
 .../flang/Optimizer/Transforms/Passes.td      |  20 +
 flang/lib/Optimizer/Analysis/CMakeLists.txt   |   1 +
 flang/lib/Optimizer/Analysis/TBAAForest.cpp   |  60 +++
 .../lib/Optimizer/Transforms/AddAliasTags.cpp | 198 +++++++++
 flang/lib/Optimizer/Transforms/CMakeLists.txt |   1 +
 flang/test/Transforms/tbaa.fir                | 175 ++++++++
 flang/test/Transforms/tbaa2.fir               | 386 ++++++++++++++++++
 9 files changed, 946 insertions(+)
 create mode 100644 flang/include/flang/Optimizer/Analysis/TBAAForest.h
 create mode 100644 flang/lib/Optimizer/Analysis/TBAAForest.cpp
 create mode 100644 flang/lib/Optimizer/Transforms/AddAliasTags.cpp
 create mode 100644 flang/test/Transforms/tbaa.fir
 create mode 100644 flang/test/Transforms/tbaa2.fir

diff --git a/flang/include/flang/Optimizer/Analysis/TBAAForest.h b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
new file mode 100644
index 000000000000000..3d11e6112bacce9
--- /dev/null
+++ b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
@@ -0,0 +1,104 @@
+//===-- TBAAForest.h - A TBAA tree for each function -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
+#define FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/MLIRContext.h"
+#include "llvm/ADT/DenseMap.h"
+#include <string>
+
+namespace fir {
+
+//===----------------------------------------------------------------------===//
+// TBAATree
+//===----------------------------------------------------------------------===//
+/// Per-function TBAA tree. Each tree contains branches for data (of various
+/// kinds) and descriptor access
+struct TBAATree {
+  //===----------------------------------------------------------------------===//
+  // TBAATree::SubtreeState
+  //===----------------------------------------------------------------------===//
+  /// This contains a TBAA subtree based on some parent. New tags can be added
+  /// under the parent using getTag.
+  class SubtreeState {
+    friend TBAATree; // only allow construction by TBAATree
+  public:
+    SubtreeState() = delete;
+    SubtreeState(const SubtreeState &) = delete;
+    SubtreeState(SubtreeState &&) = default;
+
+    mlir::LLVM::TBAATagAttr getTag(llvm::StringRef uniqueId) const;
+
+  private:
+    SubtreeState(mlir::MLIRContext *ctx, std::string name,
+                 mlir::LLVM::TBAANodeAttr grandParent)
+        : parentId{std::move(name)}, context(ctx) {
+      parent = mlir::LLVM::TBAATypeDescriptorAttr::get(
+          context, parentId, mlir::LLVM::TBAAMemberAttr::get(grandParent, 0));
+    }
+
+    const std::string parentId;
+    mlir::MLIRContext *const context;
+    mlir::LLVM::TBAATypeDescriptorAttr parent;
+    llvm::DenseMap<llvm::StringRef, mlir::LLVM::TBAATagAttr> tagDedup;
+  };
+
+  SubtreeState globalDataTree;
+  SubtreeState allocatedDataTree;
+  SubtreeState dummyArgDataTree;
+  mlir::LLVM::TBAATypeDescriptorAttr anyAccessDesc;
+  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc;
+  mlir::LLVM::TBAATypeDescriptorAttr anyDataTypeDesc;
+
+  static TBAATree buildTree(mlir::StringAttr functionName);
+
+private:
+  TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
+           mlir::LLVM::TBAATypeDescriptorAttr dataRoot,
+           mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc);
+};
+
+//===----------------------------------------------------------------------===//
+// TBAAForrest
+//===----------------------------------------------------------------------===//
+/// Collection of TBAATrees, usually indexed by function (so that each function
+/// has a different TBAATree)
+class TBAAForrest {
+public:
+  explicit TBAAForrest(bool separatePerFunction = true)
+      : separatePerFunction{separatePerFunction} {}
+
+  inline const TBAATree &operator[](mlir::func::FuncOp func) {
+    return getFuncTree(func.getSymNameAttr());
+  }
+  inline const TBAATree &operator[](mlir::LLVM::LLVMFuncOp func) {
+    return getFuncTree(func.getSymNameAttr());
+  }
+
+private:
+  const TBAATree &getFuncTree(mlir::StringAttr symName) {
+    if (!separatePerFunction)
+      symName = mlir::StringAttr::get(symName.getContext(), "");
+    if (!trees.contains(symName))
+      trees.insert({symName, TBAATree::buildTree(symName)});
+    return trees.at(symName);
+  }
+
+  // Should each function use a different tree?
+  const bool separatePerFunction;
+  // TBAA tree per function
+  llvm::DenseMap<mlir::StringAttr, TBAATree> trees;
+};
+
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_ANALYSIS_TBAA_FOREST_H
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 64882c8ec406b0a..30d97be3800c191 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -61,6 +61,7 @@ std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
 std::unique_ptr<mlir::Pass> createStackArraysPass();
+std::unique_ptr<mlir::Pass> createAliasTagsPass();
 std::unique_ptr<mlir::Pass> createSimplifyIntrinsicsPass();
 std::unique_ptr<mlir::Pass> createAddDebugFoundationPass();
 std::unique_ptr<mlir::Pass> createLoopVersioningPass();
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 80da485392007fa..7aa08b0616de99d 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -252,6 +252,26 @@ def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> {
   let constructor = "::fir::createStackArraysPass()";
 }
 
+def AddAliasTags : Pass<"fir-alias-analysis", "mlir::ModuleOp"> {
+  let summary = "Add tbaa tags to operations that implement FirAliasAnalysisOpInterface";
+  let description = [{
+    TBAA (type based alias analysis) is one method to pass pointer alias information
+    from language frontends to LLVM. This pass uses fir::AliasAnalysis to add this
+    information to fir.load and fir.store operations.
+    Additional tags are added during codegen. See fir::TBAABuilder.
+    This needs to be a separate pass so that it happens before structured control
+    flow operations are lowered to branches and basic blocks (this makes tracing
+    the source of values much easier). The other TBAA tags need to be applied to
+    box loads and stores which are implicit in FIR and so cannot be annotated
+    until codegen.
+    TODO: this is currently a pass on mlir::ModuleOp to avoid parallelism. In
+    theory, each operation could be considered in parallel, so long as there
+    aren't races adding new tags to the mlir context.
+  }];
+  let dependentDialects = [ "fir::FIROpsDialect" ];
+  let constructor = "::fir::createAliasTagsPass()";
+}
+
 def SimplifyRegionLite : Pass<"simplify-region-lite", "mlir::ModuleOp"> {
   let summary = "Region simplification";
   let description = [{
diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt
index 19dadf72cf4ce14..436d4d3f18969c1 100644
--- a/flang/lib/Optimizer/Analysis/CMakeLists.txt
+++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_flang_library(FIRAnalysis
   AliasAnalysis.cpp
+  TBAAForest.cpp
 
   DEPENDS
   FIRDialect
diff --git a/flang/lib/Optimizer/Analysis/TBAAForest.cpp b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
new file mode 100644
index 000000000000000..070e2be6700cc11
--- /dev/null
+++ b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
@@ -0,0 +1,60 @@
+//===- TBAAForest.cpp - Per-function TBAA Trees ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/TBAAForest.h"
+#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
+
+mlir::LLVM::TBAATagAttr
+fir::TBAATree::SubtreeState::getTag(llvm::StringRef uniqueName) const {
+  // mlir::LLVM::TBAATagAttr &tag = tagDedup[uniqueName];
+  // if (tag)
+  //   return tag;
+  std::string id = (parentId + "/" + uniqueName).str();
+  mlir::LLVM::TBAATypeDescriptorAttr type =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          context, id, mlir::LLVM::TBAAMemberAttr::get(parent, 0));
+  return mlir::LLVM::TBAATagAttr::get(type, type, 0);
+  // return tag;
+}
+
+fir::TBAATree fir::TBAATree::buildTree(mlir::StringAttr func) {
+  llvm::StringRef funcName = func.getValue();
+  std::string rootId = ("Flang function root " + funcName).str();
+  mlir::MLIRContext *ctx = func.getContext();
+  mlir::LLVM::TBAARootAttr funcRoot =
+      mlir::LLVM::TBAARootAttr::get(ctx, mlir::StringAttr::get(ctx, rootId));
+
+  static constexpr llvm::StringRef anyAccessTypeDescId = "any access";
+  mlir::LLVM::TBAATypeDescriptorAttr anyAccess =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, anyAccessTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(funcRoot, 0));
+
+  static constexpr llvm::StringRef anyDataAccessTypeDescId = "any data access";
+  mlir::LLVM::TBAATypeDescriptorAttr dataRoot =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, anyDataAccessTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(anyAccess, 0));
+
+  static constexpr llvm::StringRef boxMemberTypeDescId = "descriptor member";
+  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc =
+      mlir::LLVM::TBAATypeDescriptorAttr::get(
+          ctx, boxMemberTypeDescId,
+          mlir::LLVM::TBAAMemberAttr::get(anyAccess, 0));
+
+  return TBAATree{anyAccess, dataRoot, boxMemberTypeDesc};
+}
+
+fir::TBAATree::TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
+                        mlir::LLVM::TBAATypeDescriptorAttr dataRoot,
+                        mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc)
+    : globalDataTree(dataRoot.getContext(), "global data", dataRoot),
+      allocatedDataTree(dataRoot.getContext(), "allocated data", dataRoot),
+      dummyArgDataTree(dataRoot.getContext(), "dummy arg data", dataRoot),
+      anyAccessDesc(anyAccess), boxMemberTypeDesc(boxMemberTypeDesc),
+      anyDataTypeDesc(dataRoot) {}
diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
new file mode 100644
index 000000000000000..1129ae87246b7f8
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -0,0 +1,198 @@
+//===- AddAliasTags.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// \file
+/// Adds TBAA alias tags to fir loads and stores, based on information from
+/// fir::AliasAnalysis. More are added later in CodeGen - see fir::TBAABuilder
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Analysis/TBAAForest.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <optional>
+
+namespace fir {
+#define GEN_PASS_DEF_ADDALIASTAGS
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "fir-add-alias-tags"
+
+static llvm::cl::opt<bool>
+    enableDummyArgs("dummy-arg-tbaa", llvm::cl::init(true), llvm::cl::Hidden,
+                    llvm::cl::desc("Add TBAA tags to dummy arguments"));
+// These two are **known unsafe** (miscompare in spec2017/wrf_r). They should
+// not be enabled by default.
+// The code is kept so that these may be tried with new benchmarks to see if
+// this is worth fixing in the future.
+static llvm::cl::opt<bool>
+    enableGlobals("globals-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
+                  llvm::cl::desc("Add TBAA tags to global variables. UNSAFE."));
+static llvm::cl::opt<bool> enableLocalAllocs(
+    "local-alloc-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
+    llvm::cl::desc("Add TBAA tags to local allocations. UNSAFE."));
+
+namespace {
+
+/// Shared state per-module
+class PassState {
+public:
+  /// memoised call to fir::AliasAnalysis::getSource
+  inline const fir::AliasAnalysis::Source &getSource(mlir::Value value) {
+    if (!analysisCache.contains(value))
+      analysisCache.insert({value, analysis.getSource(value)});
+    return analysisCache[value];
+  }
+
+  /// get the per-function TBAATree for this function
+  inline const fir::TBAATree &getFuncTree(mlir::func::FuncOp func) {
+    return forrest[func];
+  }
+
+private:
+  fir::AliasAnalysis analysis;
+  llvm::DenseMap<mlir::Value, fir::AliasAnalysis::Source> analysisCache;
+  fir::TBAAForrest forrest;
+};
+
+class AddAliasTagsPass : public fir::impl::AddAliasTagsBase<AddAliasTagsPass> {
+public:
+  void runOnOperation() override;
+
+private:
+  /// The real workhorse of the pass. This is a runOnOperation() which
+  /// operates on fir::FirAliasAnalysisOpInterface, using some extra state
+  void runOnAliasInterface(fir::FirAliasAnalysisOpInterface op,
+                           PassState &state);
+};
+
+} // namespace
+
+/// Get the name of a function argument using the "fir.bindc_name" attribute,
+/// or ""
+static std::string getFuncArgName(mlir::Value arg) {
+  // always succeeds because arg is a function argument
+  mlir::BlockArgument blockArg = mlir::cast<mlir::BlockArgument>(arg);
+  assert(blockArg.getOwner() && blockArg.getOwner()->isEntryBlock() &&
+         "arg is a function argument");
+  mlir::FunctionOpInterface func =
+      mlir::cast<mlir::FunctionOpInterface>(blockArg.getOwner()->getParentOp());
+  mlir::StringAttr attr = func.getArgAttrOfType<mlir::StringAttr>(
+      blockArg.getArgNumber(), "fir.bindc_name");
+  if (!attr)
+    return "";
+  return attr.str();
+}
+
+void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasAnalysisOpInterface op,
+                                           PassState &state) {
+  mlir::func::FuncOp func = op->getParentOfType<mlir::func::FuncOp>();
+
+  llvm::SmallVector<mlir::Value> accessedOperands = op.getAccessedOperands();
+  assert(accessedOperands.size() == 1 &&
+         "load and store only access one address");
+  mlir::Value memref = accessedOperands.front();
+
+  // skip boxes. These get an "any descriptor access" tag in TBAABuilder
+  // (CodeGen). I didn't see any speedup from giving each box a separate TBAA
+  // type.
+  if (mlir::isa<fir::BaseBoxType>(fir::unwrapRefType(memref.getType())))
+    return;
+  LLVM_DEBUG(llvm::dbgs() << "Analysing " << op << "\n");
+
+  const fir::AliasAnalysis::Source &source = state.getSource(memref);
+  if (source.isTargetOrPointer()) {
+    LLVM_DEBUG(llvm::dbgs().indent(2) << "Skipping TARGET/POINTER\n");
+    // These will get an "any data access" tag in TBAABuilder (CodeGen): causing
+    // them to "MayAlias" with all non-box accesses
+    return;
+  }
+
+  mlir::LLVM::TBAATagAttr tag;
+  // TBAA for dummy arguments
+  if (enableDummyArgs &&
+      source.kind == fir::AliasAnalysis::SourceKind::Argument) {
+    LLVM_DEBUG(llvm::dbgs().indent(2)
+               << "Found reference to dummy argument at " << *op << "\n");
+    std::string name = getFuncArgName(source.u.get<mlir::Value>());
+    if (!name.empty())
+      tag = state.getFuncTree(func).dummyArgDataTree.getTag(name);
+    else
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: couldn't find a name for dummy argument " << *op
+                 << "\n");
+
+    // TBAA for global variables
+  } else if (enableGlobals &&
+             source.kind == fir::AliasAnalysis::SourceKind::Global) {
+    mlir::SymbolRefAttr glbl = source.u.get<mlir::SymbolRefAttr>();
+    const char *name = glbl.getRootReference().data();
+    LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to global " << name
+                                      << " at " << *op << "\n");
+    tag = state.getFuncTree(func).globalDataTree.getTag(name);
+
+    // TBAA for local allocations
+  } else if (enableLocalAllocs &&
+             source.kind == fir::AliasAnalysis::SourceKind::Allocate) {
+    std::optional<llvm::StringRef> name;
+    mlir::Operation *sourceOp = source.u.get<mlir::Value>().getDefiningOp();
+    if (auto alloc = mlir::dyn_cast_or_null<fir::AllocaOp>(sourceOp))
+      name = alloc.getUniqName();
+    else if (auto alloc = mlir::dyn_cast_or_null<fir::AllocMemOp>(sourceOp))
+      name = alloc.getUniqName();
+    if (name) {
+      LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to allocation "
+                                        << name << " at " << *op << "\n");
+      tag = state.getFuncTree(func).allocatedDataTree.getTag(*name);
+    } else {
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: couldn't find a name for allocation " << *op
+                 << "\n");
+    }
+  } else {
+    if (source.kind != fir::AliasAnalysis::SourceKind::Argument &&
+        source.kind != fir::AliasAnalysis::SourceKind::Allocate &&
+        source.kind != fir::AliasAnalysis::SourceKind::Global)
+      LLVM_DEBUG(llvm::dbgs().indent(2)
+                 << "WARN: unsupported value: " << source << "\n");
+  }
+
+  if (tag)
+    op.setTBAATags(mlir::ArrayAttr::get(&getContext(), tag));
+}
+
+void AddAliasTagsPass::runOnOperation() {
+  LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n");
+
+  // MLIR forbids storing state in a pass because different instances might be
+  // used in different threads.
+  // Instead this pass stores state per mlir::ModuleOp (which is what MLIR
+  // thinks the pass operates on), then the real work of the pass is done in
+  // runOnAliasInterface
+  PassState state;
+
+  mlir::ModuleOp mod = getOperation();
+  mod.walk([&](fir::FirAliasAnalysisOpInterface op) {
+    runOnAliasInterface(op, state);
+  });
+
+  LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
+}
+
+std::unique_ptr<mlir::Pass> fir::createAliasTagsPass() {
+  return std::make_unique<AddAliasTagsPass>();
+}
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 428c4c2a1e64408..74a093fe74719b6 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_flang_library(FIRTransforms
   AbstractResult.cpp
+  AddAliasTags.cpp
   AffinePromotion.cpp
   AffineDemotion.cpp
   AnnotateConstant.cpp
diff --git a/flang/test/Transforms/tbaa.fir b/flang/test/Transforms/tbaa.fir
new file mode 100644
index 000000000000000..4519acffd1f91b1
--- /dev/null
+++ b/flang/test/Transforms/tbaa.fir
@@ -0,0 +1,175 @@
+// RUN: fir-opt --split-input-file --fir-alias-analysis %s | FileCheck %s
+
+// subroutine oneArg(a)
+//   integer :: a(:)
+//   a(1) = a(2)
+// end subroutine
+  func.func @_QPonearg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %0 = fir.declare %arg0 {uniq_name = "_QFoneargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.array_coor %1 %c2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %3 = fir.load %2 : !fir.ref<i32>
+    %4 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %3 to %4 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[ONE_ARG_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPonearg">
+// CHECK: #[[ONE_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ONE_ARG_ROOT]], 0>}>
+// CHECK: #[[ONE_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ONE_ARG_ANY_ACCESS]], 0>}>
+// CHECK: #[[ONE_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ONE_ARG_ANY_DATA]], 0>}>
+// CHECK: #[[ONE_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#[[ONE_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[ONE_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ONE_ARG_A]], access_type = #[[ONE_ARG_A]], offset = 0>
+
+// CHECK-LABEL:   func.func @_QPonearg(
+// CHECK-SAME:                         %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFoneargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_6:.*]] = fir.load %[[VAL_5]] {tbaa = [#[[ONE_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[VAL_6]] to %[[VAL_7]] {tbaa = [#[[ONE_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine twoArg(a, b)
+//   integer :: a(:), b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPtwoarg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {uniq_name = "_QFtwoargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.declare %arg1 {uniq_name = "_QFtwoargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.rebox %2 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %4 = fir.array_coor %3 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %5 = fir.load %4 : !fir.ref<i32>
+    %6 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %5 to %6 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[TWO_ARG_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtwoarg">
+// CHECK: #[[TWO_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TWO_ARG_ROOT]], 0>}>
+// CHECK: #[[TWO_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TWO_ARG_ANY_ACCESS]], 0>}>
+// CHECK: #[[TWO_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TWO_ARG_ANY_DATA]], 0>}>
+// CHECK: #[[TWO_ARG_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_B]], access_type = #[[TWO_ARG_B]], offset = 0>
+// CHECK: #[[TWO_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_A]], access_type = #[[TWO_ARG_A]], offset = 0>
+
+// CHECK-LABEL:   func.func @_QPtwoarg(
+// CHECK-SAME:                         %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                         %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFtwoargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFtwoargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[TWO_ARG_B_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_9]] {tbaa = [#[[TWO_ARG_A_TAG]]]} : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine targetArg(a, b)
+//   integer, target :: a(:)
+//   integer :: b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPtargetarg(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a", fir.target}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFtargetargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %1 = fir.rebox %0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.declare %arg1 {uniq_name = "_QFtargetargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.rebox %2 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %4 = fir.array_coor %3 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %5 = fir.load %4 : !fir.ref<i32>
+    %6 = fir.array_coor %1 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    fir.store %5 to %6 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[TARGET_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtargetarg">
+// CHECK: #[[TARGET_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TARGET_ROOT]], 0>}>
+// CHECK: #[[TARGET_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TARGET_ANY_ACCESS]], 0>}>
+// CHECK: #[[TARGET_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TARGET_ANY_DATA]], 0>}>
+// CHECK: #[[TARGET_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[TARGET_ANY_ARG]], 0>}>
+// CHECK: #[[TARGET_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TARGET_B]], access_type = #[[TARGET_B]], offset = 0>
+// No entry for "dummy arg data/a" because that pointer should get "any data access" because it has the TARGET attribute
+
+// CHECK-LABEL:   func.func @_QPtargetarg(
+// CHECK-SAME:                            %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a", fir.target},
+// CHECK-SAME:                            %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<target>, uniq_name = "_QFtargetargEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_4:.*]] = fir.rebox %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFtargetargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[TARGET_B_TAG]]]} : !fir.ref<i32>
+// CHECK:           %[[VAL_9:.*]] = fir.array_coor %[[VAL_4]] %[[VAL_2]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// "any data access" tag is added by TBAABuilder during CodeGen
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_9]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+// subroutine pointerArg(a, b)
+//   integer, pointer :: a(:)
+//   integer :: b(:)
+//   a(1) = b(1)
+// end subroutine
+  func.func @_QPpointerarg(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %0 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointerargEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+    %1 = fir.declare %arg1 {uniq_name = "_QFpointerargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.rebox %1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3 = fir.array_coor %2 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %4 = fir.load %3 : !fir.ref<i32>
+    %5 = fir.load %0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+    %6:3 = fir.box_dims %5, %c0 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+    %7 = fir.shift %6#0 : (index) -> !fir.shift<1>
+    %8 = fir.array_coor %5(%7) %c1 : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.shift<1>, index) -> !fir.ref<i32>
+    fir.store %4 to %8 : !fir.ref<i32>
+    return
+  }
+
+// CHECK: #[[POINTER_ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QPpointerarg">
+// CHECK: #[[POINTER_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[POINTER_ROOT]], 0>}>
+// CHECK: #[[POINTER_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[POINTER_ANY_ACCESS]], 0>}>
+// CHECK: #[[POINTER_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[POINTER_ANY_DATA]], 0>}>
+// CHECK: #[[POINTER_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[POINTER_ANY_ARG]], 0>}>
+// CHECK: #[[POINTER_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[POINTER_B]], access_type = #[[POINTER_B]], offset = 0>
+// No entry for "dummy arg data/a" because that pointer should get "any data access" because it has the POINTER attribute
+
+// CHECK-LABEL:   func.func @_QPpointerarg(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {fir.bindc_name = "a"},
+// CHECK-SAME:                             %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "b"}) {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<pointer>, uniq_name = "_QFpointerargEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK:           %[[VAL_5:.*]] = fir.declare %[[VAL_1]] {uniq_name = "_QFpointerargEb"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_6:.*]] = fir.rebox %[[VAL_5]] : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[VAL_7:.*]] = fir.array_coor %[[VAL_6]] %[[VAL_3]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:           %[[VAL_8:.*]] = fir.load %[[VAL_7]] {tbaa = [#[[POINTER_B_TAG]]]} : !fir.ref<i32>
+// "any descriptor access" tag is added by TBAABuilder during CodeGen
+// CHECK:           %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_9]], %[[VAL_2]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]] = fir.shift %[[VAL_10]]#0 : (index) -> !fir.shift<1>
+// CHECK:           %[[VAL_12:.*]] = fir.array_coor %[[VAL_9]](%[[VAL_11]]) %[[VAL_3]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, !fir.shift<1>, index) -> !fir.ref<i32>
+// "any data access" tag is added by TBAABuilder during CodeGen
+// CHECK:           fir.store %[[VAL_8]] to %[[VAL_12]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
\ No newline at end of file
diff --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir
new file mode 100644
index 000000000000000..5aba03f292fef74
--- /dev/null
+++ b/flang/test/Transforms/tbaa2.fir
@@ -0,0 +1,386 @@
+// Test the FIR alias analysis pass on a larger, real-life code example (from the RFC)
+// RUN: fir-opt --fir-alias-analysis %s | FileCheck %s
+
+  fir.global @_QMmodEa : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
+  }
+  fir.global @_QMmodEb : !fir.box<!fir.heap<!fir.array<?xf32>>> {
+    %c0 = arith.constant 0 : index
+    %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+    %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+    %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xf32>>>
+    fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xf32>>>
+  }
+  fir.global @_QMmodEdxinv : f32 {
+    %0 = fir.zero_bits f32
+    fir.has_value %0 : f32
+  }
+  fir.global @_QMmodEdyinv : f32 {
+    %0 = fir.zero_bits f32
+    fir.has_value %0 : f32
+  }
+  fir.global @_QMmodExstart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEystart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEystop : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEzstart : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+  fir.global @_QMmodEzstop : i32 {
+    %0 = fir.zero_bits i32
+    fir.has_value %0 : i32
+  }
+
+// CHECK: #[[ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QMmodPcallee">
+// CHECK: #[[ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
+// CHECK: #[[ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANY_ACCESS]], 0>}>
+// CHECK: #[[ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ANY_DATA]], 0>}>
+// CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/low", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/z", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/y", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_LOW]], access_type = #[[ARG_LOW]], offset = 0>
+// CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Z]], access_type = #[[ARG_Z]], offset = 0>
+// CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Y]], access_type = #[[ARG_Y]], offset = 0>
+
+  func.func @_QMmodPcallee(%arg0: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "z"}, %arg1: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "y"}, %arg2: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "low"}) {
+    %c2 = arith.constant 2 : index
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c1_i32 = arith.constant 1 : i32
+    %0 = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %1 = fir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %2 = fir.address_of(@_QMmodEb) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %3 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+    %4 = fir.address_of(@_QMmodEdxinv) : !fir.ref<f32>
+    %5 = fir.declare %4 {uniq_name = "_QMmodEdxinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %6 = fir.address_of(@_QMmodEdyinv) : !fir.ref<f32>
+    %7 = fir.declare %6 {uniq_name = "_QMmodEdyinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %8 = fir.address_of(@_QMmodExstart) : !fir.ref<i32>
+    %9 = fir.declare %8 {uniq_name = "_QMmodExstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %10 = fir.address_of(@_QMmodEystart) : !fir.ref<i32>
+    %11 = fir.declare %10 {uniq_name = "_QMmodEystart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %12 = fir.address_of(@_QMmodEystop) : !fir.ref<i32>
+    %13 = fir.declare %12 {uniq_name = "_QMmodEystop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %14 = fir.address_of(@_QMmodEzstart) : !fir.ref<i32>
+    %15 = fir.declare %14 {uniq_name = "_QMmodEzstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %16 = fir.address_of(@_QMmodEzstop) : !fir.ref<i32>
+    %17 = fir.declare %16 {uniq_name = "_QMmodEzstop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %18 = fir.alloca f32 {bindc_name = "dxold", uniq_name = "_QMmodFcalleeEdxold"}
+    %19 = fir.declare %18 {uniq_name = "_QMmodFcalleeEdxold"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %20 = fir.alloca f32 {bindc_name = "dzinv", uniq_name = "_QMmodFcalleeEdzinv"}
+    %21 = fir.declare %20 {uniq_name = "_QMmodFcalleeEdzinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+    %22 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmodFcalleeEi"}
+    %23 = fir.declare %22 {uniq_name = "_QMmodFcalleeEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %24 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmodFcalleeEj"}
+    %25 = fir.declare %24 {uniq_name = "_QMmodFcalleeEj"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %26 = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmodFcalleeEk"}
+    %27 = fir.declare %26 {uniq_name = "_QMmodFcalleeEk"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    %28 = fir.declare %arg2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmodFcalleeElow"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+    %29 = fir.declare %arg1 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QMmodFcalleeEy"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %30 = fir.rebox %29 : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %31 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_in>, uniq_name = "_QMmodFcalleeEz"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %32 = fir.rebox %31 : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+    %33 = fir.load %15 : !fir.ref<i32>
+    %34 = arith.addi %33, %c1_i32 : i32
+    %35 = fir.convert %34 : (i32) -> index
+    %36 = fir.load %17 : !fir.ref<i32>
+    %37 = fir.convert %36 : (i32) -> index
+    %38 = fir.convert %35 : (index) -> i32
+    %39:2 = fir.do_loop %arg3 = %35 to %37 step %c1 iter_args(%arg4 = %38) -> (index, i32) {
+      fir.store %arg4 to %27 : !fir.ref<i32>
+      %40 = fir.load %11 : !fir.ref<i32>
+      %41 = arith.addi %40, %c1_i32 : i32
+      %42 = fir.convert %41 : (i32) -> index
+      %43 = fir.load %13 : !fir.ref<i32>
+      %44 = fir.convert %43 : (i32) -> index
+      %45 = fir.convert %42 : (index) -> i32
+      %46:2 = fir.do_loop %arg5 = %42 to %44 step %c1 iter_args(%arg6 = %45) -> (index, i32) {
+        fir.store %arg6 to %25 : !fir.ref<i32>
+        %51 = fir.load %9 : !fir.ref<i32>
+        %52 = arith.addi %51, %c1_i32 : i32
+        %53 = fir.convert %52 : (i32) -> index
+        %54 = fir.convert %53 : (index) -> i32
+        %55:2 = fir.do_loop %arg7 = %53 to %c0 step %c1 iter_args(%arg8 = %54) -> (index, i32) {
+          fir.store %arg8 to %23 : !fir.ref<i32>
+          %60 = fir.load %28 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+          %61 = fir.load %23 : !fir.ref<i32>
+          %62 = fir.convert %61 : (i32) -> i64
+          %63 = fir.load %25 : !fir.ref<i32>
+          %64 = fir.convert %63 : (i32) -> i64
+          %65 = fir.load %27 : !fir.ref<i32>
+          %66 = fir.convert %65 : (i32) -> i64
+          %67 = fir.box_addr %60 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+          %68:3 = fir.box_dims %60, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %69:3 = fir.box_dims %60, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %70:3 = fir.box_dims %60, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %71 = fir.shape_shift %68#0, %68#1, %69#0, %69#1, %70#0, %70#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+          %72 = fir.array_coor %67(%71) %62, %64, %66 : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+          %73 = fir.load %72 : !fir.ref<f32>
+          fir.store %73 to %19 : !fir.ref<f32>
+          %74 = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+          %75 = fir.load %25 : !fir.ref<i32>
+          %76 = fir.convert %75 : (i32) -> i64
+          %77 = fir.box_addr %74 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+          %78:3 = fir.box_dims %74, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+          %79 = fir.shape_shift %78#0, %78#1 : (index, index) -> !fir.shapeshift<1>
+          %80 = fir.array_coor %77(%79) %76 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+          %81 = fir.load %80 : !fir.ref<f32>
+          %82 = fir.load %28 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+          %83 = fir.load %23 : !fir.ref<i32>
+          %84 = fir.convert %83 : (i32) -> i64
+          %85 = fir.load %27 : !fir.ref<i32>
+          %86 = fir.convert %85 : (i32) -> i64
+          %87 = fir.box_addr %82 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+          %88:3 = fir.box_dims %82, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %89:3 = fir.box_dims %82, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %90:3 = fir.box_dims %82, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+          %91 = fir.shape_shift %88#0, %88#1, %89#0, %89#1, %90#0, %90#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+          %92 = fir.array_coor %87(%91) %84, %76, %86 : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+          %93 = fir.load %92 : !fir.ref<f32>
+          %94 = arith.mulf %81, %93 fastmath<contract> : f32
+          %95 = fir.load %3 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+          %96 = fir.box_addr %95 : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+          %97:3 = fir.box_dims %95, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+          %98 = fir.shape_shift %97#0, %97#1 : (index, index) -> !fir.shapeshift<1>
+          %99 = fir.array_coor %96(%98) %76 : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+          %100 = fir.load %99 : !fir.ref<f32>
+          %101 = fir.array_coor %32 %84, %76, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %102 = fir.load %101 : !fir.ref<f32>
+          %103 = arith.subi %75, %c1_i32 : i32
+          %104 = fir.convert %103 : (i32) -> i64
+          %105 = fir.array_coor %32 %84, %104, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %106 = fir.load %105 : !fir.ref<f32>
+          %107 = arith.subf %102, %106 fastmath<contract> : f32
+          %108 = fir.no_reassoc %107 : f32
+          %109 = fir.load %7 : !fir.ref<f32>
+          %110 = arith.mulf %108, %109 fastmath<contract> : f32
+          %111 = arith.subi %85, %c1_i32 : i32
+          %112 = fir.convert %111 : (i32) -> i64
+          %113 = fir.array_coor %30 %84, %76, %112 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %114 = fir.load %113 : !fir.ref<f32>
+          %115 = fir.array_coor %30 %84, %76, %86 : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+          %116 = fir.load %115 : !fir.ref<f32>
+          %117 = arith.subf %114, %116 fastmath<contract> : f32
+          %118 = fir.no_reassoc %117 : f32
+          %119 = fir.load %21 : !fir.ref<f32>
+          %120 = arith.mulf %118, %119 fastmath<contract> : f32
+          %121 = arith.addf %110, %120 fastmath<contract> : f32
+          %122 = fir.no_reassoc %121 : f32
+          %123 = arith.mulf %100, %122 fastmath<contract> : f32
+          %124 = arith.addf %94, %123 fastmath<contract> : f32
+          fir.store %124 to %92 : !fir.ref<f32>
+          %125 = arith.addi %arg7, %c1 : index
+          %126 = fir.convert %c1 : (index) -> i32
+          %127 = fir.load %23 : !fir.ref<i32>
+          %128 = arith.addi %127, %126 : i32
+          fir.result %125, %128 : index, i32
+        }
+        fir.store %55#1 to %23 : !fir.ref<i32>
+        %56 = arith.addi %arg5, %c1 : index
+        %57 = fir.convert %c1 : (index) -> i32
+        %58 = fir.load %25 : !fir.ref<i32>
+        %59 = arith.addi %58, %57 : i32
+        fir.result %56, %59 : index, i32
+      }
+      fir.store %46#1 to %25 : !fir.ref<i32>
+      %47 = arith.addi %arg3, %c1 : index
+      %48 = fir.convert %c1 : (index) -> i32
+      %49 = fir.load %27 : !fir.ref<i32>
+      %50 = arith.addi %49, %48 : i32
+      fir.result %47, %50 : index, i32
+    }
+    fir.store %39#1 to %27 : !fir.ref<i32>
+    return
+  }
+// CHECK-LABEL:   func.func @_QMmodPcallee(
+// CHECK-SAME:                             %[[VAL_0:.*]]: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "z"},
+// CHECK-SAME:                             %[[VAL_1:.*]]: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "y"},
+// CHECK-SAME:                             %[[VAL_2:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "low"}) {
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_8:.*]] = fir.declare %[[VAL_7]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_9:.*]] = fir.address_of(@_QMmodEb) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_10:.*]] = fir.declare %[[VAL_9]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodEb"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:           %[[VAL_11:.*]] = fir.address_of(@_QMmodEdxinv) : !fir.ref<f32>
+// CHECK:           %[[VAL_12:.*]] = fir.declare %[[VAL_11]] {uniq_name = "_QMmodEdxinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_13:.*]] = fir.address_of(@_QMmodEdyinv) : !fir.ref<f32>
+// CHECK:           %[[VAL_14:.*]] = fir.declare %[[VAL_13]] {uniq_name = "_QMmodEdyinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_15:.*]] = fir.address_of(@_QMmodExstart) : !fir.ref<i32>
+// CHECK:           %[[VAL_16:.*]] = fir.declare %[[VAL_15]] {uniq_name = "_QMmodExstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_17:.*]] = fir.address_of(@_QMmodEystart) : !fir.ref<i32>
+// CHECK:           %[[VAL_18:.*]] = fir.declare %[[VAL_17]] {uniq_name = "_QMmodEystart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_19:.*]] = fir.address_of(@_QMmodEystop) : !fir.ref<i32>
+// CHECK:           %[[VAL_20:.*]] = fir.declare %[[VAL_19]] {uniq_name = "_QMmodEystop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_21:.*]] = fir.address_of(@_QMmodEzstart) : !fir.ref<i32>
+// CHECK:           %[[VAL_22:.*]] = fir.declare %[[VAL_21]] {uniq_name = "_QMmodEzstart"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_23:.*]] = fir.address_of(@_QMmodEzstop) : !fir.ref<i32>
+// CHECK:           %[[VAL_24:.*]] = fir.declare %[[VAL_23]] {uniq_name = "_QMmodEzstop"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_25:.*]] = fir.alloca f32 {bindc_name = "dxold", uniq_name = "_QMmodFcalleeEdxold"}
+// CHECK:           %[[VAL_26:.*]] = fir.declare %[[VAL_25]] {uniq_name = "_QMmodFcalleeEdxold"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_27:.*]] = fir.alloca f32 {bindc_name = "dzinv", uniq_name = "_QMmodFcalleeEdzinv"}
+// CHECK:           %[[VAL_28:.*]] = fir.declare %[[VAL_27]] {uniq_name = "_QMmodFcalleeEdzinv"} : (!fir.ref<f32>) -> !fir.ref<f32>
+// CHECK:           %[[VAL_29:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmodFcalleeEi"}
+// CHECK:           %[[VAL_30:.*]] = fir.declare %[[VAL_29]] {uniq_name = "_QMmodFcalleeEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_31:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmodFcalleeEj"}
+// CHECK:           %[[VAL_32:.*]] = fir.declare %[[VAL_31]] {uniq_name = "_QMmodFcalleeEj"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_33:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QMmodFcalleeEk"}
+// CHECK:           %[[VAL_34:.*]] = fir.declare %[[VAL_33]] {uniq_name = "_QMmodFcalleeEk"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK:           %[[VAL_35:.*]] = fir.declare %[[VAL_2]] {fortran_attrs = #{{.*}}<allocatable>, uniq_name = "_QMmodFcalleeElow"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// CHECK:           %[[VAL_36:.*]] = fir.declare %[[VAL_1]] {fortran_attrs = #{{.*}}<intent_in>, uniq_name = "_QMmodFcalleeEy"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_37:.*]] = fir.rebox %[[VAL_36]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_38:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<intent_in>, uniq_name = "_QMmodFcalleeEz"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// CHECK:           %[[VAL_39:.*]] = fir.rebox %[[VAL_38]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
+// TODO: read from global assumed to always alias
+// CHECK:           %[[VAL_40:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+// CHECK:           %[[VAL_41:.*]] = arith.addi %[[VAL_40]], %[[VAL_6]] : i32
+// CHECK:           %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (i32) -> index
+// TODO: read from global assumed to always alias
+// CHECK:           %[[VAL_43:.*]] = fir.load %[[VAL_24]] : !fir.ref<i32>
+// CHECK:           %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i32) -> index
+// CHECK:           %[[VAL_45:.*]] = fir.convert %[[VAL_42]] : (index) -> i32
+// CHECK:           %[[VAL_46:.*]]:2 = fir.do_loop %[[VAL_47:.*]] = %[[VAL_42]] to %[[VAL_44]] step %[[VAL_5]] iter_args(%[[VAL_48:.*]] = %[[VAL_45]]) -> (index, i32) {
+// CHECK:             fir.store %[[VAL_48]] to %[[VAL_34]] : !fir.ref<i32>
+// TODO: read from global assumed to always alias
+// CHECK:             %[[VAL_49:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
+// CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_6]] : i32
+// CHECK:             %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i32) -> index
+// TODO: read from global assumed to always alias
+// CHECK:             %[[VAL_52:.*]] = fir.load %[[VAL_20]] : !fir.ref<i32>
+// CHECK:             %[[VAL_53:.*]] = fir.convert %[[VAL_52]] : (i32) -> index
+// CHECK:             %[[VAL_54:.*]] = fir.convert %[[VAL_51]] : (index) -> i32
+// CHECK:             %[[VAL_55:.*]]:2 = fir.do_loop %[[VAL_56:.*]] = %[[VAL_51]] to %[[VAL_53]] step %[[VAL_5]] iter_args(%[[VAL_57:.*]] = %[[VAL_54]]) -> (index, i32) {
+// CHECK:               fir.store %[[VAL_57]] to %[[VAL_32]] : !fir.ref<i32>
+// TODO: read from global assumed to always alias
+// CHECK:               %[[VAL_58:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
+// CHECK:               %[[VAL_59:.*]] = arith.addi %[[VAL_58]], %[[VAL_6]] : i32
+// CHECK:               %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (i32) -> index
+// CHECK:               %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (index) -> i32
+// CHECK:               %[[VAL_62:.*]]:2 = fir.do_loop %[[VAL_63:.*]] = %[[VAL_60]] to %[[VAL_4]] step %[[VAL_5]] iter_args(%[[VAL_64:.*]] = %[[VAL_61]]) -> (index, i32) {
+// TODO: local allocation assumed to always alias
+// CHECK:                 fir.store %[[VAL_64]] to %[[VAL_30]] : !fir.ref<i32>
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_65:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_66:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_67:.*]] = fir.convert %[[VAL_66]] : (i32) -> i64
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_68:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_69:.*]] = fir.convert %[[VAL_68]] : (i32) -> i64
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_70:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_71:.*]] = fir.convert %[[VAL_70]] : (i32) -> i64
+// CHECK:                 %[[VAL_72:.*]] = fir.box_addr %[[VAL_65]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+// CHECK:                 %[[VAL_73:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_74:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_5]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_75:.*]]:3 = fir.box_dims %[[VAL_65]], %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_76:.*]] = fir.shape_shift %[[VAL_73]]#0, %[[VAL_73]]#1, %[[VAL_74]]#0, %[[VAL_74]]#1, %[[VAL_75]]#0, %[[VAL_75]]#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+// CHECK:                 %[[VAL_77:.*]] = fir.array_coor %[[VAL_72]](%[[VAL_76]]) %[[VAL_67]], %[[VAL_69]], %[[VAL_71]] : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_78:.*]] = fir.load %[[VAL_77]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 fir.store %[[VAL_78]] to %[[VAL_26]] : !fir.ref<f32>
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_79:.*]] = fir.load %[[VAL_8]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_80:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_81:.*]] = fir.convert %[[VAL_80]] : (i32) -> i64
+// CHECK:                 %[[VAL_82:.*]] = fir.box_addr %[[VAL_79]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK:                 %[[VAL_83:.*]]:3 = fir.box_dims %[[VAL_79]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_84:.*]] = fir.shape_shift %[[VAL_83]]#0, %[[VAL_83]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK:                 %[[VAL_85:.*]] = fir.array_coor %[[VAL_82]](%[[VAL_84]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+// TODO: read from global assumed to always alias
+// CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_85]] : !fir.ref<f32>
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_87:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_88:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_89:.*]] = fir.convert %[[VAL_88]] : (i32) -> i64
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_90:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_91:.*]] = fir.convert %[[VAL_90]] : (i32) -> i64
+// CHECK:                 %[[VAL_92:.*]] = fir.box_addr %[[VAL_87]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>) -> !fir.heap<!fir.array<?x?x?xf32>>
+// CHECK:                 %[[VAL_93:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_94:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_5]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_95:.*]]:3 = fir.box_dims %[[VAL_87]], %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_96:.*]] = fir.shape_shift %[[VAL_93]]#0, %[[VAL_93]]#1, %[[VAL_94]]#0, %[[VAL_94]]#1, %[[VAL_95]]#0, %[[VAL_95]]#1 : (index, index, index, index, index, index) -> !fir.shapeshift<3>
+// CHECK:                 %[[VAL_97:.*]] = fir.array_coor %[[VAL_92]](%[[VAL_96]]) %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.heap<!fir.array<?x?x?xf32>>, !fir.shapeshift<3>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_98:.*]] = fir.load %[[VAL_97]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_99:.*]] = arith.mulf %[[VAL_86]], %[[VAL_98]] fastmath<contract> : f32
+// load from box tagged in CodeGen
+// CHECK:                 %[[VAL_100:.*]] = fir.load %[[VAL_10]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+// CHECK:                 %[[VAL_101:.*]] = fir.box_addr %[[VAL_100]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK:                 %[[VAL_102:.*]]:3 = fir.box_dims %[[VAL_100]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_103:.*]] = fir.shape_shift %[[VAL_102]]#0, %[[VAL_102]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK:                 %[[VAL_104:.*]] = fir.array_coor %[[VAL_101]](%[[VAL_103]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
+// TODO: read from global assumed to always alias
+// CHECK:                 %[[VAL_105:.*]] = fir.load %[[VAL_104]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_106:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_107:.*]] = fir.load %[[VAL_106]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_108:.*]] = arith.subi %[[VAL_80]], %[[VAL_6]] : i32
+// CHECK:                 %[[VAL_109:.*]] = fir.convert %[[VAL_108]] : (i32) -> i64
+// CHECK:                 %[[VAL_110:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_109]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_111:.*]] = fir.load %[[VAL_110]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_112:.*]] = arith.subf %[[VAL_107]], %[[VAL_111]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_113:.*]] = fir.no_reassoc %[[VAL_112]] : f32
+// TODO: read from global assumed to always alias
+// CHECK:                 %[[VAL_114:.*]] = fir.load %[[VAL_14]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_115:.*]] = arith.mulf %[[VAL_113]], %[[VAL_114]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_116:.*]] = arith.subi %[[VAL_90]], %[[VAL_6]] : i32
+// CHECK:                 %[[VAL_117:.*]] = fir.convert %[[VAL_116]] : (i32) -> i64
+// CHECK:                 %[[VAL_118:.*]] = fir.array_coor %[[VAL_37]] %[[VAL_89]], %[[VAL_81]], %[[VAL_117]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_119:.*]] = fir.load %[[VAL_118]] {tbaa = [#[[ARG_Y_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_120:.*]] = fir.array_coor %[[VAL_37]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_121:.*]] = fir.load %[[VAL_120]] {tbaa = [#[[ARG_Y_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_122:.*]] = arith.subf %[[VAL_119]], %[[VAL_121]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_123:.*]] = fir.no_reassoc %[[VAL_122]] : f32
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_124:.*]] = fir.load %[[VAL_28]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_125:.*]] = arith.mulf %[[VAL_123]], %[[VAL_124]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_126:.*]] = arith.addf %[[VAL_115]], %[[VAL_125]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_127:.*]] = fir.no_reassoc %[[VAL_126]] : f32
+// CHECK:                 %[[VAL_128:.*]] = arith.mulf %[[VAL_105]], %[[VAL_127]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_129:.*]] = arith.addf %[[VAL_99]], %[[VAL_128]] fastmath<contract> : f32
+// CHECK:                 fir.store %[[VAL_129]] to %[[VAL_97]] {tbaa = [#[[ARG_LOW_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_130:.*]] = arith.addi %[[VAL_63]], %[[VAL_5]] : index
+// CHECK:                 %[[VAL_131:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// TODO: local allocation assumed to always alias
+// CHECK:                 %[[VAL_132:.*]] = fir.load %[[VAL_30]] : !fir.ref<i32>
+// CHECK:                 %[[VAL_133:.*]] = arith.addi %[[VAL_132]], %[[VAL_131]] : i32
+// CHECK:                 fir.result %[[VAL_130]], %[[VAL_133]] : index, i32
+// CHECK:               }
+// TODO: local allocation assumed to always alias
+// CHECK:               fir.store %[[VAL_134:.*]]#1 to %[[VAL_30]] : !fir.ref<i32>
+// CHECK:               %[[VAL_135:.*]] = arith.addi %[[VAL_56]], %[[VAL_5]] : index
+// CHECK:               %[[VAL_136:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// TODO: local allocation assumed to always alias
+// CHECK:               %[[VAL_137:.*]] = fir.load %[[VAL_32]] : !fir.ref<i32>
+// CHECK:               %[[VAL_138:.*]] = arith.addi %[[VAL_137]], %[[VAL_136]] : i32
+// CHECK:               fir.result %[[VAL_135]], %[[VAL_138]] : index, i32
+// CHECK:             }
+// TODO: local allocation assumed to always alias
+// CHECK:             fir.store %[[VAL_139:.*]]#1 to %[[VAL_32]] : !fir.ref<i32>
+// CHECK:             %[[VAL_140:.*]] = arith.addi %[[VAL_47]], %[[VAL_5]] : index
+// CHECK:             %[[VAL_141:.*]] = fir.convert %[[VAL_5]] : (index) -> i32
+// TODO: local allocation assumed to always alias
+// CHECK:             %[[VAL_142:.*]] = fir.load %[[VAL_34]] : !fir.ref<i32>
+// CHECK:             %[[VAL_143:.*]] = arith.addi %[[VAL_142]], %[[VAL_141]] : i32
+// CHECK:             fir.result %[[VAL_140]], %[[VAL_143]] : index, i32
+// CHECK:           }
+// TODO: local allocation assumed to always alias
+// CHECK:           fir.store %[[VAL_144:.*]]#1 to %[[VAL_34]] : !fir.ref<i32>
+// CHECK:           return
+// CHECK:         }
\ No newline at end of file

>From 9958d2389b1090dae3fd822347d9c47cfae12bb2 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 09:39:25 +0000
Subject: [PATCH 05/16] [flang] Rename FirAliasAnalysisOpInterface to
 FirAliasTagOpInterface

---
 flang/lib/Optimizer/Transforms/AddAliasTags.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 1129ae87246b7f8..1dbf1ef7eb46290 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -15,7 +15,7 @@
 #include "flang/Optimizer/Analysis/AliasAnalysis.h"
 #include "flang/Optimizer/Analysis/TBAAForest.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
-#include "flang/Optimizer/Dialect/FirAliasAnalysisOpInterface.h"
+#include "flang/Optimizer/Dialect/FirAliasTagOpInterface.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "mlir/Pass/Pass.h"
 #include "llvm/ADT/DenseMap.h"
@@ -75,9 +75,8 @@ class AddAliasTagsPass : public fir::impl::AddAliasTagsBase<AddAliasTagsPass> {
 
 private:
   /// The real workhorse of the pass. This is a runOnOperation() which
-  /// operates on fir::FirAliasAnalysisOpInterface, using some extra state
-  void runOnAliasInterface(fir::FirAliasAnalysisOpInterface op,
-                           PassState &state);
+  /// operates on fir::FirAliasTagOpInterface, using some extra state
+  void runOnAliasInterface(fir::FirAliasTagOpInterface op, PassState &state);
 };
 
 } // namespace
@@ -98,7 +97,7 @@ static std::string getFuncArgName(mlir::Value arg) {
   return attr.str();
 }
 
-void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasAnalysisOpInterface op,
+void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
                                            PassState &state) {
   mlir::func::FuncOp func = op->getParentOfType<mlir::func::FuncOp>();
 
@@ -186,9 +185,8 @@ void AddAliasTagsPass::runOnOperation() {
   PassState state;
 
   mlir::ModuleOp mod = getOperation();
-  mod.walk([&](fir::FirAliasAnalysisOpInterface op) {
-    runOnAliasInterface(op, state);
-  });
+  mod.walk(
+      [&](fir::FirAliasTagOpInterface op) { runOnAliasInterface(op, state); });
 
   LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
 }

>From 2075968bb2652595a2d1495a0eac15fdd0adc88e Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 09:43:02 +0000
Subject: [PATCH 06/16] Fix typos

---
 flang/include/flang/Optimizer/Analysis/TBAAForest.h | 4 ++--
 flang/lib/Optimizer/Transforms/AddAliasTags.cpp     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/flang/include/flang/Optimizer/Analysis/TBAAForest.h b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
index 3d11e6112bacce9..a024544e50ef98b 100644
--- a/flang/include/flang/Optimizer/Analysis/TBAAForest.h
+++ b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
@@ -21,7 +21,7 @@ namespace fir {
 //===----------------------------------------------------------------------===//
 // TBAATree
 //===----------------------------------------------------------------------===//
-/// Per-function TBAA tree. Each tree contins branches for data (of various
+/// Per-function TBAA tree. Each tree contains branches for data (of various
 /// kinds) and descriptor access
 struct TBAATree {
   //===----------------------------------------------------------------------===//
@@ -70,7 +70,7 @@ struct TBAATree {
 //===----------------------------------------------------------------------===//
 // TBAAForrest
 //===----------------------------------------------------------------------===//
-/// Colletion of TBAATrees, usually indexed by function (so that each function
+/// Collection of TBAATrees, usually indexed by function (so that each function
 /// has a different TBAATree)
 class TBAAForrest {
 public:
diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 1dbf1ef7eb46290..7c3b02f75a446af 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -41,10 +41,10 @@ static llvm::cl::opt<bool>
 // this is worth fixing in the future.
 static llvm::cl::opt<bool>
     enableGlobals("globals-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
-                  llvm::cl::desc("Add TBAA tags to dummy arguments. UNSAFE."));
+                  llvm::cl::desc("Add TBAA tags to global variables. UNSAFE."));
 static llvm::cl::opt<bool> enableLocalAllocs(
     "local-alloc-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
-    llvm::cl::desc("Add TBAA tags to dummy arguments. UNSAFE."));
+    llvm::cl::desc("Add TBAA tags to local allocations. UNSAFE."));
 
 namespace {
 

>From 0d3d17b9b7e72ab70a6e07dceb142a71366974d3 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 10:07:35 +0000
Subject: [PATCH 07/16] Rename pass to fir-add-alias-tags

---
 flang/include/flang/Optimizer/Transforms/Passes.td | 2 +-
 flang/test/Transforms/tbaa.fir                     | 4 ++--
 flang/test/Transforms/tbaa2.fir                    | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 7aa08b0616de99d..6d211a535b53f70 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -252,7 +252,7 @@ def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> {
   let constructor = "::fir::createStackArraysPass()";
 }
 
-def AddAliasTags : Pass<"fir-alias-analysis", "mlir::ModuleOp"> {
+def AddAliasTags : Pass<"fir-add-alias-tags", "mlir::ModuleOp"> {
   let summary = "Add tbaa tags to operations that implement FirAliasAnalysisOpInterface";
   let description = [{
     TBAA (type based alias analysis) is one method to pass pointer alias information
diff --git a/flang/test/Transforms/tbaa.fir b/flang/test/Transforms/tbaa.fir
index 4519acffd1f91b1..5c6123e34e154c0 100644
--- a/flang/test/Transforms/tbaa.fir
+++ b/flang/test/Transforms/tbaa.fir
@@ -1,4 +1,4 @@
-// RUN: fir-opt --split-input-file --fir-alias-analysis %s | FileCheck %s
+// RUN: fir-opt --split-input-file --fir-add-alias-tags %s | FileCheck %s
 
 // subroutine oneArg(a)
 //   integer :: a(:)
@@ -172,4 +172,4 @@
 // "any data access" tag is added by TBAABuilder during CodeGen
 // CHECK:           fir.store %[[VAL_8]] to %[[VAL_12]] : !fir.ref<i32>
 // CHECK:           return
-// CHECK:         }
\ No newline at end of file
+// CHECK:         }
diff --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir
index 5aba03f292fef74..d3e093448650ed1 100644
--- a/flang/test/Transforms/tbaa2.fir
+++ b/flang/test/Transforms/tbaa2.fir
@@ -1,5 +1,5 @@
 // Test fir alias analysis pass on a larger real life code example (from the RFC)
-// RUN: fir-opt --fir-alias-analysis %s | FileCheck %s
+// RUN: fir-opt --fir-add-alias-tags %s | FileCheck %s
 
   fir.global @_QMmodEa : !fir.box<!fir.heap<!fir.array<?xf32>>> {
     %c0 = arith.constant 0 : index
@@ -383,4 +383,4 @@
 // local allocation:
 // CHECK:           fir.store %[[VAL_144:.*]]#1 to %[[VAL_34]] : !fir.ref<i32>
 // CHECK:           return
-// CHECK:         }
\ No newline at end of file
+// CHECK:         }

>From a5046ea56554193e7885a8e414d64f5329a8263d Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 10:16:33 +0000
Subject: [PATCH 08/16] Get function argument name from fir::DeclareOp

---
 flang/lib/Optimizer/Transforms/AddAliasTags.cpp | 16 ++++++++++++++--
 flang/test/Transforms/tbaa.fir                  | 10 +++++-----
 flang/test/Transforms/tbaa2.fir                 |  6 +++---
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 7c3b02f75a446af..c584f17de4fbaf8 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -81,15 +81,27 @@ class AddAliasTagsPass : public fir::impl::AddAliasTagsBase<AddAliasTagsPass> {
 
 } // namespace
 
+static fir::DeclareOp getDeclareOp(mlir::Value arg) {
+  for (mlir::Operation *use : arg.getUsers())
+    if (fir::DeclareOp declare = mlir::dyn_cast<fir::DeclareOp>(use))
+      return declare;
+  return nullptr;
+}
+
 /// Get the name of a function argument using the "fir.bindc_name" attribute,
 /// or ""
 static std::string getFuncArgName(mlir::Value arg) {
+  // first try getting the name from the fir.declare
+  if (fir::DeclareOp declare = getDeclareOp(arg))
+    return declare.getUniqName().str();
+
+  // get from attribute on function argument
   // always succeeds because arg is a function argument
   mlir::BlockArgument blockArg = mlir::cast<mlir::BlockArgument>(arg);
   assert(blockArg.getOwner() && blockArg.getOwner()->isEntryBlock() &&
          "arg is a function argument");
-  mlir::FunctionOpInterface func =
-      mlir::cast<mlir::FunctionOpInterface>(blockArg.getOwner()->getParentOp());
+  mlir::FunctionOpInterface func = mlir::dyn_cast<mlir::FunctionOpInterface>(
+      blockArg.getOwner()->getParentOp());
   mlir::StringAttr attr = func.getArgAttrOfType<mlir::StringAttr>(
       blockArg.getArgNumber(), "fir.bindc_name");
   if (!attr)
diff --git a/flang/test/Transforms/tbaa.fir b/flang/test/Transforms/tbaa.fir
index 5c6123e34e154c0..7825ae60c71e681 100644
--- a/flang/test/Transforms/tbaa.fir
+++ b/flang/test/Transforms/tbaa.fir
@@ -20,7 +20,7 @@
 // CHECK: #[[ONE_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ONE_ARG_ROOT]], 0>}>
 // CHECK: #[[ONE_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ONE_ARG_ANY_ACCESS]], 0>}>
 // CHECK: #[[ONE_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ONE_ARG_ANY_DATA]], 0>}>
-// CHECK: #[[ONE_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#[[ONE_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[ONE_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFoneargEa", members = {<#[[ONE_ARG_ANY_ARG]], 0>}>
 // CHECK: #[[ONE_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ONE_ARG_A]], access_type = #[[ONE_ARG_A]], offset = 0>
 
 // CHECK-LABEL:   func.func @_QPonearg(
@@ -59,8 +59,8 @@
 // CHECK: #[[TWO_ARG_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TWO_ARG_ROOT]], 0>}>
 // CHECK: #[[TWO_ARG_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TWO_ARG_ANY_ACCESS]], 0>}>
 // CHECK: #[[TWO_ARG_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TWO_ARG_ANY_DATA]], 0>}>
-// CHECK: #[[TWO_ARG_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
-// CHECK: #[[TWO_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFtwoargEb", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
+// CHECK: #[[TWO_ARG_A:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFtwoargEa", members = {<#[[TWO_ARG_ANY_ARG]], 0>}>
 // CHECK: #[[TWO_ARG_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_B]], access_type = #[[TWO_ARG_B]], offset = 0>
 // CHECK: #[[TWO_ARG_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TWO_ARG_A]], access_type = #[[TWO_ARG_A]], offset = 0>
 
@@ -103,7 +103,7 @@
 // CHECK: #[[TARGET_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TARGET_ROOT]], 0>}>
 // CHECK: #[[TARGET_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[TARGET_ANY_ACCESS]], 0>}>
 // CHECK: #[[TARGET_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[TARGET_ANY_DATA]], 0>}>
-// CHECK: #[[TARGET_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[TARGET_ANY_ARG]], 0>}>
+// CHECK: #[[TARGET_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFtargetargEb", members = {<#[[TARGET_ANY_ARG]], 0>}>
 // CHECK: #[[TARGET_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[TARGET_B]], access_type = #[[TARGET_B]], offset = 0>
 // No entry for "dummy arg data/a" because that pointer should get "any data access" becase it has the TARGET attribute
 
@@ -150,7 +150,7 @@
 // CHECK: #[[POINTER_ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[POINTER_ROOT]], 0>}>
 // CHECK: #[[POINTER_ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[POINTER_ANY_ACCESS]], 0>}>
 // CHECK: #[[POINTER_ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[POINTER_ANY_DATA]], 0>}>
-// CHECK: #[[POINTER_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/b", members = {<#[[POINTER_ANY_ARG]], 0>}>
+// CHECK: #[[POINTER_B:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QFpointerargEb", members = {<#[[POINTER_ANY_ARG]], 0>}>
 // CHECK: #[[POINTER_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[POINTER_B]], access_type = #[[POINTER_B]], offset = 0>
 // No entry for "dummy arg data/a" because that pointer should get "any data access" becase it has the POINTER attribute
 
diff --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir
index d3e093448650ed1..84ba281cce7a956 100644
--- a/flang/test/Transforms/tbaa2.fir
+++ b/flang/test/Transforms/tbaa2.fir
@@ -48,9 +48,9 @@
 // CHECK: #[[ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK: #[[ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANY_ACCESS]], 0>}>
 // CHECK: #[[ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ANY_DATA]], 0>}>
-// CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/low", members = {<#[[ANY_ARG]], 0>}>
-// CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/z", members = {<#[[ANY_ARG]], 0>}>
-// CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/y", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeElow", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEz", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEy", members = {<#[[ANY_ARG]], 0>}>
 // CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_LOW]], access_type = #[[ARG_LOW]], offset = 0>
 // CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Z]], access_type = #[[ARG_Z]], offset = 0>
 // CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Y]], access_type = #[[ARG_Y]], offset = 0>

>From d5b464e702a1227c582996684f8f3cb82dad0f0a Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 10:23:57 +0000
Subject: [PATCH 09/16] Bail out if no func.func parent can be found

---
 flang/lib/Optimizer/Transforms/AddAliasTags.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index c584f17de4fbaf8..25439837acac518 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -112,6 +112,8 @@ static std::string getFuncArgName(mlir::Value arg) {
 void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
                                            PassState &state) {
   mlir::func::FuncOp func = op->getParentOfType<mlir::func::FuncOp>();
+  if (!func)
+    return;
 
   llvm::SmallVector<mlir::Value> accessedOperands = op.getAccessedOperands();
   assert(accessedOperands.size() == 1 &&

>From 51b401180023a4232e5c8349f44f6d8de3381c4b Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 14 Sep 2023 15:01:08 +0000
Subject: [PATCH 10/16] [flang] use TBAAForest in TBAABuilder

This is important to ensure that tags end up in the same trees that were
created in the FIR TBAA pass. If they are in different trees then
everything in one tree will be assumed to MayAlias with everything in the
other tree. This leads to poor performance.

@vzakhari requested that the old (not-per-function) trees are
maintained so I left the old test intact.
---
 .../flang/Optimizer/CodeGen/TBAABuilder.h     |  45 +++----
 flang/lib/Optimizer/CodeGen/TBAABuilder.cpp   |  58 ++++-----
 flang/test/Fir/tbaa-codegen.fir               |   6 +-
 flang/test/Fir/tbaa-codegen2.fir              | 114 ++++++++++++++++++
 flang/test/Fir/tbaa.fir                       |  24 ++--
 5 files changed, 175 insertions(+), 72 deletions(-)
 create mode 100644 flang/test/Fir/tbaa-codegen2.fir

diff --git a/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h b/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
index 3aeb124f911867e..2c5795678a91e16 100644
--- a/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
+++ b/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
@@ -13,8 +13,8 @@
 #ifndef FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
 #define FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
 
+#include "flang/Optimizer/Analysis/TBAAForest.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/IR/BuiltinAttributes.h"
 
 namespace fir {
 
@@ -25,9 +25,9 @@ namespace fir {
 //
 // TBAA type information is represented with LLVM::MetadataOp operation
 // with specific symbol name `TBAABuilder::tbaaMetaOpName`. The basic
-// TBAA tree used for Flang consists of the following nodes:
+// TBAA trees used for Flang consist of the following nodes:
 //   llvm.metadata @__flang_tbaa {
-//     llvm.tbaa_root @root_0 {id = "Flang Type TBAA Root"}
+//     llvm.tbaa_root @root_0 {id = "Flang function root funcName"}
 //     llvm.tbaa_type_desc @type_desc_1 {id = "any access",
 //                                       members = {<@root_0, 0>}}
 //     llvm.tbaa_type_desc @type_desc_2 {id = "any data access",
@@ -162,6 +162,9 @@ namespace fir {
 // Given the storage association, all non-box accesses are represented
 // with the conservative data access tag:
 //   < `<any data access>`, `<any data access>`, 0 >
+
+// additional tags are added in flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+// (before CodeGen)
 class TBAABuilder {
 public:
   TBAABuilder(mlir::MLIRContext *context, bool applyTBAA);
@@ -184,13 +187,13 @@ class TBAABuilder {
 
   // Returns TBAATagAttr representing access tag:
   //   < <descriptor member>, <descriptor member>, 0 >
-  mlir::LLVM::TBAATagAttr getAnyBoxAccessTag();
+  mlir::LLVM::TBAATagAttr getAnyBoxAccessTag(mlir::LLVM::LLVMFuncOp func);
   // Returns TBAATagAttr representing access tag:
   //   < <any data access>, <any data access>, 0 >
-  mlir::LLVM::TBAATagAttr getAnyDataAccessTag();
+  mlir::LLVM::TBAATagAttr getAnyDataAccessTag(mlir::LLVM::LLVMFuncOp func);
   // Returns TBAATagAttr representing access tag:
   //   < <any access>, <any access>, 0 >
-  mlir::LLVM::TBAATagAttr getAnyAccessTag();
+  mlir::LLVM::TBAATagAttr getAnyAccessTag(mlir::LLVM::LLVMFuncOp func);
 
   // Returns TBAATagAttr representing access tag described by the base and
   // access FIR types and the LLVM::GepOp representing the access in terms of
@@ -198,7 +201,8 @@ class TBAABuilder {
   // fir::BaseBoxType.
   mlir::LLVM::TBAATagAttr getBoxAccessTag(mlir::Type baseFIRType,
                                           mlir::Type accessFIRType,
-                                          mlir::LLVM::GEPOp gep);
+                                          mlir::LLVM::GEPOp gep,
+                                          mlir::LLVM::LLVMFuncOp func);
 
   // Returns TBAATagAttr representing access tag described by the base and
   // access FIR types and the LLVM::GepOp representing the access in terms of
@@ -206,34 +210,13 @@ class TBAABuilder {
   // "data" access, i.e. not an access of any box/descriptor member.
   mlir::LLVM::TBAATagAttr getDataAccessTag(mlir::Type baseFIRType,
                                            mlir::Type accessFIRType,
-                                           mlir::LLVM::GEPOp gep);
+                                           mlir::LLVM::GEPOp gep,
+                                           mlir::LLVM::LLVMFuncOp func);
 
   // Set to true, if TBAA builder is active, otherwise, all public
   // methods are no-ops.
   bool enableTBAA;
 
-  // LLVM::TBAARootAttr identifying Flang's TBAA root.
-  mlir::LLVM::TBAARootAttr flangTBAARoot;
-  // Identity string for Flang's TBAA root.
-  static constexpr llvm::StringRef flangTBAARootId = "Flang Type TBAA Root";
-
-  // LLVM::TBAATypeDescriptorAttr identifying "any access".
-  mlir::LLVM::TBAATypeDescriptorAttr anyAccessTypeDesc;
-  // Identity string for "any access" type descriptor.
-  static constexpr llvm::StringRef anyAccessTypeDescId = "any access";
-
-  // LLVM::TBAATypeDescriptorAttr identifying "any data access" (i.e. non-box
-  // memory access).
-  mlir::LLVM::TBAATypeDescriptorAttr anyDataAccessTypeDesc;
-  // Identity string for "any data access" type descriptor.
-  static constexpr llvm::StringRef anyDataAccessTypeDescId = "any data access";
-
-  // LLVM::TBAATypeDescriptorAttr identifying "descriptor member" access, i.e.
-  // any access within the bounds of a box/descriptor.
-  mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc;
-  // Identity string for "descriptor member" type descriptor.
-  static constexpr llvm::StringRef boxMemberTypeDescId = "descriptor member";
-
   // Number of attached TBAA tags (used for debugging).
   unsigned tagAttachmentCounter = 0;
 
@@ -247,6 +230,8 @@ class TBAABuilder {
       std::tuple<mlir::LLVM::TBAANodeAttr, mlir::LLVM::TBAANodeAttr, int64_t>,
       mlir::LLVM::TBAATagAttr>
       tagsMap;
+
+  TBAAForrest trees;
 };
 
 } // namespace fir
diff --git a/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp b/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
index 1a5ae8cf7aac629..9baeb0b27091ad7 100644
--- a/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
+++ b/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
@@ -15,6 +15,9 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
+#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
+#include <mlir/Dialect/LLVMIR/LLVMTypes.h>
 
 #define DEBUG_TYPE "flang-tbaa-builder"
 
@@ -27,6 +30,13 @@ static llvm::cl::opt<bool> disableTBAA(
                    "to override default Flang behavior"),
     llvm::cl::init(false));
 
+// disabling this will play badly with the FIR TBAA pass, leading to worse
+// performance
+static llvm::cl::opt<bool> perFunctionTBAATrees(
+    "per-function-tbaa-trees",
+    llvm::cl::desc("Give each function an independent TBAA tree (default)"),
+    llvm::cl::init(true), llvm::cl::Hidden);
+
 // tagAttachmentLimit is a debugging option that allows limiting
 // the number of TBAA access tag attributes attached to operations.
 // It is set to kTagAttachmentUnlimited by default denoting "no limit".
@@ -39,26 +49,10 @@ static llvm::cl::opt<unsigned>
 namespace fir {
 
 TBAABuilder::TBAABuilder(MLIRContext *context, bool applyTBAA)
-    : enableTBAA(applyTBAA && !disableTBAA) {
+    : enableTBAA(applyTBAA && !disableTBAA),
+      trees(/*separatePerFunction=*/perFunctionTBAATrees) {
   if (!enableTBAA)
     return;
-
-  // Root node.
-  flangTBAARoot =
-      TBAARootAttr::get(context, StringAttr::get(context, flangTBAARootId));
-
-  // Any access node.
-  anyAccessTypeDesc = TBAATypeDescriptorAttr::get(
-      context, anyAccessTypeDescId, TBAAMemberAttr::get(flangTBAARoot, 0));
-
-  // Any data access node.
-  anyDataAccessTypeDesc =
-      TBAATypeDescriptorAttr::get(context, anyDataAccessTypeDescId,
-                                  TBAAMemberAttr::get(anyAccessTypeDesc, 0));
-
-  // Box member access node.
-  boxMemberTypeDesc = TBAATypeDescriptorAttr::get(
-      context, boxMemberTypeDescId, TBAAMemberAttr::get(anyAccessTypeDesc, 0));
 }
 
 TBAATagAttr TBAABuilder::getAccessTag(TBAATypeDescriptorAttr baseTypeDesc,
@@ -73,26 +67,31 @@ TBAATagAttr TBAABuilder::getAccessTag(TBAATypeDescriptorAttr baseTypeDesc,
   return tag;
 }
 
-TBAATagAttr TBAABuilder::getAnyBoxAccessTag() {
+TBAATagAttr TBAABuilder::getAnyBoxAccessTag(mlir::LLVM::LLVMFuncOp func) {
+  TBAATypeDescriptorAttr boxMemberTypeDesc = trees[func].boxMemberTypeDesc;
   return getAccessTag(boxMemberTypeDesc, boxMemberTypeDesc, /*offset=*/0);
 }
 
 TBAATagAttr TBAABuilder::getBoxAccessTag(Type baseFIRType, Type accessFIRType,
-                                         GEPOp gep) {
-  return getAnyBoxAccessTag();
+                                         GEPOp gep,
+                                         mlir::LLVM::LLVMFuncOp func) {
+  return getAnyBoxAccessTag(func);
 }
 
-TBAATagAttr TBAABuilder::getAnyDataAccessTag() {
+TBAATagAttr TBAABuilder::getAnyDataAccessTag(mlir::LLVM::LLVMFuncOp func) {
+  TBAATypeDescriptorAttr anyDataAccessTypeDesc = trees[func].anyDataTypeDesc;
   return getAccessTag(anyDataAccessTypeDesc, anyDataAccessTypeDesc,
                       /*offset=*/0);
 }
 
 TBAATagAttr TBAABuilder::getDataAccessTag(Type baseFIRType, Type accessFIRType,
-                                          GEPOp gep) {
-  return getAnyDataAccessTag();
+                                          GEPOp gep,
+                                          mlir::LLVM::LLVMFuncOp func) {
+  return getAnyDataAccessTag(func);
 }
 
-TBAATagAttr TBAABuilder::getAnyAccessTag() {
+TBAATagAttr TBAABuilder::getAnyAccessTag(mlir::LLVM::LLVMFuncOp func) {
+  TBAATypeDescriptorAttr anyAccessTypeDesc = trees[func].anyAccessDesc;
   return getAccessTag(anyAccessTypeDesc, anyAccessTypeDesc, /*offset=*/0);
 }
 
@@ -101,6 +100,9 @@ void TBAABuilder::attachTBAATag(AliasAnalysisOpInterface op, Type baseFIRType,
   if (!enableTBAA)
     return;
 
+  mlir::LLVM::LLVMFuncOp func = op->getParentOfType<mlir::LLVM::LLVMFuncOp>();
+  assert(func && "func.func should have already been converted to llvm.func");
+
   ++tagAttachmentCounter;
   if (tagAttachmentLimit != kTagAttachmentUnlimited &&
       tagAttachmentCounter > tagAttachmentLimit)
@@ -115,11 +117,11 @@ void TBAABuilder::attachTBAATag(AliasAnalysisOpInterface op, Type baseFIRType,
     // a mix of data members and descriptor members may alias
     // with both data and descriptor accesses.
     // Conservatively set any-access tag if there is any descriptor member.
-    tbaaTagSym = getAnyAccessTag();
+    tbaaTagSym = getAnyAccessTag(func);
   } else if (baseFIRType.isa<fir::BaseBoxType>()) {
-    tbaaTagSym = getBoxAccessTag(baseFIRType, accessFIRType, gep);
+    tbaaTagSym = getBoxAccessTag(baseFIRType, accessFIRType, gep, func);
   } else {
-    tbaaTagSym = getDataAccessTag(baseFIRType, accessFIRType, gep);
+    tbaaTagSym = getDataAccessTag(baseFIRType, accessFIRType, gep, func);
   }
 
   if (!tbaaTagSym)
diff --git a/flang/test/Fir/tbaa-codegen.fir b/flang/test/Fir/tbaa-codegen.fir
index 386fe42eaaba9a2..9ee7a28a6d22250 100644
--- a/flang/test/Fir/tbaa-codegen.fir
+++ b/flang/test/Fir/tbaa-codegen.fir
@@ -39,9 +39,9 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
 // CHECK:  store i32 %[[A2]], ptr %[[A1]], align 4, !tbaa ![[A_ACCESS_TAG]]
 // CHECK:  ret void
 // CHECK: }
+// CHECK: ![[ANY_ACCESS_TYPE:.*]] = !{!"any access", ![[ROOT:.*]], i64 0}
+// CHECK: ![[ROOT]] = !{!"Flang function root _QPsimple"}
 // CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
 // CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[DUMMY_ARG_TYPE:.*]], i64 0}
 // CHECK: ![[DUMMY_ARG_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
-// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE:.*]], i64 0}
-// CHECK: ![[ANY_ACCESS_TYPE]] = !{!"any access", ![[ROOT:.*]], i64 0}
-// CHECK: ![[ROOT]] = !{!"Flang function root _QPsimple"}
\ No newline at end of file
+// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE]], i64 0}
\ No newline at end of file
diff --git a/flang/test/Fir/tbaa-codegen2.fir b/flang/test/Fir/tbaa-codegen2.fir
new file mode 100644
index 000000000000000..6b6849ab8584191
--- /dev/null
+++ b/flang/test/Fir/tbaa-codegen2.fir
@@ -0,0 +1,114 @@
+// test that tbaa attributes can be added to fir.load and fir.store
+// and that these attributes are propagated to LLVMIR and that these
+// interoperate with tbaa tags added during codegen
+
+// RUN: tco %s | FileCheck %s
+
+// subroutine func(a)
+//   integer, intent(inout) :: a(:)
+//   a = a+1
+//   a(1) = a(2)
+#tbaa_root = #llvm.tbaa_root<id = "Flang function root _QPfunc">
+#tbaa_type_desc = #llvm.tbaa_type_desc<id = "any access", members = {<#tbaa_root, 0>}>
+#tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "any data access", members = {<#tbaa_type_desc, 0>}>
+#tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#tbaa_type_desc1, 0>}>
+#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#tbaa_type_desc2, 0>}>
+#tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc3, access_type = #tbaa_type_desc3, offset = 0>
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "aarch64-unknown-linux-gnu"} {
+  func.func @_QPfunc(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
+    %c3_i32 = arith.constant 3 : i32
+    %c1_i32 = arith.constant 1 : i32
+    %c0 = arith.constant 0 : index
+    %c2 = arith.constant 2 : index
+    %c1 = arith.constant 1 : index
+    %0 = fir.alloca !fir.box<!fir.array<?xi32>>
+    %1 = fir.declare %arg0 {fortran_attrs = #fir.var_attrs<intent_inout>, uniq_name = "_QFfuncEa"} : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %2 = fir.rebox %1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<!fir.array<?xi32>>
+    %3:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+    %4 = fir.shape %3#1 : (index) -> !fir.shape<1>
+    %5 = fir.allocmem !fir.array<?xi32>, %3#1 {bindc_name = ".tmp.array", uniq_name = ""}
+    %6 = fir.declare %5(%4) {uniq_name = ".tmp.array"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xi32>>
+    %7 = fir.embox %6(%4) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
+    fir.do_loop %arg1 = %c1 to %3#1 step %c1 unordered {
+      %16 = fir.array_coor %2 %arg1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+      // load with tbaa
+      %17 = fir.load %16 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+      %18 = arith.addi %17, %c1_i32 : i32
+      %19 = fir.array_coor %6(%4) %arg1 : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+      // store without tbaa
+      fir.store %18 to %19 : !fir.ref<i32>
+    }
+    fir.store %2 to %0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+    %8 = fir.address_of(@_QQcl.2F746D702F73696D706C652E66393000) : !fir.ref<!fir.char<1,16>>
+    %9 = fir.convert %0 : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<none>>
+    %10 = fir.convert %7 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+    %11 = fir.convert %8 : (!fir.ref<!fir.char<1,16>>) -> !fir.ref<i8>
+    %12 = fir.call @_FortranAAssign(%9, %10, %11, %c3_i32) : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none
+    fir.freemem %6 : !fir.heap<!fir.array<?xi32>>
+    %13 = fir.array_coor %2 %c2 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    // load modified not to have tbaa
+    %14 = fir.load %13 : !fir.ref<i32>
+    %15 = fir.array_coor %2 %c1 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    // store with tbaa
+    fir.store %14 to %15 {tbaa = [#tbaa_tag]} : !fir.ref<i32>
+    return
+  }
+  func.func private @_FortranAAssign(!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.ref<i8>, i32) -> none attributes {fir.runtime}
+  fir.global linkonce @_QQcl.2F746D702F73696D706C652E66393000 constant : !fir.char<1,16> {
+    %0 = fir.string_lit "/tmp/simple.f90\00"(16) : !fir.char<1,16>
+    fir.has_value %0 : !fir.char<1,16>
+  }
+}
+// CHECK-LABEL: define void @_QPfunc(
+// CHECK-SAME:      ptr %[[ARG0:.*]]) {
+// [...]
+// CHECK:  %[[VAL5:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 7, i32 0, i32 0
+// box access:
+// CHECK:  %[[VAL6:.*]] = load i64, ptr %[[VAL5]], align 4, !tbaa ![[BOX_ACCESS_TAG:.*]]
+// CHECK:  %[[VAL7:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %0, i32 0, i32 7, i32 0, i32 1
+// box access:
+// CHECK:  %[[VAL8:.*]] = load i64, ptr %[[VAL7]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL9:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 7, i32 0, i32 2
+// box access:
+// CHECK:  %[[VAL10:.*]] = load i64, ptr %[[VAL9]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL11:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[ARG0]], i32 0, i32 0
+// box access:
+// CHECK:  %[[VAL12:.*]] = load ptr, ptr %[[VAL11]], align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL15:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %14, ptr %[[VAL12]], 0
+// CHECK:  store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[VAL15]], ptr %{{.*}}, align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL16:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 0
+// box access:
+// CHECK:  %[[VAL17:.*]] = load i64, ptr %[[VAL16]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL18:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 1
+// box access:
+// CHECK:  %[[VAL19:.*]] = load i64, ptr %[[VAL18]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL20:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 7, i64 0, i32 2
+// box access:
+// CHECK:  %[[VAL21:.*]] = load i64, ptr %[[VAL20]], align 4, !tbaa ![[BOX_ACCESS_TAG]]
+// [...]
+// box access:
+// CHECK:  store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %{{.*}}, ptr %{{.*}}, align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// [...]
+
+// [...]
+// CHECK:  %[[VAL40:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %{{.*}}, i32 0, i32 0
+// box access:
+// CHECK:  %[[VAL41:.*]] = load ptr, ptr %[[VAL40]], align 8, !tbaa ![[BOX_ACCESS_TAG]]
+// CHECK:  %[[VAL42:.*]] = getelementptr i8, ptr %[[VAL41]], i64 %{{.*}}
+// access to 'a':
+// CHECK:  %[[VAL43:.*]] = load i32, ptr %[[VAL42]], align 4, !tbaa ![[A_ACCESS_TAG:.*]]
+// [...]
+// CHECK:  %[[VAL50:.*]] = getelementptr i32, ptr %{{.*}}, i64 %{{.*}}
+// store to the temporary:
+// CHECK:  store i32 %{{.*}}, ptr %[[VAL50]], align 4, !tbaa ![[DATA_ACCESS_TAG:.*]]
+// [...]
+
+// CHECK: [[BOX_ACCESS_TAG]] = !{![[BOX_ACCESS_TYPE:.*]], ![[BOX_ACCESS_TYPE]], i64 0}
+// CHECK: ![[BOX_ACCESS_TYPE]] = !{!"descriptor member", ![[ANY_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[ANY_ACCESS_TYPE]] = !{!"any access", ![[ROOT_TYPE:.*]], i64 0}
+// CHECK: ![[ROOT_TYPE]] = !{!"Flang function root _QPfunc"}
+// CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
+// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[ARG_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[ARG_ACCESS_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE]], i64 0}
+// CHECK: ![[DATA_ACCESS_TAG]] = !{![[DATA_ACCESS_TYPE]], ![[DATA_ACCESS_TYPE]], i64 0}
diff --git a/flang/test/Fir/tbaa.fir b/flang/test/Fir/tbaa.fir
index eabc9f30127fa3d..d260e4f4aec46a3 100644
--- a/flang/test/Fir/tbaa.fir
+++ b/flang/test/Fir/tbaa.fir
@@ -1,5 +1,7 @@
-// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu apply-tbaa=true" | FileCheck %s
-// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu apply-tbaa=true" | FileCheck %s
+// Test without per-function tbaa trees so that this functionality does not bitrot.
+// Per-function tbaa trees are tested in tbaa-codegen2.fir.
+// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=x86_64-unknown-linux-gnu apply-tbaa=true" --per-function-tbaa-trees=false | FileCheck %s
+// RUN: fir-opt %s --split-input-file --fir-to-llvm-ir="target=aarch64-unknown-linux-gnu apply-tbaa=true" --per-function-tbaa-trees=false | FileCheck %s
 
 module {
   func.func @tbaa(%arg0: !fir.class<!fir.array<?xnone>> {fir.bindc_name = "a"}) {
@@ -20,7 +22,7 @@ module {
   }
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[ANYDACC:.*]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
@@ -119,7 +121,7 @@ module {
   }
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -245,7 +247,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<*:f64>>) -> i32 {
   return %0 : i32
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -264,7 +266,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<*:f64>>) -> i1 {
   return %0 : i1
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -285,7 +287,7 @@ func.func @tbaa(%arg0: !fir.box<f32>) -> i32 {
   return %0 : i32
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -304,7 +306,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<*:f64>>) -> i1 {
   return %0 : i1
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -328,7 +330,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<?xi32>>) {
   return
 }
 
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[BOXMEM:.*]] = #llvm.tbaa_type_desc<id = "descriptor member", members = {<#[[ANYACC]], 0>}>
 // CHECK-DAG:     #[[$BOXT:.*]] = #llvm.tbaa_tag<base_type = #[[BOXMEM]], access_type = #[[BOXMEM]], offset = 0>
@@ -356,7 +358,7 @@ func.func @tbaa(%arg0: !fir.box<!fir.array<?xi32>>) {
 
 // Check that the scalar aggregate load/store with a descriptor member
 // is mapped to any-access.
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[$ANYT:.*]] = #llvm.tbaa_tag<base_type = #[[ANYACC]], access_type = #[[ANYACC]], offset = 0>
 
@@ -373,7 +375,7 @@ func.func @tbaa(%arg0: !fir.ref<!fir.type<_QMtypesTt{x:!fir.box<!fir.heap<f32>>}
 
 // Check that the array aggregate load/store with a descriptor member
 // is mapped to any-access.
-// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang Type TBAA Root">
+// CHECK-DAG:     #[[ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root ">
 // CHECK-DAG:     #[[ANYACC:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK-DAG:     #[[$ANYT:.*]] = #llvm.tbaa_tag<base_type = #[[ANYACC]], access_type = #[[ANYACC]], offset = 0>
 

>From 5debd9a12bf1f15e2bb6561f2cb1ee4aa16483ef Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 12:49:43 +0000
Subject: [PATCH 11/16] Update tests for new naming

This is fallout from getting argument names from fir.declare operations
instead of from fir.bindc_name attributes.
---
 flang/test/Fir/tbaa-codegen.fir  | 6 +++---
 flang/test/Fir/tbaa-codegen2.fir | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/flang/test/Fir/tbaa-codegen.fir b/flang/test/Fir/tbaa-codegen.fir
index 9ee7a28a6d22250..7acd04da24c9cbf 100644
--- a/flang/test/Fir/tbaa-codegen.fir
+++ b/flang/test/Fir/tbaa-codegen.fir
@@ -11,7 +11,7 @@
 #tbaa_type_desc = #llvm.tbaa_type_desc<id = "any access", members = {<#tbaa_root, 0>}>
 #tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "any data access", members = {<#tbaa_type_desc, 0>}>
 #tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#tbaa_type_desc1, 0>}>
-#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#tbaa_type_desc2, 0>}>
+#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/_QFfuncEa", members = {<#tbaa_type_desc2, 0>}>
 #tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc3, access_type = #tbaa_type_desc3, offset = 0>
 module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "aarch64-unknown-linux-gnu"} {
   func.func @_QPsimple(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
@@ -42,6 +42,6 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
 // CHECK: ![[ANY_ACCESS_TYPE:.*]] = !{!"any access", ![[ROOT:.*]], i64 0}
 // CHECK: ![[ROOT]] = !{!"Flang function root _QPsimple"}
 // CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
-// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[DUMMY_ARG_TYPE:.*]], i64 0}
+// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/_QFfuncEa", ![[DUMMY_ARG_TYPE:.*]], i64 0}
 // CHECK: ![[DUMMY_ARG_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
-// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE]], i64 0}
\ No newline at end of file
+// CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE]], i64 0}
diff --git a/flang/test/Fir/tbaa-codegen2.fir b/flang/test/Fir/tbaa-codegen2.fir
index 6b6849ab8584191..f79a6108fb41e80 100644
--- a/flang/test/Fir/tbaa-codegen2.fir
+++ b/flang/test/Fir/tbaa-codegen2.fir
@@ -12,7 +12,7 @@
 #tbaa_type_desc = #llvm.tbaa_type_desc<id = "any access", members = {<#tbaa_root, 0>}>
 #tbaa_type_desc1 = #llvm.tbaa_type_desc<id = "any data access", members = {<#tbaa_type_desc, 0>}>
 #tbaa_type_desc2 = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#tbaa_type_desc1, 0>}>
-#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/a", members = {<#tbaa_type_desc2, 0>}>
+#tbaa_type_desc3 = #llvm.tbaa_type_desc<id = "dummy arg data/_QFfuncEa", members = {<#tbaa_type_desc2, 0>}>
 #tbaa_tag = #llvm.tbaa_tag<base_type = #tbaa_type_desc3, access_type = #tbaa_type_desc3, offset = 0>
 module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "aarch64-unknown-linux-gnu"} {
   func.func @_QPfunc(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) {
@@ -108,7 +108,7 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.targ
 // CHECK: ![[ANY_ACCESS_TYPE]] = !{!"any access", ![[ROOT_TYPE:.*]], i64 0}
 // CHECK: ![[ROOT_TYPE]] = !{!"Flang function root _QPfunc"}
 // CHECK: ![[A_ACCESS_TAG]] = !{![[A_ACCESS_TYPE:.*]], ![[A_ACCESS_TYPE]], i64 0}
-// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/a", ![[ARG_ACCESS_TYPE:.*]], i64 0}
+// CHECK: ![[A_ACCESS_TYPE]] = !{!"dummy arg data/_QFfuncEa", ![[ARG_ACCESS_TYPE:.*]], i64 0}
 // CHECK: ![[ARG_ACCESS_TYPE]] = !{!"dummy arg data", ![[DATA_ACCESS_TYPE:.*]], i64 0}
 // CHECK: ![[DATA_ACCESS_TYPE]] = !{!"any data access", ![[ANY_ACCESS_TYPE]], i64 0}
 // CHECK: ![[DATA_ACCESS_TAG]] = !{![[DATA_ACCESS_TYPE]], ![[DATA_ACCESS_TYPE]], i64 0}

>From 56ed3a6e25c34542a47d52afc7cf1aeaa2b58271 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 10 Oct 2023 14:51:36 +0000
Subject: [PATCH 12/16] Add argument to prevent per-function tbaa trees

---
 flang/include/flang/Optimizer/CodeGen/TBAABuilder.h | 4 +++-
 flang/lib/Optimizer/CodeGen/TBAABuilder.cpp         | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h b/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
index 2c5795678a91e16..5420e48146bbfae 100644
--- a/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
+++ b/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h
@@ -167,7 +167,9 @@ namespace fir {
 // (before CodeGen)
 class TBAABuilder {
 public:
-  TBAABuilder(mlir::MLIRContext *context, bool applyTBAA);
+  /// If forceUnifiedTree is true, all functions share a single TBAA tree.
+  TBAABuilder(mlir::MLIRContext *context, bool applyTBAA,
+              bool forceUnifiedTree = false);
   TBAABuilder(TBAABuilder const &) = delete;
   TBAABuilder &operator=(TBAABuilder const &) = delete;
 
diff --git a/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp b/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
index 9baeb0b27091ad7..8e7f59f76383c96 100644
--- a/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
+++ b/flang/lib/Optimizer/CodeGen/TBAABuilder.cpp
@@ -48,9 +48,10 @@ static llvm::cl::opt<unsigned>
 
 namespace fir {
 
-TBAABuilder::TBAABuilder(MLIRContext *context, bool applyTBAA)
+TBAABuilder::TBAABuilder(MLIRContext *context, bool applyTBAA,
+                         bool forceUnifiedTree)
     : enableTBAA(applyTBAA && !disableTBAA),
-      trees(/*separatePerFunction=*/perFunctionTBAATrees) {
+      trees(/*separatePerFunction=*/perFunctionTBAATrees && !forceUnifiedTree) {
   if (!enableTBAA)
     return;
 }

>From ca6751edec4207165e09070393b9f5489e85d4a4 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 14 Sep 2023 09:09:29 +0000
Subject: [PATCH 13/16] [flang][driver] add command line arguments for alias
 tags pass

The ultimate intention is to have this pass enabled by default whenever
we are optimizing for speed. But for now, just add the arguments so this
can be more easily tested.

Previous PR in this series:
https://github.com/llvm/llvm-project/pull/68437
---
 clang/include/clang/Driver/Options.td         |  3 +++
 clang/lib/Driver/ToolChains/Flang.cpp         |  4 +++-
 .../include/flang/Frontend/CodeGenOptions.def |  1 +
 flang/include/flang/Tools/CLOptions.inc       |  4 ++++
 flang/include/flang/Tools/CrossToolHelpers.h  |  2 ++
 flang/lib/Frontend/CompilerInvocation.cpp     |  8 +++++++
 flang/test/Driver/driver-help-hidden.f90      |  2 ++
 flang/test/Driver/driver-help.f90             |  4 ++++
 flang/test/Driver/falias-analysis.f90         | 21 +++++++++++++++++++
 9 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/Driver/falias-analysis.f90

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e7d8fa1c587d4e0..9410e354298640a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6247,6 +6247,9 @@ defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays",
 defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride",
   PosFlag<SetTrue, [], [ClangOption], "Create unit-strided versions of loops">,
    NegFlag<SetFalse, [], [ClangOption], "Do not create unit-strided loops (default)">>;
+defm alias_analysis : BoolOptionWithoutMarshalling<"f", "alias-analysis",
+  PosFlag<SetTrue, [], [], "Pass alias information on to LLVM (default when optimizing for speed)">,
+  NegFlag<SetFalse, [], [], "Do not pass alias information on to LLVM (default for unoptimized builds)">>;
 } // let Visibility = [FC1Option, FlangOption]
 
 def J : JoinedOrSeparate<["-"], "J">,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index fe44939741d8ddb..6234572397466bc 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -145,7 +145,9 @@ void Flang::addCodegenOptions(const ArgList &Args,
   Args.AddAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
                             options::OPT_flang_experimental_polymorphism,
                             options::OPT_fno_ppc_native_vec_elem_order,
-                            options::OPT_fppc_native_vec_elem_order});
+                            options::OPT_fppc_native_vec_elem_order,
+                            options::OPT_falias_analysis,
+                            options::OPT_fno_alias_analysis});
 }
 
 void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const {
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index c3a04108aa081c7..1e350869f1377e3 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -32,6 +32,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the
                                      ///< compile step.
 CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass)
 CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning.
+CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass
 
 CODEGENOPT(Underscoring, 1, 1)
 ENUM_CODEGENOPT(RelocationModel, llvm::Reloc::Model, 3, llvm::Reloc::PIC_) ///< Name of the relocation model to use.
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 76d18b73aee20af..e80a7cd344c0411 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -64,6 +64,7 @@ DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass");
 DisableOption(FirAvc, "avc", "array value copy analysis and transformation");
 DisableOption(
     FirMao, "memory-allocation-opt", "memory allocation optimization");
+DisableOption(FirAliasTags, "fir-alias-tags", "fir alias analysis");
 
 /// CodeGen Passes
 #if !defined(FLANG_EXCLUDE_CODEGEN)
@@ -220,6 +221,9 @@ inline void createDefaultFIROptimizerPassPipeline(
   // Polymorphic types
   pm.addPass(fir::createPolymorphicOpConversionPass());
 
+  if (pc.AliasAnalysis && !disableFirAliasTags)
+    pm.addPass(fir::createAliasTagsPass());
+
   // convert control flow to CFG form
   fir::addCfgConversionPass(pm);
   pm.addPass(mlir::createConvertSCFToCFPass());
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 6245a2f1376fcca..ddec70fa9824c52 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -34,12 +34,14 @@ struct MLIRToLLVMPassPipelineConfig {
     Underscoring = opts.Underscoring;
     LoopVersioning = opts.LoopVersioning;
     DebugInfo = opts.getDebugInfo();
+    AliasAnalysis = opts.AliasAnalysis;
   }
 
   llvm::OptimizationLevel OptLevel; ///< optimisation level
   bool StackArrays = false; ///< convert memory allocations to alloca.
   bool Underscoring = true; ///< add underscores to function names.
   bool LoopVersioning = false; ///< Run the version loop pass.
+  bool AliasAnalysis = false; ///< Add TBAA tags to generated LLVMIR
   llvm::codegenoptions::DebugInfoKind DebugInfo =
       llvm::codegenoptions::NoDebugInfo; ///< Debug info generation.
   unsigned VScaleMin = 0; ///< SVE vector range minimum.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 37315e0e4d27a7a..a5edd9e887d8a24 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -214,6 +214,14 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
                    clang::driver::options::OPT_fno_loop_versioning, false))
     opts.LoopVersioning = 1;
 
+  opts.AliasAnalysis = false;
+  if (auto *arg =
+          args.getLastArg(clang::driver::options::OPT_falias_analysis,
+                          clang::driver::options::OPT_fno_alias_analysis)) {
+    if (arg->getOption().matches(clang::driver::options::OPT_falias_analysis))
+      opts.AliasAnalysis = true;
+  }
+
   for (auto *a : args.filtered(clang::driver::options::OPT_fpass_plugin_EQ))
     opts.LLVMPassPlugins.push_back(a->getValue());
 
diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index 807b0f938d27b5c..0edb84d25f4de37 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -24,6 +24,7 @@
 ! CHECK-NEXT: -D <macro>=<value>      Define <macro> to <value> (or 1 if <value> omitted)
 ! CHECK-NEXT: -emit-llvm              Use the LLVM representation for assembler and object files
 ! CHECK-NEXT: -E                      Only run the preprocessor
+! CHECK-NEXT: -falias-analysis        Pass alias information on to LLVM (default when optimizing for speed)
 ! CHECK-NEXT: -falternative-parameter-statement
 ! CHECK-NEXT:                         Enable the old style PARAMETER statement
 ! CHECK-NEXT: -fapprox-func           Allow certain math function calls to be replaced with an approximately equivalent calculation
@@ -56,6 +57,7 @@
 ! CHECK-NEXT: -flto=jobserver         Enable LTO in 'full' mode
 ! CHECK-NEXT: -flto=<value>           Set LTO mode
 ! CHECK-NEXT: -flto                   Enable LTO in 'full' mode
+! CHECK-NEXT: -fno-alias-analysis     Do not pass alias information on to LLVM (default for unoptimized builds)
 ! CHECK-NEXT: -fno-automatic          Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! CHECK-NEXT: -fno-color-diagnostics  Disable colors in diagnostics
 ! CHECK-NEXT: -fno-integrated-as      Disable the integrated assembler
diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index 4894f90f5310439..53b05a5ac104a7b 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -20,6 +20,7 @@
 ! HELP-NEXT: -D <macro>=<value>      Define <macro> to <value> (or 1 if <value> omitted)
 ! HELP-NEXT: -emit-llvm              Use the LLVM representation for assembler and object files
 ! HELP-NEXT: -E                      Only run the preprocessor
+! HELP-NEXT: -falias-analysis        Pass alias information on to LLVM (default when optimizing for speed)
 ! HELP-NEXT: -falternative-parameter-statement
 ! HELP-NEXT:                         Enable the old style PARAMETER statement
 ! HELP-NEXT: -fapprox-func           Allow certain math function calls to be replaced with an approximately equivalent calculation
@@ -48,6 +49,7 @@
 ! HELP-NEXT: -flto=jobserver         Enable LTO in 'full' mode
 ! HELP-NEXT: -flto=<value>           Set LTO mode
 ! HELP-NEXT: -flto                   Enable LTO in 'full' mode
+! HELP-NEXT: -fno-alias-analysis     Do not pass alias information on to LLVM (default for unoptimized builds)
 ! HELP-NEXT: -fno-automatic          Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
 ! HELP-NEXT: -fno-color-diagnostics  Disable colors in diagnostics
 ! HELP-NEXT: -fno-integrated-as      Disable the integrated assembler
@@ -141,6 +143,7 @@
 ! HELP-FC1-NEXT: -emit-llvm              Use the LLVM representation for assembler and object files
 ! HELP-FC1-NEXT: -emit-obj               Emit native object files
 ! HELP-FC1-NEXT: -E                      Only run the preprocessor
+! HELP-FC1-NEXT: -falias-analysis        Pass alias information on to LLVM (default when optimizing for speed)
 ! HELP-FC1-NEXT: -falternative-parameter-statement
 ! HELP-FC1-NEXT:                         Enable the old style PARAMETER statement
 ! HELP-FC1-NEXT: -fapprox-func           Allow certain math function calls to be replaced with an approximately equivalent calculation
@@ -187,6 +190,7 @@
 ! HELP-FC1-NEXT: -flogical-abbreviations Enable logical abbreviations
 ! HELP-FC1-NEXT: -flto=<value>           Set LTO mode
 ! HELP-FC1-NEXT: -flto                   Enable LTO in 'full' mode
+! HELP-FC1-NEXT: -fno-alias-analysis     Do not pass alias information on to LLVM (default for unoptimized builds)
 ! HELP-FC1-NEXT: -fno-analyzed-objects-for-unparse
 ! HELP-FC1-NEXT:                         Do not use the analyzed objects when unparsing
 ! HELP-FC1-NEXT: -fno-automatic          Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE
diff --git a/flang/test/Driver/falias-analysis.f90 b/flang/test/Driver/falias-analysis.f90
new file mode 100644
index 000000000000000..f2c5dbde6d2c878
--- /dev/null
+++ b/flang/test/Driver/falias-analysis.f90
@@ -0,0 +1,21 @@
+! Check that -falias-analysis and -fno-alias-analysis work as expected
+! See flang/test/Fir/tbaa-codegen.fir for a test that the output is correct
+
+! RUN: %flang -c -emit-llvm -falias-analysis %s -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-AA --check-prefix=CHECK-ALL
+! RUN: %flang -c -emit-llvm -falias-analysis -fno-alias-analysis %s -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-NOAA --check-prefix=CHECK-ALL
+! RUN: %flang -c -emit-llvm %s -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-NOAA --check-prefix=CHECK-ALL
+
+! RUN: %flang -fc1 -emit-llvm -falias-analysis %s -o - | FileCheck %s --check-prefix=CHECK-AA --check-prefix=CHECK-ALL
+! RUN: %flang -fc1 -emit-llvm -falias-analysis -fno-alias-analysis %s -o - | FileCheck %s --check-prefix=CHECK-NOAA --check-prefix=CHECK-ALL
+! RUN: %flang -fc1 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK-NOAA --check-prefix=CHECK-ALL
+
+subroutine simple(a)
+  integer, intent(inout) :: a(:)
+  a(1) = a(2)
+end subroutine
+! CHECK-ALL-LABEL: define void @simple
+! CHECK-ALL:       ret
+! CHECK-ALL:     }
+
+! CHECK-AA: ![[ROOT:.*]] = !{!"Flang function root _QPsimple"}
+! CHECK-NOAA-NOT: ![[ROOT:.*]] = !{!"Flang function root _QPsimple"}

>From 9168aa0f6722d576267747e3ec3f8178e6df7f85 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 10 Oct 2023 15:37:29 +0000
Subject: [PATCH 14/16] Add flag to return to the old tbaa behaviour

---
 flang/include/flang/Optimizer/CodeGen/CodeGen.h       | 3 +++
 flang/include/flang/Optimizer/CodeGen/TypeConverter.h | 3 ++-
 flang/include/flang/Tools/CLOptions.inc               | 8 +++++++-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp               | 3 ++-
 flang/lib/Optimizer/CodeGen/TypeConverter.cpp         | 7 ++++---
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index 64747b871de0943..6d33e9f9bb9affd 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -54,6 +54,9 @@ struct FIRToLLVMPassOptions {
 
   // Generate TBAA information for FIR types and memory accessing operations.
   bool applyTBAA = false;
+
+  // Force the use of a unified TBAA tree in TBAABuilder.
+  bool forceUnifiedTBAATree = false;
 };
 
 /// Convert FIR to the LLVM IR dialect with default options.
diff --git a/flang/include/flang/Optimizer/CodeGen/TypeConverter.h b/flang/include/flang/Optimizer/CodeGen/TypeConverter.h
index 5b8b51b5a30bc8c..29d0a902f556269 100644
--- a/flang/include/flang/Optimizer/CodeGen/TypeConverter.h
+++ b/flang/include/flang/Optimizer/CodeGen/TypeConverter.h
@@ -45,7 +45,8 @@ namespace fir {
 /// This converts FIR types to LLVM types (for now)
 class LLVMTypeConverter : public mlir::LLVMTypeConverter {
 public:
-  LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA);
+  LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
+                    bool forceUnifiedTBAATree);
 
   // i32 is used here because LLVM wants i32 constants when indexing into struct
   // types. Indexing into other aggregate types is more flexible.
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index e80a7cd344c0411..2ed716382feb43f 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -64,7 +64,12 @@ DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass");
 DisableOption(FirAvc, "avc", "array value copy analysis and transformation");
 DisableOption(
     FirMao, "memory-allocation-opt", "memory allocation optimization");
+
 DisableOption(FirAliasTags, "fir-alias-tags", "fir alias analysis");
+static llvm::cl::opt<bool> useOldAliasTags("use-old-alias-tags",
+    llvm::cl::desc("Use a single TBAA tree for all functions and do not use "
+                   "the FIR alias tags pass"),
+    llvm::cl::init(false), llvm::cl::Hidden);
 
 /// CodeGen Passes
 #if !defined(FLANG_EXCLUDE_CODEGEN)
@@ -157,6 +162,7 @@ inline void addFIRToLLVMPass(
   fir::FIRToLLVMPassOptions options;
   options.ignoreMissingTypeDescriptors = ignoreMissingTypeDescriptors;
   options.applyTBAA = optLevel.isOptimizingForSpeed();
+  options.forceUnifiedTBAATree = useOldAliasTags;
   addPassConditionally(pm, disableFirToLlvmIr,
       [&]() { return fir::createFIRToLLVMPass(options); });
 }
@@ -221,7 +227,7 @@ inline void createDefaultFIROptimizerPassPipeline(
   // Polymorphic types
   pm.addPass(fir::createPolymorphicOpConversionPass());
 
-  if (pc.AliasAnalysis && !disableFirAliasTags)
+  if (pc.AliasAnalysis && !disableFirAliasTags && !useOldAliasTags)
     pm.addPass(fir::createAliasTagsPass());
 
   // convert control flow to CFG form
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index f2ce123124895e0..337f73e93d9f635 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3786,7 +3786,8 @@ class FIRToLLVMLowering
 
     auto *context = getModule().getContext();
     fir::LLVMTypeConverter typeConverter{getModule(),
-                                         options.applyTBAA || applyTBAA};
+                                         options.applyTBAA || applyTBAA,
+                                         options.forceUnifiedTBAATree};
     mlir::RewritePatternSet pattern(context);
     pattern.insert<
         AbsentOpConversion, AddcOpConversion, AddrOfOpConversion,
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 77e94c00ec0792f..104018030bffd5c 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -26,7 +26,8 @@
 
 namespace fir {
 
-LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA)
+LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
+                                     bool forceUnifiedTBAATree)
     : mlir::LLVMTypeConverter(module.getContext(),
                               [&] {
                                 mlir::LowerToLLVMOptions options(
@@ -38,8 +39,8 @@ LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA)
       specifics(CodeGenSpecifics::get(module.getContext(),
                                       getTargetTriple(module),
                                       getKindMapping(module))),
-      tbaaBuilder(
-          std::make_unique<TBAABuilder>(module->getContext(), applyTBAA)) {
+      tbaaBuilder(std::make_unique<TBAABuilder>(module->getContext(), applyTBAA,
+                                                forceUnifiedTBAATree)) {
   LLVM_DEBUG(llvm::dbgs() << "FIR type converter\n");
 
   // Each conversion should return a value of type mlir::Type.

>From ba1bbbe7f9af23e1814faa40223d330a3f9553b5 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Mon, 9 Oct 2023 21:46:53 +0000
Subject: [PATCH 15/16] [flang] add TBAA tags to global variables

These turn out to be useful for spec2017/fotonik3d, and are safe so long
as they are not used alongside TBAA tags for local allocations. LLVM may
be able to figure out local allocations by itself anyway.
---
 .../lib/Optimizer/Analysis/AliasAnalysis.cpp  |  2 +-
 .../lib/Optimizer/Transforms/AddAliasTags.cpp | 11 ++---
 flang/test/Transforms/tbaa2.fir               | 42 ++++++++++++-------
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index 850026ebf33b995..90072eea323beba 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -406,7 +406,7 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v) {
         attributes.set(Attribute::Pointer);
     }
 
-  if (type == SourceKind::Global)
+  if (type == SourceKind::Global || type == SourceKind::Direct)
     return {global, type, ty, attributes, approximateSource};
 
   return {v, type, ty, attributes, approximateSource};
diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 25439837acac518..733146f05a232f2 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -35,13 +35,13 @@ namespace fir {
 static llvm::cl::opt<bool>
     enableDummyArgs("dummy-arg-tbaa", llvm::cl::init(true), llvm::cl::Hidden,
                     llvm::cl::desc("Add TBAA tags to dummy arguments"));
-// These two are **known unsafe** (misscompare in spec2017/wrf_r). They should
+static llvm::cl::opt<bool>
+    enableGlobals("globals-tbaa", llvm::cl::init(true), llvm::cl::Hidden,
+                  llvm::cl::desc("Add TBAA tags to global variables"));
+// This is **known unsafe** (miscompare in spec2017/wrf_r). It should
 // not be enabled by default.
 // The code is kept so that these may be tried with new benchmarks to see if
 // this is worth fixing in the future.
-static llvm::cl::opt<bool>
-    enableGlobals("globals-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
-                  llvm::cl::desc("Add TBAA tags to global variables. UNSAFE."));
 static llvm::cl::opt<bool> enableLocalAllocs(
     "local-alloc-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
     llvm::cl::desc("Add TBAA tags to local allocations. UNSAFE."));
@@ -151,7 +151,8 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
 
     // TBAA for global variables
   } else if (enableGlobals &&
-             source.kind == fir::AliasAnalysis::SourceKind::Global) {
+             (source.kind == fir::AliasAnalysis::SourceKind::Global ||
+              source.kind == fir::AliasAnalysis::SourceKind::Direct)) {
     mlir::SymbolRefAttr glbl = source.u.get<mlir::SymbolRefAttr>();
     const char *name = glbl.getRootReference().data();
     LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to global " << name
diff --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir
index 84ba281cce7a956..47fd7be06aaa6a2 100644
--- a/flang/test/Transforms/tbaa2.fir
+++ b/flang/test/Transforms/tbaa2.fir
@@ -47,12 +47,30 @@
 // CHECK: #[[ROOT:.+]] = #llvm.tbaa_root<id = "Flang function root _QMmodPcallee">
 // CHECK: #[[ANY_ACCESS:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[ROOT]], 0>}>
 // CHECK: #[[ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANY_ACCESS]], 0>}>
+// CHECK: #[[ANY_GLBL:.+]] = #llvm.tbaa_type_desc<id = "global data", members = {<#[[ANY_DATA]], 0>}>
 // CHECK: #[[ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ANY_DATA]], 0>}>
+// CHECK: #[[GLBL_ZSTART:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEzstart", members = {<#[[ANY_GLBL]], 0>}>
+// CHECK: #[[GLBL_ZSTOP:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEzstop", members = {<#[[ANY_GLBL]], 0>}>
+// CHECK: #[[GLBL_YSTART:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEystart", members = {<#[[ANY_GLBL]], 0>}>
+// CHECK: #[[GLBL_YSTOP:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEystop", members = {<#[[ANY_GLBL]], 0>}>
+// CHECK: #[[GLBL_XSTART:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodExstart", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeElow", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[GLBL_A:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEa", members = {<#[[ANY_GLBL]], 0>}>
+// CHECK: #[[GLBL_B:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEb", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEz", members = {<#[[ANY_ARG]], 0>}>
+// CHECK: #[[GLBL_DYINV:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEdyinv", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEy", members = {<#[[ANY_ARG]], 0>}>
+
+// CHECK: #[[GLBL_ZSTART_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_ZSTART]], access_type = #[[GLBL_ZSTART]], offset = 0>
+// CHECK: #[[GLBL_ZSTOP_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_ZSTOP]], access_type = #[[GLBL_ZSTOP]], offset = 0>
+// CHECK: #[[GLBL_YSTART_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_YSTART]], access_type = #[[GLBL_YSTART]], offset = 0>
+// CHECK: #[[GLBL_YSTOP_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_YSTOP]], access_type = #[[GLBL_YSTOP]], offset = 0>
+// CHECK: #[[GLBL_XSTART_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_XSTART]], access_type = #[[GLBL_XSTART]], offset = 0>
 // CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_LOW]], access_type = #[[ARG_LOW]], offset = 0>
+// CHECK: #[[GLBL_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_A]], access_type = #[[GLBL_A]], offset = 0>
+// CHECK: #[[GLBL_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_B]], access_type = #[[GLBL_B]], offset = 0>
 // CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Z]], access_type = #[[ARG_Z]], offset = 0>
+// CHECK: #[[GLBL_DYINV_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_DYINV]], access_type = #[[GLBL_DYINV]], offset = 0>
 // CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Y]], access_type = #[[ARG_Y]], offset = 0>
 
   func.func @_QMmodPcallee(%arg0: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "z"}, %arg1: !fir.box<!fir.array<?x?x?xf32>> {fir.bindc_name = "y"}, %arg2: !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>> {fir.bindc_name = "low"}) {
@@ -246,28 +264,23 @@
 // CHECK:           %[[VAL_37:.*]] = fir.rebox %[[VAL_36]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
 // CHECK:           %[[VAL_38:.*]] = fir.declare %[[VAL_0]] {fortran_attrs = #{{.*}}<intent_in>, uniq_name = "_QMmodFcalleeEz"} : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
 // CHECK:           %[[VAL_39:.*]] = fir.rebox %[[VAL_38]] : (!fir.box<!fir.array<?x?x?xf32>>) -> !fir.box<!fir.array<?x?x?xf32>>
-// TODO: read from global assumed to always alias
-// CHECK:           %[[VAL_40:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
+// CHECK:           %[[VAL_40:.*]] = fir.load %[[VAL_22]] {tbaa = [#[[GLBL_ZSTART_TAG]]]} : !fir.ref<i32>
 // CHECK:           %[[VAL_41:.*]] = arith.addi %[[VAL_40]], %[[VAL_6]] : i32
 // CHECK:           %[[VAL_42:.*]] = fir.convert %[[VAL_41]] : (i32) -> index
-// TODO: read from global assumed to always alias
-// CHECK:           %[[VAL_43:.*]] = fir.load %[[VAL_24]] : !fir.ref<i32>
+// CHECK:           %[[VAL_43:.*]] = fir.load %[[VAL_24]] {tbaa = [#[[GLBL_ZSTOP_TAG]]]} : !fir.ref<i32>
 // CHECK:           %[[VAL_44:.*]] = fir.convert %[[VAL_43]] : (i32) -> index
 // CHECK:           %[[VAL_45:.*]] = fir.convert %[[VAL_42]] : (index) -> i32
 // CHECK:           %[[VAL_46:.*]]:2 = fir.do_loop %[[VAL_47:.*]] = %[[VAL_42]] to %[[VAL_44]] step %[[VAL_5]] iter_args(%[[VAL_48:.*]] = %[[VAL_45]]) -> (index, i32) {
 // CHECK:             fir.store %[[VAL_48]] to %[[VAL_34]] : !fir.ref<i32>
-// TODO: read from global assumed to always alias
-// CHECK:             %[[VAL_49:.*]] = fir.load %[[VAL_18]] : !fir.ref<i32>
+// CHECK:             %[[VAL_49:.*]] = fir.load %[[VAL_18]] {tbaa = [#[[GLBL_YSTART_TAG]]]} : !fir.ref<i32>
 // CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_49]], %[[VAL_6]] : i32
 // CHECK:             %[[VAL_51:.*]] = fir.convert %[[VAL_50]] : (i32) -> index
-// TODO: read from global assumed to always alias
-// CHECK:             %[[VAL_52:.*]] = fir.load %[[VAL_20]] : !fir.ref<i32>
+// CHECK:             %[[VAL_52:.*]] = fir.load %[[VAL_20]] {tbaa = [#[[GLBL_YSTOP_TAG]]]} : !fir.ref<i32>
 // CHECK:             %[[VAL_53:.*]] = fir.convert %[[VAL_52]] : (i32) -> index
 // CHECK:             %[[VAL_54:.*]] = fir.convert %[[VAL_51]] : (index) -> i32
 // CHECK:             %[[VAL_55:.*]]:2 = fir.do_loop %[[VAL_56:.*]] = %[[VAL_51]] to %[[VAL_53]] step %[[VAL_5]] iter_args(%[[VAL_57:.*]] = %[[VAL_54]]) -> (index, i32) {
 // CHECK:               fir.store %[[VAL_57]] to %[[VAL_32]] : !fir.ref<i32>
-// TODO: read from global assumed to always alias
-// CHECK:               %[[VAL_58:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
+// CHECK:               %[[VAL_58:.*]] = fir.load %[[VAL_16]] {tbaa = [#[[GLBL_XSTART_TAG]]]} : !fir.ref<i32>
 // CHECK:               %[[VAL_59:.*]] = arith.addi %[[VAL_58]], %[[VAL_6]] : i32
 // CHECK:               %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (i32) -> index
 // CHECK:               %[[VAL_61:.*]] = fir.convert %[[VAL_60]] : (index) -> i32
@@ -302,8 +315,7 @@
 // CHECK:                 %[[VAL_83:.*]]:3 = fir.box_dims %[[VAL_79]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
 // CHECK:                 %[[VAL_84:.*]] = fir.shape_shift %[[VAL_83]]#0, %[[VAL_83]]#1 : (index, index) -> !fir.shapeshift<1>
 // CHECK:                 %[[VAL_85:.*]] = fir.array_coor %[[VAL_82]](%[[VAL_84]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
-// load from global variable
-// CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_85]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_85]] {tbaa = [#[[GLBL_A_TAG]]]} : !fir.ref<f32>
 // load from box
 // CHECK:                 %[[VAL_87:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
 // load from local allocation
@@ -326,8 +338,7 @@
 // CHECK:                 %[[VAL_102:.*]]:3 = fir.box_dims %[[VAL_100]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
 // CHECK:                 %[[VAL_103:.*]] = fir.shape_shift %[[VAL_102]]#0, %[[VAL_102]]#1 : (index, index) -> !fir.shapeshift<1>
 // CHECK:                 %[[VAL_104:.*]] = fir.array_coor %[[VAL_101]](%[[VAL_103]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
-// load from global variable
-// CHECK:                 %[[VAL_105:.*]] = fir.load %[[VAL_104]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_105:.*]] = fir.load %[[VAL_104]] {tbaa = [#[[GLBL_B_TAG]]]} : !fir.ref<f32>
 // CHECK:                 %[[VAL_106:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
 // CHECK:                 %[[VAL_107:.*]] = fir.load %[[VAL_106]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
 // CHECK:                 %[[VAL_108:.*]] = arith.subi %[[VAL_80]], %[[VAL_6]] : i32
@@ -336,8 +347,7 @@
 // CHECK:                 %[[VAL_111:.*]] = fir.load %[[VAL_110]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
 // CHECK:                 %[[VAL_112:.*]] = arith.subf %[[VAL_107]], %[[VAL_111]] fastmath<contract> : f32
 // CHECK:                 %[[VAL_113:.*]] = fir.no_reassoc %[[VAL_112]] : f32
-// load from global variable
-// CHECK:                 %[[VAL_114:.*]] = fir.load %[[VAL_14]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_114:.*]] = fir.load %[[VAL_14]] {tbaa = [#[[GLBL_DYINV_TAG]]]} : !fir.ref<f32>
 // CHECK:                 %[[VAL_115:.*]] = arith.mulf %[[VAL_113]], %[[VAL_114]] fastmath<contract> : f32
 // CHECK:                 %[[VAL_116:.*]] = arith.subi %[[VAL_90]], %[[VAL_6]] : i32
 // CHECK:                 %[[VAL_117:.*]] = fir.convert %[[VAL_116]] : (i32) -> i64

>From df18f16e5abd9915fb80e7f07af88b3449098cf9 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Thu, 12 Oct 2023 11:03:34 +0000
Subject: [PATCH 16/16] Separate access to Direct data from Global data

---
 .../flang/Optimizer/Analysis/TBAAForest.h     |  1 +
 flang/lib/Optimizer/Analysis/TBAAForest.cpp   |  1 +
 .../lib/Optimizer/Transforms/AddAliasTags.cpp | 22 +++++++++++++++++--
 flang/test/Transforms/tbaa2.fir               | 13 ++++++-----
 4 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/flang/include/flang/Optimizer/Analysis/TBAAForest.h b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
index a024544e50ef98b..86030f0be26aecc 100644
--- a/flang/include/flang/Optimizer/Analysis/TBAAForest.h
+++ b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
@@ -55,6 +55,7 @@ struct TBAATree {
   SubtreeState globalDataTree;
   SubtreeState allocatedDataTree;
   SubtreeState dummyArgDataTree;
+  SubtreeState directDataTree;
   mlir::LLVM::TBAATypeDescriptorAttr anyAccessDesc;
   mlir::LLVM::TBAATypeDescriptorAttr boxMemberTypeDesc;
   mlir::LLVM::TBAATypeDescriptorAttr anyDataTypeDesc;
diff --git a/flang/lib/Optimizer/Analysis/TBAAForest.cpp b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
index 070e2be6700cc11..786c4932ea89e26 100644
--- a/flang/lib/Optimizer/Analysis/TBAAForest.cpp
+++ b/flang/lib/Optimizer/Analysis/TBAAForest.cpp
@@ -56,5 +56,6 @@ fir::TBAATree::TBAATree(mlir::LLVM::TBAATypeDescriptorAttr anyAccess,
     : globalDataTree(dataRoot.getContext(), "global data", dataRoot),
       allocatedDataTree(dataRoot.getContext(), "allocated data", dataRoot),
       dummyArgDataTree(dataRoot.getContext(), "dummy arg data", dataRoot),
+      directDataTree(dataRoot.getContext(), "direct data", dataRoot),
       anyAccessDesc(anyAccess), boxMemberTypeDesc(boxMemberTypeDesc),
       anyDataTypeDesc(dataRoot) {}
diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 733146f05a232f2..684aa4462915e51 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -38,6 +38,9 @@ static llvm::cl::opt<bool>
 static llvm::cl::opt<bool>
     enableGlobals("globals-tbaa", llvm::cl::init(true), llvm::cl::Hidden,
                   llvm::cl::desc("Add TBAA tags to global variables"));
+static llvm::cl::opt<bool>
+    enableDirect("direct-tbaa", llvm::cl::init(true), llvm::cl::Hidden,
+                 llvm::cl::desc("Add TBAA tags to direct variables"));
 // This is **known unsafe** (misscompare in spec2017/wrf_r). It should
 // not be enabled by default.
 // The code is kept so that these may be tried with new benchmarks to see if
@@ -151,14 +154,29 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
 
     // TBAA for global variables
   } else if (enableGlobals &&
-             (source.kind == fir::AliasAnalysis::SourceKind::Global ||
-              source.kind == fir::AliasAnalysis::SourceKind::Direct)) {
+             source.kind == fir::AliasAnalysis::SourceKind::Global) {
     mlir::SymbolRefAttr glbl = source.u.get<mlir::SymbolRefAttr>();
     const char *name = glbl.getRootReference().data();
     LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to global " << name
                                       << " at " << *op << "\n");
     tag = state.getFuncTree(func).globalDataTree.getTag(name);
 
+    // TBAA for SourceKind::Direct
+  } else if (enableDirect &&
+             source.kind == fir::AliasAnalysis::SourceKind::Direct) {
+    if (source.u.is<mlir::SymbolRefAttr>()) {
+      mlir::SymbolRefAttr glbl = source.u.get<mlir::SymbolRefAttr>();
+      const char *name = glbl.getRootReference().data();
+      LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to direct " << name
+                                        << " at " << *op << "\n");
+      tag = state.getFuncTree(func).directDataTree.getTag(name);
+    } else {
+      // SourceKind::Direct is likely to be extended to cases which are not a
+      // SymbolRefAttr in the future
+      LLVM_DEBUG(llvm::dbgs().indent(2) << "Can't get name for direct "
+                                        << source << " at " << *op << "\n");
+    }
+
     // TBAA for local allocations
   } else if (enableLocalAllocs &&
              source.kind == fir::AliasAnalysis::SourceKind::Allocate) {
diff --git a/flang/test/Transforms/tbaa2.fir b/flang/test/Transforms/tbaa2.fir
index 47fd7be06aaa6a2..ab39f65cdade700 100644
--- a/flang/test/Transforms/tbaa2.fir
+++ b/flang/test/Transforms/tbaa2.fir
@@ -49,14 +49,15 @@
 // CHECK: #[[ANY_DATA:.+]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANY_ACCESS]], 0>}>
 // CHECK: #[[ANY_GLBL:.+]] = #llvm.tbaa_type_desc<id = "global data", members = {<#[[ANY_DATA]], 0>}>
 // CHECK: #[[ANY_ARG:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data", members = {<#[[ANY_DATA]], 0>}>
+// CHECK: #[[ANY_DIRECT:.+]] = #llvm.tbaa_type_desc<id = "direct data", members = {<#[[ANY_DATA]], 0>}>
 // CHECK: #[[GLBL_ZSTART:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEzstart", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[GLBL_ZSTOP:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEzstop", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[GLBL_YSTART:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEystart", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[GLBL_YSTOP:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEystop", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[GLBL_XSTART:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodExstart", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[ARG_LOW:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeElow", members = {<#[[ANY_ARG]], 0>}>
-// CHECK: #[[GLBL_A:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEa", members = {<#[[ANY_GLBL]], 0>}>
-// CHECK: #[[GLBL_B:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEb", members = {<#[[ANY_GLBL]], 0>}>
+// CHECK: #[[DIRECT_A:.+]] = #llvm.tbaa_type_desc<id = "direct data/_QMmodEa", members = {<#[[ANY_DIRECT]], 0>}>
+// CHECK: #[[DIRECT_B:.+]] = #llvm.tbaa_type_desc<id = "direct data/_QMmodEb", members = {<#[[ANY_DIRECT]], 0>}>
 // CHECK: #[[ARG_Z:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEz", members = {<#[[ANY_ARG]], 0>}>
 // CHECK: #[[GLBL_DYINV:.+]] = #llvm.tbaa_type_desc<id = "global data/_QMmodEdyinv", members = {<#[[ANY_GLBL]], 0>}>
 // CHECK: #[[ARG_Y:.+]] = #llvm.tbaa_type_desc<id = "dummy arg data/_QMmodFcalleeEy", members = {<#[[ANY_ARG]], 0>}>
@@ -67,8 +68,8 @@
 // CHECK: #[[GLBL_YSTOP_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_YSTOP]], access_type = #[[GLBL_YSTOP]], offset = 0>
 // CHECK: #[[GLBL_XSTART_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_XSTART]], access_type = #[[GLBL_XSTART]], offset = 0>
 // CHECK: #[[ARG_LOW_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_LOW]], access_type = #[[ARG_LOW]], offset = 0>
-// CHECK: #[[GLBL_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_A]], access_type = #[[GLBL_A]], offset = 0>
-// CHECK: #[[GLBL_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_B]], access_type = #[[GLBL_B]], offset = 0>
+// CHECK: #[[DIRECT_A_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[DIRECT_A]], access_type = #[[DIRECT_A]], offset = 0>
+// CHECK: #[[DIRECT_B_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[DIRECT_B]], access_type = #[[DIRECT_B]], offset = 0>
 // CHECK: #[[ARG_Z_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Z]], access_type = #[[ARG_Z]], offset = 0>
 // CHECK: #[[GLBL_DYINV_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[GLBL_DYINV]], access_type = #[[GLBL_DYINV]], offset = 0>
 // CHECK: #[[ARG_Y_TAG:.+]] = #llvm.tbaa_tag<base_type = #[[ARG_Y]], access_type = #[[ARG_Y]], offset = 0>
@@ -315,7 +316,7 @@
 // CHECK:                 %[[VAL_83:.*]]:3 = fir.box_dims %[[VAL_79]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
 // CHECK:                 %[[VAL_84:.*]] = fir.shape_shift %[[VAL_83]]#0, %[[VAL_83]]#1 : (index, index) -> !fir.shapeshift<1>
 // CHECK:                 %[[VAL_85:.*]] = fir.array_coor %[[VAL_82]](%[[VAL_84]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
-// CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_85]] {tbaa = [#[[GLBL_A_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_85]] {tbaa = [#[[DIRECT_A_TAG]]]} : !fir.ref<f32>
 // load from box
 // CHECK:                 %[[VAL_87:.*]] = fir.load %[[VAL_35]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf32>>>>
 // load from local allocation
@@ -338,7 +339,7 @@
 // CHECK:                 %[[VAL_102:.*]]:3 = fir.box_dims %[[VAL_100]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
 // CHECK:                 %[[VAL_103:.*]] = fir.shape_shift %[[VAL_102]]#0, %[[VAL_102]]#1 : (index, index) -> !fir.shapeshift<1>
 // CHECK:                 %[[VAL_104:.*]] = fir.array_coor %[[VAL_101]](%[[VAL_103]]) %[[VAL_81]] : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>, i64) -> !fir.ref<f32>
-// CHECK:                 %[[VAL_105:.*]] = fir.load %[[VAL_104]] {tbaa = [#[[GLBL_B_TAG]]]} : !fir.ref<f32>
+// CHECK:                 %[[VAL_105:.*]] = fir.load %[[VAL_104]] {tbaa = [#[[DIRECT_B_TAG]]]} : !fir.ref<f32>
 // CHECK:                 %[[VAL_106:.*]] = fir.array_coor %[[VAL_39]] %[[VAL_89]], %[[VAL_81]], %[[VAL_91]] : (!fir.box<!fir.array<?x?x?xf32>>, i64, i64, i64) -> !fir.ref<f32>
 // CHECK:                 %[[VAL_107:.*]] = fir.load %[[VAL_106]] {tbaa = [#[[ARG_Z_TAG]]]} : !fir.ref<f32>
 // CHECK:                 %[[VAL_108:.*]] = arith.subi %[[VAL_80]], %[[VAL_6]] : i32



More information about the flang-commits mailing list