[flang-commits] [flang] [flang][AddAliasTags] Generalise tbaa tags beyond functions (PR #92379)

Tom Eccles via flang-commits flang-commits at lists.llvm.org
Thu May 16 03:41:45 PDT 2024


https://github.com/tblah created https://github.com/llvm/llvm-project/pull/92379

TL;DR without using the undocumented option this should be NFC. Even with the option, it is probably still NFC in practice.

This patch is intended to upstream some experiment code that might be more broadly useful.

This changes the per-function TBAA trees to be per-(top-level)-symbol. This allows running the pass on other top level operations e.g. openmp reductions. Symbol names should be unique so the trees should not collide (without multiple inlining like in #89829).

The current alias code can't generate useful alias tags for reductions (it just sees the reduction variables come from a block argument and correctly concludes it doesn't know anything about them). But in the future this could be useful to allow array reductions to vectorize. For this reason, I don't think the current code will generate alias tags for existing top level operations other than functions, but I would prefer to leave the option off until there is a real use case just to be safe.

I have tested with the option enabled and do not know of any misscompilations.

>From a6ef4dde10b1567f5f7d86cfd98ff8d3064faa3f Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Fri, 26 Apr 2024 14:56:59 +0000
Subject: [PATCH] [flang][AddAliasTags] Generalise tbaa tags beyond functions

TL;DR without using the undocumented option this should be NFC. Even with
the option, it is probably still NFC in practice.

This patch is intended to upstream some experiment code that might be more
broadly useful.

This changes the per-function TBAA trees to be per-(top-level)-symbol.
This allows running the pass on other top level operations e.g. openmp
reductions. Symbol names should be unique so the trees should not
collide (without multiple inlining like in #89829).

The current alias code can't generate useful alias tags for reductions
(it just sees the reduction variables come from a block argument and
correctly concludes it doesn't know anything about them). But in the
future this could be useful to allow array reductions to vectorize.
For this reason, I don't think the current code will generate alias
tags for existing top level operations other than functions, but I
would prefer to leave the option off until there is a real use case just
to be safe.

I have tested with the option enabled and do not know of any
misscompilations.
---
 .../flang/Optimizer/Analysis/TBAAForest.h     |  8 +--
 .../lib/Optimizer/Transforms/AddAliasTags.cpp | 37 +++++++---
 flang/test/Transforms/tbaa3.fir               | 72 +++++++++++++++++++
 3 files changed, 104 insertions(+), 13 deletions(-)
 create mode 100644 flang/test/Transforms/tbaa3.fir

diff --git a/flang/include/flang/Optimizer/Analysis/TBAAForest.h b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
index 619ed4939c51c..f82a3fce3f9e9 100644
--- a/flang/include/flang/Optimizer/Analysis/TBAAForest.h
+++ b/flang/include/flang/Optimizer/Analysis/TBAAForest.h
@@ -73,15 +73,15 @@ struct TBAATree {
 //===----------------------------------------------------------------------===//
 // TBAAForrest
 //===----------------------------------------------------------------------===//
-/// Collection of TBAATrees, usually indexed by function (so that each function
-/// has a different TBAATree)
+/// Collection of TBAATrees, usually indexed by symbol (so that each
+/// function-like container operation has a different TBAATree)
 class TBAAForrest {
 public:
   explicit TBAAForrest(bool separatePerFunction = true)
       : separatePerFunction{separatePerFunction} {}
 
-  inline const TBAATree &operator[](mlir::func::FuncOp func) {
-    return getFuncTree(func.getSymNameAttr());
+  inline const TBAATree &operator[](mlir::SymbolOpInterface sym) {
+    return getFuncTree(sym.getNameAttr());
   }
   inline const TBAATree &operator[](mlir::LLVM::LLVMFuncOp func) {
     // the external name conversion pass may rename some functions. Their old
diff --git a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
index 3642a812096db..643218583b0d9 100644
--- a/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
+++ b/flang/lib/Optimizer/Transforms/AddAliasTags.cpp
@@ -49,6 +49,13 @@ static llvm::cl::opt<bool> enableLocalAllocs(
     "local-alloc-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
     llvm::cl::desc("Add TBAA tags to local allocations. UNSAFE."));
 
+// Run on function-like top-level operations (e.g. omp.declare_reduction).
+// This needs more testing before enabling by default:
+static llvm::cl::opt<bool> runOnNonFunctions(
+    "nonfunc-tbaa", llvm::cl::init(false), llvm::cl::Hidden,
+    llvm::cl::desc("Add TBAA tags to operations insode of container operations "
+                   "other than func.func"));
+
 namespace {
 
 /// Shared state per-module
@@ -61,9 +68,9 @@ class PassState {
     return analysisCache[value];
   }
 
-  /// get the per-function TBAATree for this function
-  inline const fir::TBAATree &getFuncTree(mlir::func::FuncOp func) {
-    return forrest[func];
+  /// get the per-function TBAATree for this function-like op
+  inline const fir::TBAATree &getFuncTree(mlir::SymbolOpInterface symbol) {
+    return forrest[symbol];
   }
 
 private:
@@ -113,10 +120,22 @@ static std::string getFuncArgName(mlir::Value arg) {
   return attr.str();
 }
 
+static mlir::SymbolOpInterface getTopLevelSymbolParent(mlir::Operation *op) {
+  mlir::SymbolOpInterface parent =
+      op->getParentOfType<mlir::SymbolOpInterface>();
+  mlir::SymbolOpInterface oldParent = parent;
+  while (parent) {
+    parent = getTopLevelSymbolParent(parent);
+    if (parent)
+      oldParent = parent;
+  }
+  return oldParent;
+}
+
 void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
                                            PassState &state) {
-  mlir::func::FuncOp func = op->getParentOfType<mlir::func::FuncOp>();
-  if (!func)
+  mlir::SymbolOpInterface parent = getTopLevelSymbolParent(op);
+  if (!runOnNonFunctions && !mlir::isa<mlir::func::FuncOp>(parent))
     return;
 
   llvm::SmallVector<mlir::Value> accessedOperands = op.getAccessedOperands();
@@ -147,7 +166,7 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
                << "Found reference to dummy argument at " << *op << "\n");
     std::string name = getFuncArgName(source.u.get<mlir::Value>());
     if (!name.empty())
-      tag = state.getFuncTree(func).dummyArgDataTree.getTag(name);
+      tag = state.getFuncTree(parent).dummyArgDataTree.getTag(name);
     else
       LLVM_DEBUG(llvm::dbgs().indent(2)
                  << "WARN: couldn't find a name for dummy argument " << *op
@@ -160,7 +179,7 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
     const char *name = glbl.getRootReference().data();
     LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to global " << name
                                       << " at " << *op << "\n");
-    tag = state.getFuncTree(func).globalDataTree.getTag(name);
+    tag = state.getFuncTree(parent).globalDataTree.getTag(name);
 
     // TBAA for SourceKind::Direct
   } else if (enableDirect &&
@@ -170,7 +189,7 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
       const char *name = glbl.getRootReference().data();
       LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to direct " << name
                                         << " at " << *op << "\n");
-      tag = state.getFuncTree(func).directDataTree.getTag(name);
+      tag = state.getFuncTree(parent).directDataTree.getTag(name);
     } else {
       // SourceKind::Direct is likely to be extended to cases which are not a
       // SymbolRefAttr in the future
@@ -190,7 +209,7 @@ void AddAliasTagsPass::runOnAliasInterface(fir::FirAliasTagOpInterface op,
     if (name) {
       LLVM_DEBUG(llvm::dbgs().indent(2) << "Found reference to allocation "
                                         << name << " at " << *op << "\n");
-      tag = state.getFuncTree(func).allocatedDataTree.getTag(*name);
+      tag = state.getFuncTree(parent).allocatedDataTree.getTag(*name);
     } else {
       LLVM_DEBUG(llvm::dbgs().indent(2)
                  << "WARN: couldn't find a name for allocation " << *op
diff --git a/flang/test/Transforms/tbaa3.fir b/flang/test/Transforms/tbaa3.fir
new file mode 100644
index 0000000000000..18121edfe1a73
--- /dev/null
+++ b/flang/test/Transforms/tbaa3.fir
@@ -0,0 +1,72 @@
+// Test experimental option to add tbaa trees for things that aren't functions
+
+// RUN: fir-opt --nonfunc-tbaa --fir-add-alias-tags %s | FileCheck %s
+
+
+// CHECK: #[[TBAA_ROOT:.*]] = #llvm.tbaa_root<id = "Flang function root add_reduction_byref_box_3xi32">
+// CHECK: #[[ANY_ACCESS:.*]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[TBAA_ROOT]], 0>}>
+// CHECK: #[[ANY_DATA:.*]] = #llvm.tbaa_type_desc<id = "any data access", members = {<#[[ANY_ACCESS]], 0>}>
+// CHECK: #[[DIRECT_DATA:.*]] = #llvm.tbaa_type_desc<id = "direct data", members = {<#[[ANY_DATA]], 0>}>
+// CHECK: #[[GLOBAL_A:.*]] = #llvm.tbaa_type_desc<id = "direct data/_QMmodEa", members = {<#[[DIRECT_DATA]], 0>}>
+// CHECK: #[[GLOBAL_A_TAG:.*]] = #llvm.tbaa_tag<base_type = #[[GLOBAL_A]], access_type = #[[GLOBAL_A]], offset = 0>
+
+fir.global @_QMmodEa : !fir.box<!fir.heap<!fir.array<?xi32>>> {
+  %c0 = arith.constant 0 : index
+  %0 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+  %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %2 = fir.embox %0(%1) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?xi32>>>
+}
+
+// nonsense reduction to give us something that we can generate TBAA tags for
+omp.declare_reduction @add_reduction_byref_box_3xi32 : !fir.ref<!fir.box<!fir.array<3xi32>>> init {
+^bb0(%arg0: !fir.ref<!fir.box<!fir.array<3xi32>>>):
+  %c0_i32 = arith.constant 0 : i32
+  %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+  %1 = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %c3 = arith.constant 3 : index
+  %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+  %3 = fir.allocmem !fir.array<3xi32> {bindc_name = ".tmp", uniq_name = ""}
+  %true = arith.constant true
+  %4:2 = hlfir.declare %3(%2) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.heap<!fir.array<3xi32>>, !fir.heap<!fir.array<3xi32>>)
+  %c0 = arith.constant 0 : index
+  %5:3 = fir.box_dims %0, %c0 : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
+  %6 = fir.shape_shift %5#0, %5#1 : (index, index) -> !fir.shapeshift<1>
+  %7 = fir.embox %4#0(%6) : (!fir.heap<!fir.array<3xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<3xi32>>
+  hlfir.assign %c0_i32 to %7 : i32, !fir.box<!fir.array<3xi32>>
+  %8 = fir.convert %7 : (!fir.box<!fir.array<3xi32>>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %8 to %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %9 = fir.convert %1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<3xi32>>>
+  omp.yield(%9 : !fir.ref<!fir.box<!fir.array<3xi32>>>)
+} combiner {
+^bb0(%arg0: !fir.ref<!fir.box<!fir.array<3xi32>>>, %arg1: !fir.ref<!fir.box<!fir.array<3xi32>>>):
+  %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+  %1 = fir.address_of(@_QMmodEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %a = fir.load %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %c0 = arith.constant 0 : index
+  %2:3 = fir.box_dims %0, %c0 : (!fir.box<!fir.array<3xi32>>, index) -> (index, index, index)
+  %3 = fir.shape_shift %2#0, %2#1 : (index, index) -> !fir.shapeshift<1>
+  %c1 = arith.constant 1 : index
+  fir.do_loop %arg2 = %c1 to %2#1 step %c1 unordered {
+    %4 = fir.array_coor %0(%3) %arg2 : (!fir.box<!fir.array<3xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+    %5 = fir.array_coor %a(%3) %arg2 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+    %6 = fir.load %4 : !fir.ref<i32>
+    %7 = fir.load %5 : !fir.ref<i32>
+// CHECK:      fir.load %{{.*}} {tbaa = [#[[GLOBAL_A_TAG]]]} : !fir.ref<i32>
+    %8 = arith.addi %6, %7 : i32
+    fir.store %8 to %4 : !fir.ref<i32>
+  }
+  omp.yield(%arg0 : !fir.ref<!fir.box<!fir.array<3xi32>>>)
+}  cleanup {
+^bb0(%arg0: !fir.ref<!fir.box<!fir.array<3xi32>>>):
+  %0 = fir.load %arg0 : !fir.ref<!fir.box<!fir.array<3xi32>>>
+  %1 = fir.box_addr %0 : (!fir.box<!fir.array<3xi32>>) -> !fir.ref<!fir.array<3xi32>>
+  %2 = fir.convert %1 : (!fir.ref<!fir.array<3xi32>>) -> i64
+  %c0_i64 = arith.constant 0 : i64
+  %3 = arith.cmpi ne, %2, %c0_i64 : i64
+  fir.if %3 {
+    %4 = fir.convert %1 : (!fir.ref<!fir.array<3xi32>>) -> !fir.heap<!fir.array<3xi32>>
+    fir.freemem %4 : !fir.heap<!fir.array<3xi32>>
+  }
+  omp.yield
+}



More information about the flang-commits mailing list