[Mlir-commits] [mlir] [MLIR][Affine] Check dependences during MDG init (PR #156422)
Uday Bondhugula
llvmlistbot at llvm.org
Tue Sep 2 01:24:55 PDT 2025
https://github.com/bondhugula created https://github.com/llvm/llvm-project/pull/156422
Check affine dependences precisely during MDG init before adding edges. We were conservatively only checking for memref-level conflicts.
>From 65ceab0756ba85c77f1c97184a87982d50e42d4e Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday at polymagelabs.com>
Date: Thu, 21 Aug 2025 17:53:11 +0530
Subject: [PATCH] [MLIR][Affine] Check dependences during MDG init
Check affine dependences precisely during MDG init before adding edges.
We were conservatively only checking for memref-level conflicts.
---
.../mlir/Dialect/Affine/Analysis/Utils.h | 14 ++++-
mlir/lib/Dialect/Affine/Analysis/Utils.cpp | 62 +++++++++++++++++--
.../Dialect/Affine/loop-fusion-sibling.mlir | 23 +++++++
3 files changed, 91 insertions(+), 8 deletions(-)
create mode 100644 mlir/test/Dialect/Affine/loop-fusion-sibling.mlir
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
index 0dd8de4f70039..17ba09ff81c3d 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
@@ -153,9 +153,17 @@ struct MemRefDependenceGraph {
MemRefDependenceGraph(Block &block) : block(block) {}
- // Initializes the dependence graph based on operations in `block'.
- // Returns true on success, false otherwise.
- bool init();
+ // Initializes the data dependence graph by walking operations in the MDG's
+ // `block`. A `Node` is created for every top-level op except for
+ // side-effect-free operations with zero results and no regions. Assigns each
+ // node in the graph a node id based on the order in block. Fails if certain
+ // kinds of operations, for which `Node` creation isn't supported, are
+ // encountered (unknown region holding ops). If `fullAffineDependences` is
+ // set, affine memory dependence analysis is performed before concluding that
+ // conflicting affine memory accesses lead to an edge; otherwise, a
+ // pair of conflicting affine memory accesses (where one of them is a store
+ // and they are to the same memref) always leads to an edge (conservatively).
+ bool init(bool fullAffineDependences = true);
// Returns the graph node for 'id'.
const Node *getNode(unsigned id) const;
diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
index 4739290bf6e4b..555ba8feccde8 100644
--- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -240,7 +240,55 @@ addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
return &node;
}
-bool MemRefDependenceGraph::init() {
+/// Returns true if there may be a dependence on `memref` from srcNode's
+/// memory ops to dstNode's memory ops, while using the affine memory
+/// dependence analysis checks. The method assumes that there is at least one
+/// memory op in srcNode's loads and stores on `memref`, and similarly for
+/// `dstNode`. `srcNode.op` and `dstNode.op` are expected to be nested in the
+/// same block and so the dependences are tested at the depth of that block.
+static bool mayDependence(const Node &srcNode, const Node &dstNode,
+ Value memref) {
+ assert(srcNode.op->getBlock() == dstNode.op->getBlock());
+ if (!isa<AffineForOp>(srcNode.op) || !isa<AffineForOp>(dstNode.op))
+ return true;
+
+ // Non-affine stores: can't check precisely, so conservatively return true.
+ if (!srcNode.memrefStores.empty())
+ return true;
+ if (!dstNode.memrefStores.empty())
+ return true;
+
+ // Non-affine loads with a store in the other.
+ if (!srcNode.memrefLoads.empty() && !dstNode.stores.empty())
+ return true;
+ if (!dstNode.memrefLoads.empty() && !srcNode.stores.empty())
+ return true;
+
+ // Affine load/store pairs. We don't need to check for locally allocated
+ // memrefs since the dependence analysis here is between mem ops from
+ // srcNode's for op to dstNode's for op at the depth at which those
+ // `affine.for` ops are nested, i.e., dependences at depth `d + 1` where
+ // `d` is the number of common surrounding loops.
+ for (auto *srcMemOp :
+ llvm::concat<Operation *const>(srcNode.stores, srcNode.loads)) {
+ MemRefAccess srcAcc(srcMemOp);
+ if (srcAcc.memref != memref)
+ continue;
+ for (auto *destMemOp :
+ llvm::concat<Operation *const>(dstNode.stores, dstNode.loads)) {
+ MemRefAccess destAcc(destMemOp);
+ if (destAcc.memref != memref)
+ continue;
+ // Check for a top-level dependence between srcNode and dstNode's ops.
+ if (!noDependence(checkMemrefAccessDependence(
+ srcAcc, destAcc, getNestingDepth(srcNode.op) + 1)))
+ return true;
+ }
+ }
+ return false;
+}
+
+bool MemRefDependenceGraph::init(bool fullAffineDependences) {
LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
// Map from a memref to the set of ids of the nodes that have ops accessing
// the memref.
@@ -344,8 +392,12 @@ bool MemRefDependenceGraph::init() {
Node *dstNode = getNode(dstId);
bool dstHasStoreOrFree =
dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
- if (srcHasStoreOrFree || dstHasStoreOrFree)
- addEdge(srcId, dstId, srcMemRef);
+ if ((srcHasStoreOrFree || dstHasStoreOrFree)) {
+ // Check precise affine deps if asked for; otherwise, conservative.
+ if (!fullAffineDependences ||
+ mayDependence(*srcNode, *dstNode, srcMemRef))
+ addEdge(srcId, dstId, srcMemRef);
+ }
}
}
}
@@ -563,13 +615,13 @@ MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
}
// Build set of insts in range (srcId, dstId) which depend on 'srcId'.
- SmallPtrSet<Operation *, 2> srcDepInsts;
+ llvm::SmallPtrSet<Operation *, 2> srcDepInsts;
for (auto &outEdge : outEdges.lookup(srcId))
if (outEdge.id != dstId)
srcDepInsts.insert(getNode(outEdge.id)->op);
// Build set of insts in range (srcId, dstId) on which 'dstId' depends.
- SmallPtrSet<Operation *, 2> dstDepInsts;
+ llvm::SmallPtrSet<Operation *, 2> dstDepInsts;
for (auto &inEdge : inEdges.lookup(dstId))
if (inEdge.id != srcId)
dstDepInsts.insert(getNode(inEdge.id)->op);
diff --git a/mlir/test/Dialect/Affine/loop-fusion-sibling.mlir b/mlir/test/Dialect/Affine/loop-fusion-sibling.mlir
new file mode 100644
index 0000000000000..937c855b86b50
--- /dev/null
+++ b/mlir/test/Dialect/Affine/loop-fusion-sibling.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{maximal mode=sibling}))' | FileCheck %s
+
+// Test cases specifically for sibling fusion. Note that sibling fusion test
+// cases also exist in loop-fusion*.mlir.
+
+// CHECK-LABEL: func @disjoint_stores
+func.func @disjoint_stores(%0: memref<8xf32>) {
+ %alloc_1 = memref.alloc() : memref<16xf32>
+ // The affine stores below are to different parts of the memrefs. Sibling
+ // fusion helps improve reuse and is valid.
+ affine.for %arg2 = 0 to 8 {
+ %2 = affine.load %0[%arg2] : memref<8xf32>
+ affine.store %2, %alloc_1[%arg2] : memref<16xf32>
+ }
+ affine.for %arg2 = 0 to 8 {
+ %2 = affine.load %0[%arg2] : memref<8xf32>
+ %3 = arith.negf %2 : f32
+ affine.store %3, %alloc_1[%arg2 + 8] : memref<16xf32>
+ }
+ // CHECK: affine.for
+ // CHECK-NOT: affine.for
+ return
+}
More information about the Mlir-commits
mailing list