[Mlir-commits] [mlir] 6981f7e - [mlir] account for explicit affine.parallel in parallelization (#130812)

Tue Mar 11 18:53:54 PDT 2025

Author: Oleksandr "Alex" Zinenko
Date: 2025-03-11T20:53:50-05:00
New Revision: 6981f7e92a051fcc7cd2688bbb0230275b7d2360

URL: https://github.com/llvm/llvm-project/commit/6981f7e92a051fcc7cd2688bbb0230275b7d2360
DIFF: https://github.com/llvm/llvm-project/commit/6981f7e92a051fcc7cd2688bbb0230275b7d2360.diff

LOG: [mlir] account for explicit affine.parallel in parallelization (#130812)

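Affine parallelization must take enclosing explicit `affine.parallel` loops
into account when computing the loop depth used for dependency analysis.
Previously only `affine.for` loops were counted, so the computed depth could
be too small, potentially leading to loops incorrectly being marked as
parallel. `getNestingDepth` now adds one level per dimension
(`getNumDims()`) of each enclosing `affine.parallel`.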

Added: 
    

Modified: 
    mlir/lib/Dialect/Affine/Analysis/Utils.cpp
    mlir/test/Dialect/Affine/parallelize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
index cf9eaa9e7d66d..86aba7b187535 100644

--- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -1988,6 +1988,8 @@ unsigned mlir::affine::getNestingDepth(Operation *op) {
   while ((currOp = currOp->getParentOp())) {
     if (isa<AffineForOp>(currOp))
       depth++;
+    if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
+      depth += parOp.getNumDims();
   }
   return depth;
 }

diff  --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
index b3bb20929c334..bfd1720959861 100644
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -341,3 +341,23 @@ func.func @test_add_inv_or_terminal_symbol(%arg0: memref<9x9xi32>, %arg1: i1) {
   }
   return
 }
+
+// Ensure that outer parallel loops are taken into account when computing the
+// loop depth in dependency analysis during parallelization. With correct
+// depth, the analysis should see the inner loop as sequential due to reads and
+// writes to the same address indexed by the outer (parallel) loop.
+//
+// CHECK-LABEL: @explicit_parallel
+func.func @explicit_parallel(%arg0: memref<1x123x194xf64>, %arg5: memref<34x99x194xf64>) {
+  // CHECK: affine.parallel
+  affine.parallel (%arg7, %arg8) = (0, 0) to (85, 180) {
+    // CHECK: affine.for
+    affine.for %arg9 = 0 to 18 {
+      %0 = affine.load %arg0[0, %arg7 + 19, %arg8 + 7] : memref<1x123x194xf64>
+      %1 = affine.load %arg5[%arg9 + 8, %arg7 + 7, %arg8 + 7] : memref<34x99x194xf64>
+      %2 = arith.addf %0, %1 {fastmathFlags = #llvm.fastmath<none>} : f64
+      affine.store %1, %arg0[0, %arg7 + 19, %arg8 + 7] : memref<1x123x194xf64>
+    }
+  }
+  return
+}