[Mlir-commits] [mlir] 51d43bb - [MLIR] Fix affine parallelize pass.

Wed Jun 16 12:56:14 PDT 2021

Author: Prashant Kumar
Date: 2021-06-17T01:25:24+05:30
New Revision: 51d43bbc4662202d7f694c43b968fb289a56a355

URL: https://github.com/llvm/llvm-project/commit/51d43bbc4662202d7f694c43b968fb289a56a355
DIFF: https://github.com/llvm/llvm-project/commit/51d43bbc4662202d7f694c43b968fb289a56a355.diff

LOG: [MLIR] Fix affine parallelize pass.

To control the number of outer parallel loops, the outer loops must be
processed first; walking the function in pre-order ensures this.

Reviewed By: bondhugula

Differential Revision: https://reviews.llvm.org/D104361

Added: 
    

Modified: 
    mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
    mlir/test/Dialect/Affine/parallelize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
index 62519908a248f..237094d400062 100644

--- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
@@ -50,14 +50,13 @@ struct ParallelizationCandidate {
 void AffineParallelize::runOnFunction() {
   FuncOp f = getFunction();
 
-  // The walker proceeds in post-order, but we need to process outer loops first
-  // to control the number of outer parallel loops, so push candidate loops to
-  // the front of a deque.
-  std::deque<ParallelizationCandidate> parallelizableLoops;
-  f.walk([&](AffineForOp loop) {
+  // The walker proceeds in pre-order to process the outer loops first
+  // and control the number of outer parallel loops.
+  std::vector<ParallelizationCandidate> parallelizableLoops;
+  f.walk<WalkOrder::PreOrder>([&](AffineForOp loop) {
     SmallVector<LoopReduction> reductions;
     if (isLoopParallel(loop, parallelReductions ? &reductions : nullptr))
-      parallelizableLoops.emplace_back(loop, std::move(reductions));
+      parallelizableLoops.push_back({loop, std::move(reductions)});
   });
 
   for (const ParallelizationCandidate &candidate : parallelizableLoops) {

diff  --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
index bb98e654a80d7..dc5c435c1e9c0 100644
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -155,6 +155,27 @@ func @max_nested(%m: memref<?x?xf32>, %lb0: index, %lb1: index,
   return
 }
 
+// MAX-NESTED-LABEL: @max_nested_1
+func @max_nested_1(%arg0: memref<4096x4096xf32>, %arg1: memref<4096x4096xf32>, %arg2: memref<4096x4096xf32>) {
+  %0 = memref.alloc() : memref<4096x4096xf32>
+  // MAX-NESTED: affine.parallel
+  affine.for %arg3 = 0 to 4096 {
+    // MAX-NESTED-NEXT: affine.for
+    affine.for %arg4 = 0 to 4096 {
+      // MAX-NESTED-NEXT: affine.for
+      affine.for %arg5 = 0 to 4096 {
+        %1 = affine.load %arg0[%arg3, %arg5] : memref<4096x4096xf32>
+        %2 = affine.load %arg1[%arg5, %arg4] : memref<4096x4096xf32>
+        %3 = affine.load %0[%arg3, %arg4] : memref<4096x4096xf32>
+        %4 = mulf %1, %2 : f32
+        %5 = addf %3, %4 : f32
+        affine.store %5, %0[%arg3, %arg4] : memref<4096x4096xf32>
+      }
+    }
+  }
+  return
+}
+
 // CHECK-LABEL: @iter_args
 // REDUCE-LABEL: @iter_args
 func @iter_args(%in: memref<10xf32>) {