[Mlir-commits] [mlir] 51d43bb - [MLIR] Fix affine parallelize pass.
Uday Bondhugula
llvmlistbot at llvm.org
Wed Jun 16 12:56:14 PDT 2021
Author: Prashant Kumar
Date: 2021-06-17T01:25:24+05:30
New Revision: 51d43bbc4662202d7f694c43b968fb289a56a355
URL: https://github.com/llvm/llvm-project/commit/51d43bbc4662202d7f694c43b968fb289a56a355
DIFF: https://github.com/llvm/llvm-project/commit/51d43bbc4662202d7f694c43b968fb289a56a355.diff
LOG: [MLIR] Fix affine parallelize pass.
To control the number of outer parallel loops, the outer loops must be
processed first; switching to a pre-order walk fixes the issue.
Reviewed By: bondhugula
Differential Revision: https://reviews.llvm.org/D104361
Added:
Modified:
mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
mlir/test/Dialect/Affine/parallelize.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
index 62519908a248f..237094d400062 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
@@ -50,14 +50,13 @@ struct ParallelizationCandidate {
void AffineParallelize::runOnFunction() {
FuncOp f = getFunction();
- // The walker proceeds in post-order, but we need to process outer loops first
- // to control the number of outer parallel loops, so push candidate loops to
- // the front of a deque.
- std::deque<ParallelizationCandidate> parallelizableLoops;
- f.walk([&](AffineForOp loop) {
+ // The walker proceeds in pre-order to process the outer loops first
+ // and control the number of outer parallel loops.
+ std::vector<ParallelizationCandidate> parallelizableLoops;
+ f.walk<WalkOrder::PreOrder>([&](AffineForOp loop) {
SmallVector<LoopReduction> reductions;
if (isLoopParallel(loop, parallelReductions ? &reductions : nullptr))
- parallelizableLoops.emplace_back(loop, std::move(reductions));
+ parallelizableLoops.push_back({loop, std::move(reductions)});
});
for (const ParallelizationCandidate &candidate : parallelizableLoops) {
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
index bb98e654a80d7..dc5c435c1e9c0 100644
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -155,6 +155,27 @@ func @max_nested(%m: memref<?x?xf32>, %lb0: index, %lb1: index,
return
}
+// MAX-NESTED-LABEL: @max_nested_1
+func @max_nested_1(%arg0: memref<4096x4096xf32>, %arg1: memref<4096x4096xf32>, %arg2: memref<4096x4096xf32>) {
+ %0 = memref.alloc() : memref<4096x4096xf32>
+ // MAX-NESTED: affine.parallel
+ affine.for %arg3 = 0 to 4096 {
+ // MAX-NESTED-NEXT: affine.for
+ affine.for %arg4 = 0 to 4096 {
+ // MAX-NESTED-NEXT: affine.for
+ affine.for %arg5 = 0 to 4096 {
+ %1 = affine.load %arg0[%arg3, %arg5] : memref<4096x4096xf32>
+ %2 = affine.load %arg1[%arg5, %arg4] : memref<4096x4096xf32>
+ %3 = affine.load %0[%arg3, %arg4] : memref<4096x4096xf32>
+ %4 = mulf %1, %2 : f32
+ %5 = addf %3, %4 : f32
+ affine.store %5, %0[%arg3, %arg4] : memref<4096x4096xf32>
+ }
+ }
+ }
+ return
+}
+
// CHECK-LABEL: @iter_args
// REDUCE-LABEL: @iter_args
func @iter_args(%in: memref<10xf32>) {
More information about the Mlir-commits mailing list