[Mlir-commits] [mlir] fbc06b2 - Revert "[MLIR] Parallelize affine.for op to 1-D affine.parallel op"
Mehdi Amini
llvmlistbot at llvm.org
Sat Jul 4 13:56:34 PDT 2020
Author: Mehdi Amini
Date: 2020-07-04T20:55:47Z
New Revision: fbc06b228012f1f7939c2acae9d8435ffc9aeb2c
URL: https://github.com/llvm/llvm-project/commit/fbc06b228012f1f7939c2acae9d8435ffc9aeb2c
DIFF: https://github.com/llvm/llvm-project/commit/fbc06b228012f1f7939c2acae9d8435ffc9aeb2c.diff
LOG: Revert "[MLIR] Parallelize affine.for op to 1-D affine.parallel op"
This reverts commit 5f2843857feee6fbf755c12c21698a4987eda5d1.
This broke the build when -DBUILD_SHARED_LIBS=ON is used.
Added:
mlir/test/Dialect/Affine/parallelism-detection.mlir
mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp
Modified:
mlir/include/mlir/Dialect/Affine/Passes.h
mlir/include/mlir/Dialect/Affine/Passes.td
mlir/include/mlir/Dialect/Affine/Utils.h
mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
mlir/lib/Dialect/Affine/Utils/Utils.cpp
mlir/test/lib/Dialect/Affine/CMakeLists.txt
mlir/tools/mlir-opt/mlir-opt.cpp
Removed:
mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
mlir/test/Dialect/Affine/parallelize.mlir
################################################################################
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index 18b3b790338d..0d7c3be240c9 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -36,10 +36,6 @@ std::unique_ptr<OperationPass<FuncOp>> createSimplifyAffineStructuresPass();
std::unique_ptr<OperationPass<FuncOp>>
createAffineLoopInvariantCodeMotionPass();
-/// Creates a pass to convert all parallel affine.for's into 1-d affine.parallel
-/// ops.
-std::unique_ptr<OperationPass<FuncOp>> createAffineParallelizePass();
-
/// Performs packing (or explicit copying) of accessed memref regions into
/// buffers in the specified faster memory space through either pointwise copies
/// or DMA operations.
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 810640058155..06e0920413a9 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -112,11 +112,6 @@ def AffineVectorize : FunctionPass<"affine-super-vectorize"> {
];
}
-def AffineParallelize : FunctionPass<"affine-parallelize"> {
- let summary = "Convert affine.for ops into 1-D affine.parallel";
- let constructor = "mlir::createAffineParallelizePass()";
-}
-
def SimplifyAffineStructures : FunctionPass<"simplify-affine-structures"> {
let summary = "Simplify affine expressions in maps/sets and normalize "
"memrefs";
diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
index 19df93f760f5..a2c0211b301e 100644
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -15,16 +15,9 @@
namespace mlir {
-class AffineForOp;
class AffineIfOp;
-class AffineParallelOp;
struct LogicalResult;
-/// Replaces parallel affine.for op with 1-d affine.parallel op.
-/// mlir::isLoopParallel detect the parallel affine.for ops.
-/// There is no cost model currently used to drive this parallelization.
-void affineParallelize(AffineForOp forOp);
-
/// Hoists out affine.if/else to as high as possible, i.e., past all invariant
/// affine.fors/parallel's. Returns success if any hoisting happened; `folded` is
/// set to true if the op was folded or erased. This hoisting could lead to
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
deleted file mode 100644
index b3651e202245..000000000000
--- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===- AffineParallelize.cpp - Affineparallelize Pass---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a parallelizer for affine loop nests that is able to
-// perform inner or outer loop parallelization.
-//
-//===----------------------------------------------------------------------===//
-
-#include "PassDetail.h"
-#include "mlir/Analysis/AffineStructures.h"
-#include "mlir/Analysis/LoopAnalysis.h"
-#include "mlir/Analysis/Utils.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
-#include "mlir/Dialect/Affine/Passes.h"
-#include "mlir/Dialect/Affine/Passes.h.inc"
-#include "mlir/Dialect/Affine/Utils.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "llvm/Support/Debug.h"
-
-#define DEBUG_TYPE "affine-parallel"
-
-using namespace mlir;
-
-namespace {
-/// Convert all parallel affine.for op into 1-D affine.parallel op.
-struct AffineParallelize : public AffineParallelizeBase<AffineParallelize> {
- void runOnFunction() override;
-};
-} // namespace
-
-void AffineParallelize::runOnFunction() {
- FuncOp f = getFunction();
- SmallVector<AffineForOp, 8> parallelizableLoops;
- f.walk([&](AffineForOp loop) {
- if (isLoopParallel(loop))
- parallelizableLoops.push_back(loop);
- });
- for (AffineForOp loop : parallelizableLoops)
- affineParallelize(loop);
-}
-
-std::unique_ptr<OperationPass<FuncOp>> mlir::createAffineParallelizePass() {
- return std::make_unique<AffineParallelize>();
-}
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
index 369874830c56..0098c3e21091 100644
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -1,7 +1,6 @@
add_mlir_dialect_library(MLIRAffineTransforms
AffineDataCopyGeneration.cpp
AffineLoopInvariantCodeMotion.cpp
- AffineParallelize.cpp
LoopTiling.cpp
LoopUnroll.cpp
LoopUnrollAndJam.cpp
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index f1a9a0ce43b3..811579bb6c8c 100644
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -129,20 +129,6 @@ static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) {
return hoistedIfOp;
}
-/// Replace affine.for with a 1-d affine.parallel by moving the former's body
-/// into the latter one.
-void mlir::affineParallelize(AffineForOp forOp) {
- Location loc = forOp.getLoc();
- OpBuilder outsideBuilder(forOp);
- // Create empty 1-D affine.parallel op.
- AffineParallelOp newPloop = outsideBuilder.create<AffineParallelOp>(
- loc, forOp.getLowerBoundMap(), forOp.getLowerBoundOperands(),
- forOp.getUpperBoundMap(), forOp.getUpperBoundOperands());
- // Steal the body of the old affine for op and erase it.
- newPloop.region().takeBody(forOp.region());
- forOp.erase();
-}
-
// Returns success if any hoisting happened.
LogicalResult mlir::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) {
// Apply canonicalization patterns and folding - this is necessary for the
diff --git a/mlir/test/Dialect/Affine/parallelism-detection.mlir b/mlir/test/Dialect/Affine/parallelism-detection.mlir
new file mode 100644
index 000000000000..0788e6f8fb20
--- /dev/null
+++ b/mlir/test/Dialect/Affine/parallelism-detection.mlir
@@ -0,0 +1,47 @@
+// RUN: mlir-opt -allow-unregistered-dialect %s -test-detect-parallel -split-input-file -verify-diagnostics | FileCheck %s
+
+// CHECK-LABEL: func @loop_nest_3d_outer_two_parallel
+func @loop_nest_3d_outer_two_parallel(%N : index) {
+ %0 = alloc() : memref<1024 x 1024 x vector<64xf32>>
+ %1 = alloc() : memref<1024 x 1024 x vector<64xf32>>
+ %2 = alloc() : memref<1024 x 1024 x vector<64xf32>>
+ affine.for %i = 0 to %N {
+ // expected-remark at -1 {{parallel loop}}
+ affine.for %j = 0 to %N {
+ // expected-remark at -1 {{parallel loop}}
+ affine.for %k = 0 to %N {
+ // expected-remark at -1 {{sequential loop}}
+ %5 = affine.load %0[%i, %k] : memref<1024x1024xvector<64xf32>>
+ %6 = affine.load %1[%k, %j] : memref<1024x1024xvector<64xf32>>
+ %7 = affine.load %2[%i, %j] : memref<1024x1024xvector<64xf32>>
+ %8 = mulf %5, %6 : vector<64xf32>
+ %9 = addf %7, %8 : vector<64xf32>
+ affine.store %9, %2[%i, %j] : memref<1024x1024xvector<64xf32>>
+ }
+ }
+ }
+ return
+}
+
+// -----
+
+// CHECK-LABEL: unknown_op_conservative
+func @unknown_op_conservative() {
+ affine.for %i = 0 to 10 {
+ // expected-remark at -1 {{sequential loop}}
+ "unknown"() : () -> ()
+ }
+ return
+}
+
+// -----
+
+// CHECK-LABEL: non_affine_load
+func @non_affine_load() {
+ %0 = alloc() : memref<100 x f32>
+ affine.for %i = 0 to 100 {
+ // expected-remark at -1 {{sequential loop}}
+ load %0[%i] : memref<100 x f32>
+ }
+ return
+}
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
deleted file mode 100644
index 5287628185c5..000000000000
--- a/mlir/test/Dialect/Affine/parallelize.mlir
+++ /dev/null
@@ -1,118 +0,0 @@
-// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize| FileCheck %s
-
-// For multiple nested for-loops.
-// CHECK-DAG: [[MAP5:#map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0 + d1, d2 * 2 + d3, d4 * 2 + d5, d6 + d7)>
-// CHECK-LABEL: func @reduce_window_max() {
-func @reduce_window_max() {
- %cst = constant 0.000000e+00 : f32
- %0 = alloc() : memref<1x8x8x64xf32>
- %1 = alloc() : memref<1x18x18x64xf32>
- affine.for %arg0 = 0 to 1 {
- affine.for %arg1 = 0 to 8 {
- affine.for %arg2 = 0 to 8 {
- affine.for %arg3 = 0 to 64 {
- affine.store %cst, %0[%arg0, %arg1, %arg2, %arg3] : memref<1x8x8x64xf32>
- }
- }
- }
- }
- affine.for %arg0 = 0 to 1 {
- affine.for %arg1 = 0 to 8 {
- affine.for %arg2 = 0 to 8 {
- affine.for %arg3 = 0 to 64 {
- affine.for %arg4 = 0 to 1 {
- affine.for %arg5 = 0 to 3 {
- affine.for %arg6 = 0 to 3 {
- affine.for %arg7 = 0 to 1 {
- %2 = affine.load %0[%arg0, %arg1, %arg2, %arg3] : memref<1x8x8x64xf32>
- %3 = affine.load %1[%arg0 + %arg4, %arg1 * 2 + %arg5, %arg2 * 2 + %arg6, %arg3 + %arg7] : memref<1x18x18x64xf32>
- %4 = cmpf "ogt", %2, %3 : f32
- %5 = select %4, %2, %3 : f32
- affine.store %5, %0[%arg0, %arg1, %arg2, %arg3] : memref<1x8x8x64xf32>
- }
- }
- }
- }
- }
- }
- }
- }
- return
-}
-
-// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
-// CHECK: %[[v0:.*]] = alloc() : memref<1x8x8x64xf32>
-// CHECK: %[[v1:.*]] = alloc() : memref<1x18x18x64xf32>
-// CHECK: affine.parallel (%[[arg0:.*]]) = (0) to (1) {
-// CHECK: affine.parallel (%[[arg1:.*]]) = (0) to (8) {
-// CHECK: affine.parallel (%[[arg2:.*]]) = (0) to (8) {
-// CHECK: affine.parallel (%[[arg3:.*]]) = (0) to (64) {
-// CHECK: affine.store %[[cst]], %[[v0]][%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]]] : memref<1x8x8x64xf32>
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: affine.parallel (%[[a0:.*]]) = (0) to (1) {
-// CHECK: affine.parallel (%[[a1:.*]]) = (0) to (8) {
-// CHECK: affine.parallel (%[[a2:.*]]) = (0) to (8) {
-// CHECK: affine.parallel (%[[a3:.*]]) = (0) to (64) {
-// CHECK: affine.parallel (%[[a4:.*]]) = (0) to (1) {
-// CHECK: affine.for %[[a5:.*]] = 0 to 3 {
-// CHECK: affine.for %[[a6:.*]] = 0 to 3 {
-// CHECK: affine.parallel (%[[a7:.*]]) = (0) to (1) {
-// CHECK: %[[lhs:.*]] = affine.load %[[v0]][%[[a0]], %[[a1]], %[[a2]], %[[a3]]] : memref<1x8x8x64xf32>
-// CHECK: %[[rhs:.*]] = affine.load %[[v1]][%[[a0]] + %[[a4]], %[[a1]] * 2 + %[[a5]], %[[a2]] * 2 + %[[a6]], %[[a3]] + %[[a7]]] : memref<1x18x18x64xf32>
-// CHECK: %[[res:.*]] = cmpf "ogt", %[[lhs]], %[[rhs]] : f32
-// CHECK: %[[sel:.*]] = select %[[res]], %[[lhs]], %[[rhs]] : f32
-// CHECK: affine.store %[[sel]], %[[v0]][%[[a0]], %[[a1]], %[[a2]], %[[a3]]] : memref<1x8x8x64xf32>
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: }
-// CHECK: }
-
-func @loop_nest_3d_outer_two_parallel(%N : index) {
- %0 = alloc() : memref<1024 x 1024 x vector<64xf32>>
- %1 = alloc() : memref<1024 x 1024 x vector<64xf32>>
- %2 = alloc() : memref<1024 x 1024 x vector<64xf32>>
- affine.for %i = 0 to %N {
- affine.for %j = 0 to %N {
- %7 = affine.load %2[%i, %j] : memref<1024x1024xvector<64xf32>>
- affine.for %k = 0 to %N {
- %5 = affine.load %0[%i, %k] : memref<1024x1024xvector<64xf32>>
- %6 = affine.load %1[%k, %j] : memref<1024x1024xvector<64xf32>>
- %8 = mulf %5, %6 : vector<64xf32>
- %9 = addf %7, %8 : vector<64xf32>
- affine.store %9, %2[%i, %j] : memref<1024x1024xvector<64xf32>>
- }
- }
- }
- return
-}
-
-// CHECK: affine.parallel (%[[arg1:.*]]) = (0) to (symbol(%arg0)) {
-// CHECK-NEXT: affine.parallel (%[[arg2:.*]]) = (0) to (symbol(%arg0)) {
-// CHECK: affine.for %[[arg3:.*]] = 0 to %arg0 {
-
-// CHECK-LABEL: unknown_op_conservative
-func @unknown_op_conservative() {
- affine.for %i = 0 to 10 {
-// CHECK: affine.for %[[arg1:.*]] = 0 to 10 {
- "unknown"() : () -> ()
- }
- return
-}
-
-// CHECK-LABEL: non_affine_load
-func @non_affine_load() {
- %0 = alloc() : memref<100 x f32>
- affine.for %i = 0 to 100 {
-// CHECK: affine.for %{{.*}} = 0 to 100 {
- load %0[%i] : memref<100 x f32>
- }
- return
-}
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
index 3d08fed788e2..68a0b06e0e31 100644
--- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -3,6 +3,7 @@ add_mlir_library(MLIRAffineTransformsTestPasses
TestAffineDataCopy.cpp
TestAffineLoopUnswitching.cpp
TestLoopPermutation.cpp
+ TestParallelismDetection.cpp
TestVectorizationUtils.cpp
EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp b/mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp
new file mode 100644
index 000000000000..b19e26031693
--- /dev/null
+++ b/mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp
@@ -0,0 +1,47 @@
+//===- TestParallelismDetection.cpp - Parallelism Detection pass ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a test pass to detect parallel 'affine.for' ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Analysis/Utils.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+
+using namespace mlir;
+
+namespace {
+
+struct TestParallelismDetection
+ : public PassWrapper<TestParallelismDetection, FunctionPass> {
+ void runOnFunction() override;
+};
+
+} // end anonymous namespace
+
+// Walks the function and emits a remark on every 'affine.for' op stating
+// whether it was detected as a parallel loop or a sequential loop.
+void TestParallelismDetection::runOnFunction() {
+ FuncOp f = getFunction();
+ OpBuilder b(f.getBody());
+ f.walk([&](AffineForOp forOp) {
+ if (isLoopParallel(forOp))
+ forOp.emitRemark("parallel loop");
+ else
+ forOp.emitRemark("sequential loop");
+ });
+}
+
+namespace mlir {
+void registerTestParallelismDetection() {
+ PassRegistration<TestParallelismDetection> pass(
+ "test-detect-parallel", "Test parallelism detection ");
+}
+} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index a6da313f155b..2d753d8fd076 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -62,6 +62,7 @@ void registerTestMatchers();
void registerTestMemRefDependenceCheck();
void registerTestMemRefStrideCalculation();
void registerTestOpaqueLoc();
+void registerTestParallelismDetection();
void registerTestPreparationPassWithAllowedMemrefResults();
void registerTestGpuParallelLoopMappingPass();
void registerTestSCFUtilsPass();
@@ -136,6 +137,7 @@ void registerTestPasses() {
registerTestMemRefDependenceCheck();
registerTestMemRefStrideCalculation();
registerTestOpaqueLoc();
+ registerTestParallelismDetection();
registerTestPreparationPassWithAllowedMemrefResults();
registerTestGpuParallelLoopMappingPass();
registerTestSCFUtilsPass();
More information about the Mlir-commits
mailing list