[Mlir-commits] [mlir] 4bcd08e - [mlir] Add for loop specialization
Stephan Herhut
llvmlistbot at llvm.org
Mon Jun 22 01:23:16 PDT 2020
Author: Stephan Herhut
Date: 2020-06-22T10:14:17+02:00
New Revision: 4bcd08eb1c3bcd6ce580cc21b4e6d9f10286b9e6
URL: https://github.com/llvm/llvm-project/commit/4bcd08eb1c3bcd6ce580cc21b4e6d9f10286b9e6
DIFF: https://github.com/llvm/llvm-project/commit/4bcd08eb1c3bcd6ce580cc21b4e6d9f10286b9e6.diff
LOG: [mlir] Add for loop specialization
Summary:
We already had a parallel loop specialization pass that is used to
enable unrolling and subsequent vectorization by rewriting loops
whose bound is defined as a min of a constant and a dynamic value
into two loops, one with a static bound (the constant) and one with the
minimum as bound, wrapped into a conditional to dispatch between the two.
This adds the same rewriting for `for` loops.
Differential Revision: https://reviews.llvm.org/D82189
Added:
mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
mlir/test/Dialect/SCF/for-loop-specialization.mlir
Modified:
mlir/include/mlir/Dialect/SCF/Passes.h
mlir/include/mlir/Dialect/SCF/Passes.td
mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp
mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
Removed:
mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp
################################################################################
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h
index e88297f07cf2..df6037874f2b 100644
--- a/mlir/include/mlir/Dialect/SCF/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Passes.h
@@ -20,6 +20,10 @@ namespace mlir {
class Pass;
+/// Creates a pass that specializes for loops for unrolling and
+/// vectorization.
+std::unique_ptr<Pass> createForLoopSpecializationPass();
+
/// Creates a loop fusion pass which fuses parallel loops.
std::unique_ptr<Pass> createParallelLoopFusionPass();
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td
index 5e6c8c1dd46f..483d0ba7c7be 100644
--- a/mlir/include/mlir/Dialect/SCF/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Passes.td
@@ -1,4 +1,4 @@
-//===-- Passes.td - Loop pass definition file --------------*- tablegen -*-===//
+//===-- Passes.td - SCF pass definition file ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,18 +11,24 @@
include "mlir/Pass/PassBase.td"
-def LoopParallelLoopFusion : Pass<"parallel-loop-fusion"> {
+def SCFForLoopSpecialization
+ : FunctionPass<"for-loop-specialization"> {
+ let summary = "Specialize `for` loops for vectorization";
+ let constructor = "mlir::createForLoopSpecializationPass()";
+}
+
+def SCFParallelLoopFusion : Pass<"parallel-loop-fusion"> {
let summary = "Fuse adjacent parallel loops";
let constructor = "mlir::createParallelLoopFusionPass()";
}
-def LoopParallelLoopSpecialization
+def SCFParallelLoopSpecialization
: FunctionPass<"parallel-loop-specialization"> {
let summary = "Specialize parallel loops for vectorization";
let constructor = "mlir::createParallelLoopSpecializationPass()";
}
-def LoopParallelLoopTiling : FunctionPass<"parallel-loop-tiling"> {
+def SCFParallelLoopTiling : FunctionPass<"parallel-loop-tiling"> {
let summary = "Tile parallel loops";
let constructor = "mlir::createParallelLoopTilingPass()";
let options = [
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index 58890d4a3782..7a54ace0bf8f 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -1,6 +1,6 @@
add_mlir_dialect_library(MLIRSCFTransforms
+ LoopSpecialization.cpp
ParallelLoopFusion.cpp
- ParallelLoopSpecialization.cpp
ParallelLoopTiling.cpp
Utils.cpp
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
similarity index 50%
rename from mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp
rename to mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
index 12c35b117488..54c663ca67bc 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -1,4 +1,4 @@
-//===- ParallelLoopSpecialization.cpp - scf.parallel specialization ------===//
+//===- LoopSpecialization.cpp - scf.parallel/scf.for specialization -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Specializes parallel loops for easier unrolling and vectorization.
+// Specializes parallel loops and for loops for easier unrolling and
+// vectorization.
//
//===----------------------------------------------------------------------===//
@@ -19,13 +20,14 @@
#include "mlir/IR/BlockAndValueMapping.h"
using namespace mlir;
+using scf::ForOp;
using scf::ParallelOp;
-/// Rewrite a loop with bounds defined by an affine.min with a constant into 2
-/// loops after checking if the bounds are equal to that constant. This is
-/// beneficial if the loop will almost always have the constant bound and that
-/// version can be fully unrolled and vectorized.
-static void specializeLoopForUnrolling(ParallelOp op) {
+/// Rewrite a parallel loop with bounds defined by an affine.min with a constant
+/// into 2 loops after checking if the bounds are equal to that constant. This
+/// is beneficial if the loop will almost always have the constant bound and
+/// that version can be fully unrolled and vectorized.
+static void specializeParallelLoopForUnrolling(ParallelOp op) {
SmallVector<int64_t, 2> constantIndices;
constantIndices.reserve(op.upperBound().size());
for (auto bound : op.upperBound()) {
@@ -33,7 +35,7 @@ static void specializeLoopForUnrolling(ParallelOp op) {
if (!minOp)
return;
int64_t minConstant = std::numeric_limits<int64_t>::max();
- for (auto expr : minOp.map().getResults()) {
+ for (AffineExpr expr : minOp.map().getResults()) {
if (auto constantIndex = expr.dyn_cast<AffineConstantExpr>())
minConstant = std::min(minConstant, constantIndex.getValue());
}
@@ -58,11 +60,48 @@ static void specializeLoopForUnrolling(ParallelOp op) {
op.erase();
}
+/// Rewrite a for loop whose upper bound is defined by an affine.min with a
+/// constant into 2 loops after checking if the bound is equal to that constant.
+/// This is beneficial if the loop will almost always have the constant bound
+/// and that version can be fully unrolled and vectorized.
+static void specializeForLoopForUnrolling(ForOp op) {
+ auto bound = op.upperBound();
+ auto minOp = bound.getDefiningOp<AffineMinOp>();
+ if (!minOp)
+ return;
+ int64_t minConstant = std::numeric_limits<int64_t>::max();
+ for (AffineExpr expr : minOp.map().getResults()) {
+ if (auto constantIndex = expr.dyn_cast<AffineConstantExpr>())
+ minConstant = std::min(minConstant, constantIndex.getValue());
+ }
+ if (minConstant == std::numeric_limits<int64_t>::max())
+ return;
+
+ OpBuilder b(op);
+ BlockAndValueMapping map;
+ Value constant = b.create<ConstantIndexOp>(op.getLoc(), minConstant);
+ Value cond =
+ b.create<CmpIOp>(op.getLoc(), CmpIPredicate::eq, bound, constant);
+ map.map(bound, constant);
+ auto ifOp = b.create<scf::IfOp>(op.getLoc(), cond, /*withElseRegion=*/true);
+ ifOp.getThenBodyBuilder().clone(*op.getOperation(), map);
+ ifOp.getElseBodyBuilder().clone(*op.getOperation());
+ op.erase();
+}
+
namespace {
struct ParallelLoopSpecialization
- : public LoopParallelLoopSpecializationBase<ParallelLoopSpecialization> {
+ : public SCFParallelLoopSpecializationBase<ParallelLoopSpecialization> {
+ void runOnFunction() override {
+ getFunction().walk(
+ [](ParallelOp op) { specializeParallelLoopForUnrolling(op); });
+ }
+};
+
+struct ForLoopSpecialization
+ : public SCFForLoopSpecializationBase<ForLoopSpecialization> {
void runOnFunction() override {
- getFunction().walk([](ParallelOp op) { specializeLoopForUnrolling(op); });
+ getFunction().walk([](ForOp op) { specializeForLoopForUnrolling(op); });
}
};
} // namespace
@@ -70,3 +109,7 @@ struct ParallelLoopSpecialization
std::unique_ptr<Pass> mlir::createParallelLoopSpecializationPass() {
return std::make_unique<ParallelLoopSpecialization>();
}
+
+std::unique_ptr<Pass> mlir::createForLoopSpecializationPass() {
+ return std::make_unique<ForLoopSpecialization>();
+}
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp
index 204528f16238..cbdf78cad51d 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp
@@ -160,7 +160,7 @@ void mlir::scf::naivelyFuseParallelOps(Region &region) {
namespace {
struct ParallelLoopFusion
- : public LoopParallelLoopFusionBase<ParallelLoopFusion> {
+ : public SCFParallelLoopFusionBase<ParallelLoopFusion> {
void runOnOperation() override {
getOperation()->walk([&](Operation *child) {
for (Region &region : child->getRegions())
diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
index 40469138ea01..ee4428ded99e 100644
--- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp
@@ -119,7 +119,7 @@ static bool getInnermostNestedLoops(Block *block,
namespace {
struct ParallelLoopTiling
- : public LoopParallelLoopTilingBase<ParallelLoopTiling> {
+ : public SCFParallelLoopTilingBase<ParallelLoopTiling> {
ParallelLoopTiling() = default;
explicit ParallelLoopTiling(ArrayRef<int64_t> tileSizes) {
this->tileSizes = tileSizes;
diff --git a/mlir/test/Dialect/SCF/for-loop-specialization.mlir b/mlir/test/Dialect/SCF/for-loop-specialization.mlir
new file mode 100644
index 000000000000..f7b501e0c95c
--- /dev/null
+++ b/mlir/test/Dialect/SCF/for-loop-specialization.mlir
@@ -0,0 +1,39 @@
+// RUN: mlir-opt %s -for-loop-specialization -split-input-file | FileCheck %s
+
+#map0 = affine_map<()[s0, s1] -> (1024, s0 - s1)>
+#map1 = affine_map<()[s0, s1] -> (64, s0 - s1)>
+
+func @for(%outer: index, %A: memref<?xf32>, %B: memref<?xf32>,
+ %C: memref<?xf32>, %result: memref<?xf32>) {
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %d0 = dim %A, %c0 : memref<?xf32>
+ %b0 = affine.min #map0()[%d0, %outer]
+ scf.for %i0 = %c0 to %b0 step %c1 {
+ %B_elem = load %B[%i0] : memref<?xf32>
+ %C_elem = load %C[%i0] : memref<?xf32>
+ %sum_elem = addf %B_elem, %C_elem : f32
+ store %sum_elem, %result[%i0] : memref<?xf32>
+ }
+ return
+}
+
+// CHECK-LABEL: func @for(
+// CHECK-SAME: [[ARG0:%.*]]: index, [[ARG1:%.*]]: memref<?xf32>, [[ARG2:%.*]]: memref<?xf32>, [[ARG3:%.*]]: memref<?xf32>, [[ARG4:%.*]]: memref<?xf32>) {
+// CHECK: [[CST_0:%.*]] = constant 0 : index
+// CHECK: [[CST_1:%.*]] = constant 1 : index
+// CHECK: [[DIM_0:%.*]] = dim [[ARG1]], [[CST_0]] : memref<?xf32>
+// CHECK: [[MIN:%.*]] = affine.min #map0(){{\[}}[[DIM_0]], [[ARG0]]]
+// CHECK: [[CST_1024:%.*]] = constant 1024 : index
+// CHECK: [[PRED:%.*]] = cmpi "eq", [[MIN]], [[CST_1024]] : index
+// CHECK: scf.if [[PRED]] {
+// CHECK: scf.for [[IDX0:%.*]] = [[CST_0]] to [[CST_1024]] step [[CST_1]] {
+// CHECK: store
+// CHECK: }
+// CHECK: } else {
+// CHECK: scf.for [[IDX0:%.*]] = [[CST_0]] to [[MIN]] step [[CST_1]] {
+// CHECK: store
+// CHECK: }
+// CHECK: }
+// CHECK: return
+// CHECK: }
More information about the Mlir-commits
mailing list