[Mlir-commits] [mlir] 40b2eb3 - [mlir][AffineOps] Adds affine loop fusion transformation function to LoopFusionUtils.
Andy Davis
llvmlistbot at llvm.org
Tue Feb 11 13:56:34 PST 2020
Author: Andy Davis
Date: 2020-02-11T13:56:26-08:00
New Revision: 40b2eb353043f4e526575a61b0a90fe06e938db1
URL: https://github.com/llvm/llvm-project/commit/40b2eb353043f4e526575a61b0a90fe06e938db1
DIFF: https://github.com/llvm/llvm-project/commit/40b2eb353043f4e526575a61b0a90fe06e938db1.diff
LOG: [mlir][AffineOps] Adds affine loop fusion transformation function to LoopFusionUtils.
Summary:
Adds affine loop fusion transformation function to LoopFusionUtils.
Updates TestLoopFusion utility to run loop fusion transformation until a fixed point is reached.
Adds unit tests to test the transformation.
Includes ASAN bug fix for D73190.
Reviewers: bondhugula, dcaballe
Reviewed By: bondhugula, dcaballe
Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, aartbik, liufengdb, Joonsoo, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D74330
Added:
mlir/test/Transforms/loop-fusion-transformation.mlir
Modified:
mlir/include/mlir/Transforms/LoopFusionUtils.h
mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
mlir/test/lib/Transforms/TestLoopFusion.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Transforms/LoopFusionUtils.h b/mlir/include/mlir/Transforms/LoopFusionUtils.h
index f79571f523f8..2832f050fc55 100644
--- a/mlir/include/mlir/Transforms/LoopFusionUtils.h
+++ b/mlir/include/mlir/Transforms/LoopFusionUtils.h
@@ -51,6 +51,11 @@ FusionResult canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
unsigned dstLoopDepth,
ComputationSliceState *srcSlice);
+/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point
+/// and source slice loop bounds specified in 'srcSlice'.
+void fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
+ ComputationSliceState *srcSlice);
+
/// LoopNestStats aggregates various per-loop statistics (eg. loop trip count
/// and operation count) for a loop nest up until (and including) the innermost
/// loop body.
diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
index c5011a79c74d..2860a61f18d9 100644
--- a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
@@ -24,6 +24,7 @@
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Operation.h"
+#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
@@ -246,6 +247,34 @@ FusionResult mlir::canFuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
return FusionResult::Success;
}
+/// Fuses 'srcForOp' into 'dstForOp' with destination loop block insertion point
+/// and source slice loop bounds specified in 'srcSlice'.
+void mlir::fuseLoops(AffineForOp srcForOp, AffineForOp dstForOp,
+ ComputationSliceState *srcSlice) {
+ // Clone 'srcForOp' into 'dstForOp' at 'srcSlice->insertPoint'.
+ OpBuilder b(srcSlice->insertPoint->getBlock(), srcSlice->insertPoint);
+ BlockAndValueMapping mapper;
+ b.clone(*srcForOp, mapper);
+
+ // Update 'sliceLoopNest' upper and lower bounds from computed 'srcSlice'.
+ SmallVector<AffineForOp, 4> sliceLoops;
+ for (unsigned i = 0, e = srcSlice->ivs.size(); i < e; ++i) {
+ auto loopIV = mapper.lookupOrNull(srcSlice->ivs[i]);
+ if (!loopIV)
+ continue;
+ auto forOp = getForInductionVarOwner(loopIV);
+ sliceLoops.push_back(forOp);
+ if (AffineMap lbMap = srcSlice->lbs[i])
+ forOp.setLowerBound(srcSlice->lbOperands[i], lbMap);
+ if (AffineMap ubMap = srcSlice->ubs[i])
+ forOp.setUpperBound(srcSlice->ubOperands[i], ubMap);
+ }
+
+ // Promote any single iteration slice loops.
+ for (AffineForOp forOp : sliceLoops)
+ promoteIfSingleIteration(forOp);
+}
+
/// Collect loop nest statistics (eg. loop trip count and operation count)
/// in 'stats' for loop nest rooted at 'forOp'. Returns true on success,
/// returns false otherwise.
diff --git a/mlir/test/Transforms/loop-fusion-transformation.mlir b/mlir/test/Transforms/loop-fusion-transformation.mlir
new file mode 100644
index 000000000000..1a0106301627
--- /dev/null
+++ b/mlir/test/Transforms/loop-fusion-transformation.mlir
@@ -0,0 +1,105 @@
+// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s
+
+// CHECK-LABEL: func @slice_depth1_loop_nest() {
+func @slice_depth1_loop_nest() {
+ %0 = alloc() : memref<100xf32>
+ %cst = constant 7.000000e+00 : f32
+ affine.for %i0 = 0 to 16 {
+ affine.store %cst, %0[%i0] : memref<100xf32>
+ }
+ affine.for %i1 = 0 to 5 {
+ %1 = affine.load %0[%i1] : memref<100xf32>
+ }
+ // CHECK: affine.for %[[IV0:.*]] = 0 to 5 {
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%[[IV0]]] : memref<100xf32>
+ // CHECK-NEXT: affine.load %{{.*}}[%[[IV0]]] : memref<100xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: return
+ return
+}
+
+// -----
+
+// CHECK-LABEL: func @should_fuse_reduction_to_pointwise() {
+func @should_fuse_reduction_to_pointwise() {
+ %a = alloc() : memref<10x10xf32>
+ %b = alloc() : memref<10xf32>
+ %c = alloc() : memref<10xf32>
+
+ %cf7 = constant 7.0 : f32
+
+ affine.for %i0 = 0 to 10 {
+ affine.for %i1 = 0 to 10 {
+ %v0 = affine.load %b[%i0] : memref<10xf32>
+ %v1 = affine.load %a[%i0, %i1] : memref<10x10xf32>
+ %v3 = addf %v0, %v1 : f32
+ affine.store %v3, %b[%i0] : memref<10xf32>
+ }
+ }
+ affine.for %i2 = 0 to 10 {
+ %v4 = affine.load %b[%i2] : memref<10xf32>
+ affine.store %v4, %c[%i2] : memref<10xf32>
+ }
+
+ // Match on the fused loop nest.
+ // Should fuse in entire inner loop on %i1 from source loop nest, as %i1
+ // is not used in the access function of the store/load on %b.
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
+ // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32>
+ // CHECK-NEXT: addf %{{.*}}, %{{.*}} : f32
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: return
+ return
+}
+
+// -----
+
+// CHECK-LABEL: func @should_fuse_avoiding_dependence_cycle() {
+func @should_fuse_avoiding_dependence_cycle() {
+ %a = alloc() : memref<10xf32>
+ %b = alloc() : memref<10xf32>
+ %c = alloc() : memref<10xf32>
+
+ %cf7 = constant 7.0 : f32
+
+ // Set up the following dependences:
+ // 1) loop0 -> loop1 on memref '%{{.*}}'
+ // 2) loop0 -> loop2 on memref '%{{.*}}'
+ // 3) loop1 -> loop2 on memref '%{{.*}}'
+ affine.for %i0 = 0 to 10 {
+ %v0 = affine.load %a[%i0] : memref<10xf32>
+ affine.store %cf7, %b[%i0] : memref<10xf32>
+ }
+ affine.for %i1 = 0 to 10 {
+ affine.store %cf7, %a[%i1] : memref<10xf32>
+ %v1 = affine.load %c[%i1] : memref<10xf32>
+ }
+ affine.for %i2 = 0 to 10 {
+ %v2 = affine.load %b[%i2] : memref<10xf32>
+ affine.store %cf7, %c[%i2] : memref<10xf32>
+ }
+ // Fusing loop first loop into last would create a cycle:
+ // {1} <--> {0, 2}
+ // However, we can avoid the dependence cycle if we first fuse loop0 into
+ // loop1:
+ // {0, 1) --> {2}
+ // Then fuse this loop nest with loop2:
+ // {0, 1, 2}
+ //
+ // CHECK: affine.for %{{.*}} = 0 to 10 {
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: affine.load %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: affine.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32>
+ // CHECK-NEXT: }
+ // CHECK-NEXT: return
+ return
+}
diff --git a/mlir/test/lib/Transforms/TestLoopFusion.cpp b/mlir/test/lib/Transforms/TestLoopFusion.cpp
index 4a4e5f9c05f6..93e6fd450160 100644
--- a/mlir/test/lib/Transforms/TestLoopFusion.cpp
+++ b/mlir/test/lib/Transforms/TestLoopFusion.cpp
@@ -41,6 +41,11 @@ static llvm::cl::opt<bool> clTestSliceComputation(
llvm::cl::desc("Enable testing of loop fusion slice computation"),
llvm::cl::cat(clOptionsCategory));
+static llvm::cl::opt<bool> clTestLoopFusionTransformation(
+ "test-loop-fusion-transformation",
+ llvm::cl::desc("Enable testing of loop fusion transformation"),
+ llvm::cl::cat(clOptionsCategory));
+
namespace {
struct TestLoopFusion : public FunctionPass<TestLoopFusion> {
@@ -69,11 +74,10 @@ gatherLoops(Block *block, unsigned currLoopDepth,
// Run fusion dependence check on 'loops[i]' and 'loops[j]' at loop depths
// in range ['loopDepth' + 1, 'maxLoopDepth'].
// Emits a remark on 'loops[i]' if a fusion-preventing dependence exists.
-static void testDependenceCheck(SmallVector<AffineForOp, 2> &loops, unsigned i,
- unsigned j, unsigned loopDepth,
+// Returns false as IR is not transformed.
+static bool testDependenceCheck(AffineForOp srcForOp, AffineForOp dstForOp,
+ unsigned i, unsigned j, unsigned loopDepth,
unsigned maxLoopDepth) {
- AffineForOp srcForOp = loops[i];
- AffineForOp dstForOp = loops[j];
mlir::ComputationSliceState sliceUnion;
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
FusionResult result =
@@ -84,6 +88,7 @@ static void testDependenceCheck(SmallVector<AffineForOp, 2> &loops, unsigned i,
<< i << " into loop nest " << j << " at depth " << loopDepth;
}
}
+ return false;
}
// Returns the index of 'op' in its block.
@@ -121,11 +126,10 @@ static std::string getSliceStr(const mlir::ComputationSliceState &sliceUnion) {
// Computes fusion slice union on 'loops[i]' and 'loops[j]' at loop depths
// in range ['loopDepth' + 1, 'maxLoopDepth'].
// Emits a string representation of the slice union as a remark on 'loops[j]'.
-static void testSliceComputation(SmallVector<AffineForOp, 2> &loops, unsigned i,
- unsigned j, unsigned loopDepth,
+// Returns false as IR is not transformed.
+static bool testSliceComputation(AffineForOp forOpA, AffineForOp forOpB,
+ unsigned i, unsigned j, unsigned loopDepth,
unsigned maxLoopDepth) {
- AffineForOp forOpA = loops[i];
- AffineForOp forOpB = loops[j];
for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
mlir::ComputationSliceState sliceUnion;
FusionResult result = mlir::canFuseLoops(forOpA, forOpB, d, &sliceUnion);
@@ -135,31 +139,83 @@ static void testSliceComputation(SmallVector<AffineForOp, 2> &loops, unsigned i,
<< " : " << getSliceStr(sliceUnion) << ")";
}
}
+ return false;
}
-void TestLoopFusion::runOnFunction() {
- // Gather all AffineForOps by loop depth.
- DenseMap<unsigned, SmallVector<AffineForOp, 2>> depthToLoops;
- for (auto &block : getFunction()) {
- gatherLoops(&block, /*currLoopDepth=*/0, depthToLoops);
+// Attempts to fuse 'forOpA' into 'forOpB' at loop depths in range
+// ['loopDepth' + 1, 'maxLoopDepth'].
+// Returns true if loops were successfully fused, false otherwise.
+static bool testLoopFusionTransformation(AffineForOp forOpA, AffineForOp forOpB,
+ unsigned i, unsigned j,
+ unsigned loopDepth,
+ unsigned maxLoopDepth) {
+ for (unsigned d = loopDepth + 1; d <= maxLoopDepth; ++d) {
+ mlir::ComputationSliceState sliceUnion;
+ FusionResult result = mlir::canFuseLoops(forOpA, forOpB, d, &sliceUnion);
+ if (result.value == FusionResult::Success) {
+ mlir::fuseLoops(forOpA, forOpB, &sliceUnion);
+ // Note: 'forOpA' is removed to simplify test output. A proper loop
+ // fusion pass should check the data dependence graph and run memref
+ // region analysis to ensure removing 'forOpA' is safe.
+ forOpA.erase();
+ return true;
+ }
}
+ return false;
+}
- // Run tests on all combinations of src/dst loop nests in 'depthToLoops'.
+using LoopFunc = function_ref<bool(AffineForOp, AffineForOp, unsigned, unsigned,
+ unsigned, unsigned)>;
+
+// Run tests on all combinations of src/dst loop nests in 'depthToLoops'.
+// If 'return_on_change' is true, returns on first invocation of 'fn' which
+// returns true.
+static bool
+iterateLoops(DenseMap<unsigned, SmallVector<AffineForOp, 2>> &depthToLoops,
+ LoopFunc fn, bool return_on_change = false) {
+ bool changed = false;
for (auto &depthAndLoops : depthToLoops) {
unsigned loopDepth = depthAndLoops.first;
auto &loops = depthAndLoops.second;
unsigned numLoops = loops.size();
for (unsigned j = 0; j < numLoops; ++j) {
for (unsigned k = 0; k < numLoops; ++k) {
- if (j == k)
- continue;
- if (clTestDependenceCheck)
- testDependenceCheck(loops, j, k, loopDepth, depthToLoops.size());
- if (clTestSliceComputation)
- testSliceComputation(loops, j, k, loopDepth, depthToLoops.size());
+ if (j != k)
+ changed |=
+ fn(loops[j], loops[k], j, k, loopDepth, depthToLoops.size());
+ if (changed && return_on_change)
+ return true;
}
}
}
+ return changed;
+}
+
+void TestLoopFusion::runOnFunction() {
+ DenseMap<unsigned, SmallVector<AffineForOp, 2>> depthToLoops;
+ if (clTestLoopFusionTransformation) {
+ // Run loop fusion until a fixed point is reached.
+ do {
+ depthToLoops.clear();
+ // Gather all AffineForOps by loop depth.
+ for (auto &block : getFunction())
+ gatherLoops(&block, /*currLoopDepth=*/0, depthToLoops);
+
+ // Try to fuse all combinations of src/dst loop nests in 'depthToLoops'.
+ } while (iterateLoops(depthToLoops, testLoopFusionTransformation,
+ /*return_on_change=*/true));
+ return;
+ }
+
+ // Gather all AffineForOps by loop depth.
+ for (Block &block : getFunction())
+ gatherLoops(&block, /*currLoopDepth=*/0, depthToLoops);
+
+ // Run tests on all combinations of src/dst loop nests in 'depthToLoops'.
+ if (clTestDependenceCheck)
+ iterateLoops(depthToLoops, testDependenceCheck);
+ if (clTestSliceComputation)
+ iterateLoops(depthToLoops, testSliceComputation);
}
static PassRegistration<TestLoopFusion>
More information about the Mlir-commits
mailing list