[Mlir-commits] [mlir] d00f563 - [mlir] Add a simplifying wrapper for generateCopy and expose it.
Tim Shen
llvmlistbot at llvm.org
Wed Mar 11 16:23:03 PDT 2020
Author: Tim Shen
Date: 2020-03-11T16:22:31-07:00
New Revision: d00f5632f39e101b1679ef887b03c566c4400d19
URL: https://github.com/llvm/llvm-project/commit/d00f5632f39e101b1679ef887b03c566c4400d19
DIFF: https://github.com/llvm/llvm-project/commit/d00f5632f39e101b1679ef887b03c566c4400d19.diff
LOG: [mlir] Add a simplifying wrapper for generateCopy and expose it.
Summary:
affineDataCopyGenerate is a monolithic function that
combines several steps for good reasons, but it makes customizing
the behavior even harder. The two major steps performed by affineDataCopyGenerate are:
a) Identify interesting memrefs and collect their uses.
b) Create new buffers to forward these uses.
Step (a) actually requires tremendous customization options. One could see
that from the recently added filterMemRef parameter.
This patch adds a function that only does (b), in the hope that (a)
can be directly implemented by the callers. In fact, (a) is quite
simple if the caller has only one buffer to consider, or even one use.
Differential Revision: https://reviews.llvm.org/D75965
Added:
Modified:
mlir/include/mlir/Transforms/LoopUtils.h
mlir/lib/Transforms/Utils/LoopUtils.cpp
mlir/test/Transforms/affine-data-copy.mlir
mlir/test/lib/Transforms/TestAffineDataCopy.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index a82a06541a13..72db5e625aa2 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -24,6 +24,7 @@ class AffineForOp;
class FuncOp;
class OpBuilder;
class Value;
+struct MemRefRegion;
namespace loop {
class ForOp;
@@ -185,6 +186,34 @@ uint64_t affineDataCopyGenerate(AffineForOp forOp,
Optional<Value> filterMemRef,
DenseSet<Operation *> &copyNests);
+/// Result for calling generateCopyForMemRegion.
+struct CopyGenerateResult {
+ // Number of bytes used by alloc.
+ uint64_t sizeInBytes;
+
+ // The newly created buffer allocation.
+ Operation *alloc;
+
+ // Generated loop nest for copying data between the allocated buffer and the
+ // original memref.
+ Operation *copyNest;
+};
+
+/// generateCopyForMemRegion is similar to affineDataCopyGenerate, but works
+/// with a single memref region. `memrefRegion` is supposed to contain analysis
+/// information within analyzedOp. The generated prologue and epilogue always
+/// surround `analyzedOp`.
+///
+/// Note that `analyzedOp` is a single op for API convenience, and the
+/// [begin, end) version can be added as needed.
+///
+/// Also note that certain options in `copyOptions` aren't looked at anymore,
+/// like slowMemorySpace.
+LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion,
+ Operation *analyzedOp,
+ const AffineCopyOptions &copyOptions,
+ CopyGenerateResult &result);
+
/// Tile a nest of standard for loops rooted at `rootForOp` by finding such
/// parametric tile sizes that the outer loops have a fixed number of iterations
/// as defined in `sizes`.
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 1c9ac5e84754..dfe39ecded54 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -1797,6 +1797,28 @@ uint64_t mlir::affineDataCopyGenerate(AffineForOp forOp,
filterMemRef, copyNests);
}
+LogicalResult mlir::generateCopyForMemRegion(
+ const MemRefRegion &memrefRegion, Operation *analyzedOp,
+ const AffineCopyOptions &copyOptions, CopyGenerateResult &result) {
+ Block *block = analyzedOp->getBlock();
+ auto begin = analyzedOp->getIterator();
+ auto end = std::next(begin);
+ DenseMap<Value, Value> fastBufferMap;
+ DenseSet<Operation *> copyNests;
+
+ auto err = generateCopy(memrefRegion, block, begin, end, block, begin, end,
+ copyOptions, fastBufferMap, copyNests,
+ &result.sizeInBytes, &begin, &end);
+ if (failed(err))
+ return err;
+
+ result.alloc =
+ fastBufferMap.find(memrefRegion.memref)->second.getDefiningOp();
+ assert(copyNests.size() <= 1 && "At most one copy nest is expected.");
+ result.copyNest = copyNests.empty() ? nullptr : *copyNests.begin();
+ return success();
+}
+
/// Gathers all AffineForOps in 'block' at 'currLoopDepth' in 'depthToLoops'.
static void
gatherLoopsInBlock(Block *block, unsigned currLoopDepth,
diff --git a/mlir/test/Transforms/affine-data-copy.mlir b/mlir/test/Transforms/affine-data-copy.mlir
index b2e4fbbf76c1..e9543e5280b9 100644
--- a/mlir/test/Transforms/affine-data-copy.mlir
+++ b/mlir/test/Transforms/affine-data-copy.mlir
@@ -6,7 +6,8 @@
// affine data copy utility on the input loop nest.
// '-test-affine-data-copy-memref-filter' passes the first memref found in an
// affine.load op in the innermost loop as a filter.
-// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter=1' | FileCheck %s --check-prefix=FILTER
+// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter' | FileCheck %s --check-prefix=FILTER
+// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='for-memref-region' | FileCheck %s --check-prefix=MEMREF_REGION
// -copy-skip-non-stride-loops forces the copies to be placed right inside the
// tile space loops, avoiding the sensitivity of copy placement depth to memory
@@ -140,6 +141,7 @@ func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memref<40
//
// CHECK-SMALL-LABEL: func @foo
// FILTER-LABEL: func @foo
+// MEMREF_REGION-LABEL: func @foo
func @foo(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>, %arg2: memref<1024x1024xf32>) -> memref<1024x1024xf32> {
affine.for %i = 0 to 1024 {
affine.for %j = 0 to 1024 {
@@ -198,3 +200,15 @@ func @foo(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>, %arg2: mem
// FILTER-NEXT: affine.for %{{.*}} = 0 to 1024 {
// FILTER: dealloc %{{.*}} : memref<1024x1024xf32>
// FILTER-NOT: dealloc
+
+// Check that only one memref is copied, because for-memref-region is enabled
+// (and the first ever encountered load is analyzed).
+// MEMREF_REGION: alloc() : memref<1024x1024xf32>
+// MEMREF_REGION-NOT: alloc()
+// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 {
+// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 {
+// MEMREF_REGION: affine.for %{{.*}} = 0 to 1024 {
+// MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 {
+// MEMREF_REGION-NEXT: affine.for %{{.*}} = 0 to 1024 {
+// MEMREF_REGION: dealloc %{{.*}} : memref<1024x1024xf32>
+// MEMREF_REGION-NOT: dealloc
diff --git a/mlir/test/lib/Transforms/TestAffineDataCopy.cpp b/mlir/test/lib/Transforms/TestAffineDataCopy.cpp
index de7cdbd51b9d..966df287359a 100644
--- a/mlir/test/lib/Transforms/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Transforms/TestAffineDataCopy.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/Passes.h"
+#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
@@ -37,6 +38,10 @@ struct TestAffineDataCopy : public FunctionPass<TestAffineDataCopy> {
llvm::cl::desc(
"Enable memref filter testing in affine data copy optimization"),
llvm::cl::init(false)};
+ Option<bool> clTestGenerateCopyForMemRegion{
+ *this, "for-memref-region",
+ llvm::cl::desc("Test copy generation for a single memref region"),
+ llvm::cl::init(false)};
};
} // end anonymous namespace
@@ -55,13 +60,13 @@ void TestAffineDataCopy::runOnFunction() {
auto loopNest = depthToLoops[0][0];
auto innermostLoop = depthToLoops[innermostLoopIdx][0];
- Optional<Value> memrefFilter;
- if (clMemRefFilter) {
+ AffineLoadOp load;
+ if (clMemRefFilter || clTestGenerateCopyForMemRegion) {
// Gather MemRef filter. For simplicity, we use the first loaded memref
// found in the innermost loop.
for (auto &op : *innermostLoop.getBody()) {
- if (auto load = dyn_cast<AffineLoadOp>(op)) {
- memrefFilter = load.getMemRef();
+ if (auto ld = dyn_cast<AffineLoadOp>(op)) {
+ load = ld;
break;
}
}
@@ -72,8 +77,15 @@ void TestAffineDataCopy::runOnFunction() {
/*fastMemorySpace=*/0,
/*tagMemorySpace=*/0,
/*fastMemCapacityBytes=*/32 * 1024 * 1024UL};
- DenseSet<Operation *> copyNests;
- affineDataCopyGenerate(loopNest, copyOptions, memrefFilter, copyNests);
+ if (clMemRefFilter) {
+ DenseSet<Operation *> copyNests;
+ affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(), copyNests);
+ } else if (clTestGenerateCopyForMemRegion) {
+ CopyGenerateResult result;
+ MemRefRegion region(loopNest.getLoc());
+ region.compute(load, /*loopDepth=*/0);
+ generateCopyForMemRegion(region, loopNest, copyOptions, result);
+ }
}
namespace mlir {
More information about the Mlir-commits
mailing list