[Mlir-commits] [mlir] d492a7b - [mlir][Linalg] Add a useLinalgCopy option to Linalg bufferization.
Nicolas Vasilache
llvmlistbot at llvm.org
Wed Jan 19 10:00:16 PST 2022
Author: Nicolas Vasilache
Date: 2022-01-19T13:00:11-05:00
New Revision: d492a7b2cac3e89597e87091f7e1101b70829dee
URL: https://github.com/llvm/llvm-project/commit/d492a7b2cac3e89597e87091f7e1101b70829dee
DIFF: https://github.com/llvm/llvm-project/commit/d492a7b2cac3e89597e87091f7e1101b70829dee.diff
LOG: [mlir][Linalg] Add a useLinalgCopy option to Linalg bufferization.
Benchmarks show that memref::CopyOp is currently up to 200x slower than
tiled and vectorized versions of linalg::Copy.
Add a temporary flag to allow comprehensive bufferize to generate a
linalg::GenericOp that implements a copy until this performance bug is
resolved.
Differential Revision: https://reviews.llvm.org/D117696
Added:
Modified:
mlir/include/mlir/Dialect/Linalg/Passes.h
mlir/include/mlir/Dialect/Linalg/Passes.td
mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.h b/mlir/include/mlir/Dialect/Linalg/Passes.h
index 5b0abdec15c6..4b145a944e7f 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.h
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.h
@@ -64,6 +64,8 @@ std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
/// on SSA use-def chains starting from function operands that are annotated
/// with the 'inplaceable' attribute.
std::unique_ptr<Pass> createLinalgComprehensiveModuleBufferizePass();
+std::unique_ptr<Pass>
+createLinalgComprehensiveModuleBufferizePass(bool useLinalgCopy);
/// Create a pass to convert Linalg operations which work on tensors to use
/// buffers instead.
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index 9d22e5c84ddb..c67ebc84a5cf 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -52,6 +52,9 @@ def LinalgComprehensiveModuleBufferize :
Option<"useAlloca", "use-alloca", "bool",
/*default=*/"false",
"Use stack allocations for memrefs (for testing purposes only)">,
+ Option<"useLinalgCopy", "use-linalg-copy", "bool",
+ /*default=*/"false",
+ "Use a copy operation implemented as a Linalg op.">,
Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
/*default=*/"0",
"Analyze ops in random order with a given seed (fuzzer)">,
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index dfc25bae2a47..f8233c5bfe11 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -39,6 +39,10 @@ struct LinalgComprehensiveModuleBufferize
LinalgComprehensiveModuleBufferize(
const LinalgComprehensiveModuleBufferize &p) = default;
+ /// Construct the pass with its `useLinalgCopy` option preset to
+ /// `linalgCopy`. Marked `explicit` so that a stray `bool` cannot be
+ /// implicitly converted into a pass object.
+ explicit LinalgComprehensiveModuleBufferize(bool linalgCopy) {
+   this->useLinalgCopy = linalgCopy;
+ }
+
void runOnOperation() override;
void getDependentDialects(DialectRegistry &registry) const override {
@@ -74,6 +78,32 @@ static FailureOr<Value> allocationFnUsingAlloca(OpBuilder &b, Location loc,
return allocated;
}
+/// Emit an n-D copy from `from` to `to` as a linalg::GenericOp, which can be
+/// further tiled, lowered to loops or vectorized, unlike the current
+/// implementation of memref::CopyOp.
+/// Deliberately does not depend on linalg::CopyOp, which is getting
+/// deprecated.
+///
+/// Both `from` and `to` must be of MemRefType and have the same rank;
+/// otherwise no op is created and failure() is returned. On success the
+/// generic op has been created through `b` and success() is returned.
+static LogicalResult createLinalgCopyOp(OpBuilder &b, Location loc, Value from,
+                                        Value to) {
+  // Use dyn_cast rather than cast: cast<> asserts on a type mismatch and never
+  // yields a null type, which would make the null checks below unreachable.
+  auto memrefTypeFrom = from.getType().dyn_cast<MemRefType>();
+  auto memrefTypeTo = to.getType().dyn_cast<MemRefType>();
+  if (!memrefTypeFrom || !memrefTypeTo ||
+      memrefTypeFrom.getRank() != memrefTypeTo.getRank())
+    return failure();
+  // Identity indexing map for both operands and all-parallel iterators: the
+  // region simply yields the input element for the matching output element.
+  AffineMap id =
+      AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
+  SmallVector<StringRef> iteratorTypes(memrefTypeTo.getRank(),
+                                       getParallelIteratorTypeName());
+  b.create<linalg::GenericOp>(loc,
+                              /*inputs=*/from,
+                              /*outputs=*/to,
+                              /*indexingMaps=*/llvm::makeArrayRef({id, id}),
+                              /*iteratorTypes=*/iteratorTypes,
+                              [](OpBuilder &b, Location loc, ValueRange args) {
+                                b.create<linalg::YieldOp>(loc, args.front());
+                              });
+  return success();
+}
+
void LinalgComprehensiveModuleBufferize::runOnOperation() {
auto options = std::make_unique<AnalysisBufferizationOptions>();
if (useAlloca) {
@@ -82,13 +112,17 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
return success();
};
}
+ // TODO: atm memref::CopyOp can be 200x slower than linalg::GenericOp.
+ // Once this perf bug is fixed more systematically, we can revisit.
+ if (useLinalgCopy)
+ options->memCpyFn = createLinalgCopyOp;
options->allowReturnMemref = allowReturnMemref;
options->allowUnknownOps = allowUnknownOps;
options->analysisFuzzerSeed = analysisFuzzerSeed;
- options->testAnalysisOnly = testAnalysisOnly;
- options->printConflicts = printConflicts;
options->createDeallocs = createDeallocs;
+ options->printConflicts = printConflicts;
+ options->testAnalysisOnly = testAnalysisOnly;
// Enable InitTensorOp elimination.
if (initTensorElimination) {
@@ -120,3 +154,8 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
/// Factory for the comprehensive module bufferization pass, built through the
/// pass's default constructor.
std::unique_ptr<Pass> mlir::createLinalgComprehensiveModuleBufferizePass() {
  std::unique_ptr<Pass> pass =
      std::make_unique<LinalgComprehensiveModuleBufferize>();
  return pass;
}
+
+/// Factory overload for the comprehensive module bufferization pass that
+/// forwards `useLinalgCopy` to the pass constructor.
+std::unique_ptr<Pass>
+mlir::createLinalgComprehensiveModuleBufferizePass(bool useLinalgCopy) {
+  std::unique_ptr<Pass> pass =
+      std::make_unique<LinalgComprehensiveModuleBufferize>(useLinalgCopy);
+  return pass;
+}
More information about the Mlir-commits
mailing list