[Mlir-commits] [mlir] 16219f8 - [MLIR][GPU] Add canonicalizer for gpu.memcpy
Uday Bondhugula
llvmlistbot at llvm.org
Sat May 14 06:31:53 PDT 2022
Author: Arnab Dutta
Date: 2022-05-14T19:01:04+05:30
New Revision: 16219f8c94a225bf71a249cea8f0fdf3696c4b3c
URL: https://github.com/llvm/llvm-project/commit/16219f8c94a225bf71a249cea8f0fdf3696c4b3c
DIFF: https://github.com/llvm/llvm-project/commit/16219f8c94a225bf71a249cea8f0fdf3696c4b3c.diff
LOG: [MLIR][GPU] Add canonicalizer for gpu.memcpy
Erase a gpu.memcpy op when, apart from its allocation and
deallocation ops, the only use of dest is the memcpy op
in question.
Reviewed By: bondhugula, csigg
Differential Revision: https://reviews.llvm.org/D124257
Added:
Modified:
mlir/include/mlir/Dialect/GPU/GPUOps.td
mlir/include/mlir/Interfaces/SideEffectInterfaces.h
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/lib/Interfaces/SideEffectInterfaces.cpp
mlir/test/Dialect/GPU/canonicalize.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 5a66c9d0ded06..10f9dbde011db 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -1007,6 +1007,7 @@ def GPU_MemcpyOp : GPU_Op<"memcpy", [GPU_AsyncOpInterface]> {
}];
let hasFolder = 1;
let hasVerifier = 1;
+ let hasCanonicalizer = 1;
}
def GPU_MemsetOp : GPU_Op<"memset",
diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h
index 596492495b778..5e88a92283f5f 100644
--- a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h
+++ b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h
@@ -248,6 +248,10 @@ struct Write : public Effect::Base<Write> {};
// SideEffect Utilities
//===----------------------------------------------------------------------===//
+/// True iff `op` reports effects on `value` and all of them are `EffectTy`.
+template <typename EffectTy>
+bool hasSingleEffect(Operation *op, Value value);
+
/// Return true if the given operation is unused, and has no side effects on
/// memory that prevent erasing.
bool isOpTriviallyDead(Operation *op);
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 8d0e6da9a7b52..8b83820a2f36c 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -24,6 +24,7 @@
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/TypeSwitch.h"
@@ -1105,6 +1106,48 @@ LogicalResult MemcpyOp::verify() {
return success();
}
+namespace {
+
+/// Erases a gpu.memcpy whose destination is defined by an allocating op and is
+/// otherwise used only by ops whose sole effect on it is to free it.
+struct EraseTrivialCopyOp : public OpRewritePattern<MemcpyOp> {
+ using OpRewritePattern<MemcpyOp>::OpRewritePattern;
+
+ LogicalResult matchAndRewrite(MemcpyOp op,
+ PatternRewriter &rewriter) const override {
+ Value dest = op.dst();
+ Operation *destDefOp = dest.getDefiningOp();
+ // `dest` must be defined by an op whose only memory effect on it is
+ // Allocate; a `dest` without a defining op is rejected as well.
+ if (!destDefOp ||
+ !hasSingleEffect<MemoryEffects::Allocate>(destDefOp, dest))
+ return failure();
+ // Every user of `dest` other than `op` itself must have Free as its only
+ // effect on `dest`; any other kind of user blocks the rewrite.
+ if (llvm::any_of(dest.getUsers(), [op, dest](Operation *user) {
+ return user != op &&
+ !hasSingleEffect<MemoryEffects::Free>(user, dest);
+ }))
+ return failure();
+ // Only two async shapes are handled: exactly one async dependency together
+ // with an async token result (the token is replaced by that dependency), or
+ // no async dependency and no async token (nothing is left to replace).
+ if (op.asyncDependencies().size() > 1 ||
+ ((op.asyncDependencies().empty() && op.asyncToken()) ||
+ (!op.asyncDependencies().empty() && !op.asyncToken())))
+ return failure();
+ rewriter.replaceOp(op, op.asyncDependencies());
+ return success();
+ }
+};
+
+} // end anonymous namespace
+
+void MemcpyOp::getCanonicalizationPatterns(RewritePatternSet &results,
+ MLIRContext *context) {
+ results.add<EraseTrivialCopyOp>(context); // Trivial-copy erasure pattern.
+}
+
//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaLoadMatrixOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Interfaces/SideEffectInterfaces.cpp b/mlir/lib/Interfaces/SideEffectInterfaces.cpp
index e469dde68e7f8..82d4f9edb9830 100644
--- a/mlir/lib/Interfaces/SideEffectInterfaces.cpp
+++ b/mlir/lib/Interfaces/SideEffectInterfaces.cpp
@@ -90,6 +90,33 @@ static bool wouldOpBeTriviallyDeadImpl(Operation *rootOp) {
return true;
}
+template <typename EffectTy>
+bool mlir::hasSingleEffect(Operation *op, Value value) {
+ auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
+ if (!memOp)
+ return false;
+ SmallVector<SideEffects::EffectInstance<MemoryEffects::Effect>, 4> effects;
+ memOp.getEffects(effects);
+ bool doesOpOnlyHaveSingleEffectOnVal = false;
+ // Only effects on `value` are considered. The result is true iff at least one
+ // of them is an `EffectTy` and none of them is of any other type.
+ for (auto &effect : effects) {
+ if (effect.getValue() == value && isa<EffectTy>(effect.getEffect()))
+ doesOpOnlyHaveSingleEffectOnVal = true;
+ if (effect.getValue() == value && !isa<EffectTy>(effect.getEffect())) {
+ doesOpOnlyHaveSingleEffectOnVal = false;
+ break;
+ }
+ }
+ return doesOpOnlyHaveSingleEffectOnVal;
+}
+// Declared-only in the header, so the supported effects are instantiated here.
+template bool mlir::hasSingleEffect<MemoryEffects::Allocate>(Operation *,
+ Value);
+template bool mlir::hasSingleEffect<MemoryEffects::Free>(Operation *, Value);
+template bool mlir::hasSingleEffect<MemoryEffects::Write>(Operation *, Value);
+template bool mlir::hasSingleEffect<MemoryEffects::Read>(Operation *, Value);
+
bool mlir::wouldOpBeTriviallyDead(Operation *op) {
if (op->mightHaveTrait<OpTrait::IsTerminator>())
return false;
diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
index 232d96e7435aa..eedc2381f7437 100644
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -28,6 +28,70 @@ func.func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {
// CHECK-NEXT: gpu.alloc async [%[[TOKEN1]]] ()
// CHECK-NEXT: return
+// CHECK-LABEL: func @fold_memcpy_op
+func.func @fold_memcpy_op(%arg0: i1) {
+ %cst = arith.constant 0.000000e+00 : f16
+ %1 = memref.alloc() : memref<2xf16> // memcpy dest; otherwise only deallocated
+ %2 = gpu.wait async
+ %memref, %asyncToken = gpu.alloc async [%2] () : memref<2xf16>
+ gpu.wait [%2]
+ affine.store %cst, %memref[0] : memref<2xf16>
+ %3 = gpu.wait async
+ %4 = gpu.memcpy async [%3] %1, %memref : memref<2xf16>, memref<2xf16>
+ gpu.wait [%3]
+ %5 = scf.if %arg0 -> (i1) {
+ memref.dealloc %1 : memref<2xf16>
+ scf.yield %arg0 : i1
+ } else {
+ memref.dealloc %1 : memref<2xf16>
+ scf.yield %arg0 : i1
+ }
+ return
+}
+// CHECK-NOT: gpu.memcpy
+
+// We cannot fold gpu.memcpy here, as dest (%arg1) is a block argument.
+// CHECK-LABEL: func @do_not_fold_memcpy_op1
+func.func @do_not_fold_memcpy_op1(%arg0: i1, %arg1: memref<2xf16>) {
+ %cst = arith.constant 0.000000e+00 : f16
+ %2 = gpu.wait async
+ %memref, %asyncToken = gpu.alloc async [%2] () : memref<2xf16>
+ gpu.wait [%2]
+ affine.store %cst, %memref[0] : memref<2xf16>
+ %3 = gpu.wait async
+ %4 = gpu.memcpy async [%3] %arg1, %memref : memref<2xf16>, memref<2xf16>
+ gpu.wait [%3]
+ return
+}
+// CHECK: gpu.memcpy
+
+// We cannot fold gpu.memcpy, as dest is also used by an op that reads it.
+// CHECK-LABEL: func @do_not_fold_memcpy_op2
+func.func @do_not_fold_memcpy_op2(%arg0: i1, %arg1: index) -> f16 {
+ %cst = arith.constant 0.000000e+00 : f16
+ %1 = memref.alloc() : memref<2xf16>
+ %2 = gpu.wait async
+ %memref, %asyncToken = gpu.alloc async [%2] () : memref<2xf16>
+ gpu.wait [%2]
+ affine.store %cst, %memref[0] : memref<2xf16>
+ %3 = gpu.wait async
+ %4 = gpu.memcpy async [%3] %1, %memref : memref<2xf16>, memref<2xf16>
+ gpu.wait [%3]
+ %5 = memref.load %1[%arg1] : memref<2xf16>
+ return %5 : f16
+}
+// CHECK: gpu.memcpy
+
+// We cannot fold gpu.memcpy, as the defining op of dest is not an alloc-like op.
+// CHECK-LABEL: func @do_not_fold_memcpy_op3
+func.func @do_not_fold_memcpy_op3(%arg0: memref<1xi8>, %arg1: memref<i1>) {
+ %0 = arith.constant 0 : index
+ %1 = memref.view %arg0[%0][] : memref<1xi8> to memref<i1>
+ gpu.memcpy %1, %arg1 : memref<i1>, memref<i1>
+ func.return
+}
+// CHECK: gpu.memcpy
+
// CHECK-LABEL: @memcpy_after_cast
func.func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
// CHECK-NOT: memref.cast
More information about the Mlir-commits
mailing list