[Mlir-commits] [mlir] ae46b3e - Revert D121279 "[MLIR][GPU] Add canonicalizer for gpu.memcpy"
Fangrui Song
llvmlistbot at llvm.org
Thu Apr 21 08:55:17 PDT 2022
Author: Fangrui Song
Date: 2022-04-21T08:55:13-07:00
New Revision: ae46b3e01faa0515fd85b8e107389348bd0e341a
URL: https://github.com/llvm/llvm-project/commit/ae46b3e01faa0515fd85b8e107389348bd0e341a
DIFF: https://github.com/llvm/llvm-project/commit/ae46b3e01faa0515fd85b8e107389348bd0e341a.diff
LOG: Revert D121279 "[MLIR][GPU] Add canonicalizer for gpu.memcpy"
This reverts commit 12f55cac69d8978d1c433756a8b2114bf9ed1e1b.
Causes miscompile. Will follow up with a reproduce.
Added:
Modified:
mlir/include/mlir/Dialect/GPU/GPUOps.td
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/test/Dialect/GPU/canonicalize.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 407ab3765770f..dc87edb679d84 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -1011,7 +1011,6 @@ def GPU_MemcpyOp : GPU_Op<"memcpy", [GPU_AsyncOpInterface]> {
}];
let hasFolder = 1;
let hasVerifier = 1;
- let hasCanonicalizer = 1;
}
def GPU_MemsetOp : GPU_Op<"memset",
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index b87256dd994ea..9de7c7e1145a9 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -1124,55 +1124,6 @@ LogicalResult MemcpyOp::verify() {
return success();
}
-namespace {
-
-/// Erases a common case of copy ops where a destination value is used only by
-/// the copy op, alloc and dealloc ops.
-struct EraseTrivialCopyOp : public OpRewritePattern<MemcpyOp> {
- using OpRewritePattern<MemcpyOp>::OpRewritePattern;
-
- LogicalResult matchAndRewrite(MemcpyOp op,
- PatternRewriter &rewriter) const override {
- Value dest = op.dst();
- // If `dest` is a block argument, we cannot remove `op`.
- if (dest.isa<BlockArgument>())
- return failure();
- auto isDeallocLikeOpActingOnVal = [](Operation *op, Value val) {
- auto memOp = dyn_cast<MemoryEffectOpInterface>(op);
- if (!memOp)
- return false;
- llvm::SmallVector<SideEffects::EffectInstance<MemoryEffects::Effect>, 4>
- memOpEffects;
- memOp.getEffects(memOpEffects);
- return llvm::none_of(memOpEffects, [val](auto &effect) {
- return effect.getValue() == val &&
- !isa<MemoryEffects::Free>(effect.getEffect());
- });
- };
- // We can erase `op` iff `dest` has no other use apart from its
- // use by `op` and dealloc ops.
- if (llvm::any_of(dest.getUsers(), [isDeallocLikeOpActingOnVal, op,
- dest](Operation *user) {
- return user != op && !isDeallocLikeOpActingOnVal(user, dest);
- }))
- return failure();
-
- if (op.asyncDependencies().size() > 1 ||
- ((op.asyncDependencies().empty() && op.asyncToken()) ||
- (!op.asyncDependencies().empty() && !op.asyncToken())))
- return failure();
- rewriter.replaceOp(op, op.asyncDependencies());
- return success();
- }
-};
-
-} // end anonymous namespace
-
-void MemcpyOp::getCanonicalizationPatterns(RewritePatternSet &results,
- MLIRContext *context) {
- results.add<EraseTrivialCopyOp>(context);
-}
-
//===----------------------------------------------------------------------===//
// GPU_SubgroupMmaLoadMatrixOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
index db577f0558aa7..232d96e7435aa 100644
--- a/mlir/test/Dialect/GPU/canonicalize.mlir
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -28,60 +28,6 @@ func.func @fold_wait_op_test2(%arg0: i1) -> (memref<5xf16>, memref<5xf16>) {
// CHECK-NEXT: gpu.alloc async [%[[TOKEN1]]] ()
// CHECK-NEXT: return
-// CHECK-LABEL: func @fold_memcpy_op
-func.func @fold_memcpy_op(%arg0: i1) {
- %cst = arith.constant 0.000000e+00 : f16
- %1 = memref.alloc() : memref<2xf16>
- %2 = gpu.wait async
- %memref, %asyncToken = gpu.alloc async [%2] () : memref<2xf16>
- gpu.wait [%2]
- affine.store %cst, %memref[0] : memref<2xf16>
- %3 = gpu.wait async
- %4 = gpu.memcpy async [%3] %1, %memref : memref<2xf16>, memref<2xf16>
- gpu.wait [%3]
- %5 = scf.if %arg0 -> (i1) {
- memref.dealloc %1 : memref<2xf16>
- scf.yield %arg0 : i1
- } else {
- memref.dealloc %1 : memref<2xf16>
- scf.yield %arg0 : i1
- }
- return
-}
-// CHECK-NOT: gpu.memcpy
-
-// We cannot fold memcpy here as dest is a block argument.
-// CHECK-LABEL: func @do_not_fold_memcpy_op1
-func.func @do_not_fold_memcpy_op1(%arg0: i1, %arg1: memref<2xf16>) {
- %cst = arith.constant 0.000000e+00 : f16
- %2 = gpu.wait async
- %memref, %asyncToken = gpu.alloc async [%2] () : memref<2xf16>
- gpu.wait [%2]
- affine.store %cst, %memref[0] : memref<2xf16>
- %3 = gpu.wait async
- %4 = gpu.memcpy async [%3] %arg1, %memref : memref<2xf16>, memref<2xf16>
- gpu.wait [%3]
- return
-}
-// CHECK: gpu.memcpy
-
-// We cannot fold gpu.memcpy as it is used by an op having read effect on dest.
-// CHECK-LABEL: func @do_not_fold_memcpy_op2
-func.func @do_not_fold_memcpy_op2(%arg0: i1, %arg1: index) -> f16 {
- %cst = arith.constant 0.000000e+00 : f16
- %1 = memref.alloc() : memref<2xf16>
- %2 = gpu.wait async
- %memref, %asyncToken = gpu.alloc async [%2] () : memref<2xf16>
- gpu.wait [%2]
- affine.store %cst, %memref[0] : memref<2xf16>
- %3 = gpu.wait async
- %4 = gpu.memcpy async [%3] %1, %memref : memref<2xf16>, memref<2xf16>
- gpu.wait [%3]
- %5 = memref.load %1[%arg1] : memref<2xf16>
- return %5 : f16
-}
-// CHECK: gpu.memcpy
-
// CHECK-LABEL: @memcpy_after_cast
func.func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
// CHECK-NOT: memref.cast
More information about the Mlir-commits
mailing list