[Mlir-commits] [mlir] 00b6463 - [MLIR][GPU] Simplify memcpy of cast

Mon Jun 7 11:01:00 PDT 2021

Author: William S. Moses
Date: 2021-06-07T14:00:13-04:00
New Revision: 00b6463b269f6815a8a110b9208f483e239ce1e2

URL: https://github.com/llvm/llvm-project/commit/00b6463b269f6815a8a110b9208f483e239ce1e2
DIFF: https://github.com/llvm/llvm-project/commit/00b6463b269f6815a8a110b9208f483e239ce1e2.diff

LOG: [MLIR][GPU] Simplify memcpy of cast

Introduce a simplification that allows a gpu.memcpy whose operand is produced by a memref.cast to use the cast's source value directly.

Differential Revision: https://reviews.llvm.org/D103830

Added: 
    mlir/test/Dialect/GPU/canonicalize.mlir

Modified: 
    mlir/include/mlir/Dialect/GPU/GPUOps.td
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index bc6f3f3169e1c..8e2520b675ae6 100644

--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -900,6 +900,7 @@ def GPU_MemcpyOp : GPU_Op<"memcpy", [GPU_AsyncOpInterface]> {
     $dst`,` $src `:` type($dst)`,` type($src) attr-dict
   }];
   let verifier = [{ return ::verify(*this); }];
+  let hasFolder = 1;
 }
 
 def GPU_SubgroupMmaLoadMatrixOp : GPU_Op<"subgroup_mma_load_matrix",

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 39acf4182863d..f0845c45cd63e 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Dialect/GPU/GPUDialect.h"
 
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
@@ -1066,6 +1067,26 @@ static LogicalResult verify(SubgroupMmaComputeOp op) {
   return success();
 }
 
+/// Helper for folds of the form "someop(memref.cast(x)) -> someop(x)": each
+/// operand defined by a memref.cast is replaced in place by the cast's source.
+/// Returns success if at least one operand was rewritten, failure otherwise.
+static LogicalResult foldMemRefCast(Operation *op) {
+  bool folded = false;
+  for (OpOperand &operand : op->getOpOperands()) {
+    auto cast = operand.get().getDefiningOp<mlir::memref::CastOp>();
+    if (cast) {
+      operand.set(cast.getOperand()); // Bypass the cast; `op` is mutated.
+      folded = true;
+    }
+  }
+  return success(folded);
+}
+
+LogicalResult MemcpyOp::fold(ArrayRef<Attribute> operands,
+                             SmallVectorImpl<::mlir::OpFoldResult> &results) {
+  return foldMemRefCast(*this); // In-place fold; `results` is not written.
+}
+
 #include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc"
 
 #define GET_OP_CLASSES

diff  --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir
new file mode 100644
index 0000000000000..c3cba47192427
--- /dev/null
+++ b/mlir/test/Dialect/GPU/canonicalize.mlir
@@ -0,0 +1,11 @@
+// RUN: mlir-opt %s -canonicalize --split-input-file -allow-unregistered-dialect | FileCheck %s
+
+// CHECK-LABEL: @memcpy_after_cast
+func @memcpy_after_cast(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+  // CHECK-NOT: memref.cast
+  // CHECK: gpu.memcpy
+  %0 = memref.cast %arg0 : memref<10xf32> to memref<?xf32>
+  %1 = memref.cast %arg1 : memref<10xf32> to memref<?xf32>
+  gpu.memcpy %0,%1 : memref<?xf32>, memref<?xf32> // dst and src are both casts
+  return
+}