[Mlir-commits] [mlir] 7716e55 - [mlir] Fixes to hoist padding

Wed Mar 24 04:56:20 PDT 2021

Author: Nicolas Vasilache
Date: 2021-03-24T11:51:28Z
New Revision: 7716e5535c6b248b5faabd2d1af01415a78da8d7

URL: https://github.com/llvm/llvm-project/commit/7716e5535c6b248b5faabd2d1af01415a78da8d7
DIFF: https://github.com/llvm/llvm-project/commit/7716e5535c6b248b5faabd2d1af01415a78da8d7.diff

LOG: [mlir] Fixes to hoist padding

Fix the BlockAndValueMapping update that was missing entries for the scf.for op's region iter args and results.
Skip cloning subtensors of the padded tensor as the logic for these is separate.
Add a filter to drop side-effecting ops.

Tests are beefed up to verify the IR is sound in all hoisting configurations for 2-level 3-D tiled matmul.

Differential Revision: https://reviews.llvm.org/D99255

Added: 
    

Modified: 
    mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
    mlir/test/Dialect/Linalg/hoist-padding.mlir
    mlir/test/lib/Transforms/TestLinalgTransforms.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 3baf9b41fe7d..b4a2182cf23a 100644

--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -793,7 +793,15 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
   backwardSlice.insert(padTensorOp);
   // Stack step 1. iteratively clone loops and push `packedTensor`.
   for (Operation *op : backwardSlice) {
-    if (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op)) {
+    // Specifically sit out in the subtensor(packedTensor) case: this is the
+    // piece we seek to replace.
+    if (auto subTensor = dyn_cast<SubTensorOp>(op))
+      if (bvm.lookupOrDefault(subTensor.source()) == packedTensor)
+        continue;
+    auto effects = dyn_cast<MemoryEffectOpInterface>(op);
+    bool hasNoEffects = !effects || effects.hasNoEffect();
+    if (hasNoEffects &&
+        (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op))) {
       b.clone(*op, bvm);
       continue;
     }
@@ -808,8 +816,10 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
         b.create<scf::ForOp>(loc, bvm.lookupOrDefault(forOp.lowerBound()),
                              bvm.lookupOrDefault(forOp.upperBound()),
                              bvm.lookupOrDefault(forOp.step()), packedTensor);
-
+    // Map the induction var, region args and results to the `clonedForOp`.
     bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
+    bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
+    bvm.map(forOp.getResults(), clonedForOp.getResults());
     assert(clonedForOp->getNumRegions() == 1);
     clonedLoopIvs.push_back(clonedForOp.getInductionVar());
 

diff  --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
index 2459d2af4546..248aa6414ad8 100644
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -1,4 +1,13 @@
-// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding-2-level -canonicalize | FileCheck %s
+// Specific structural checks are performed on 2-level hoisting
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=2 -canonicalize | FileCheck %s
+
+// IR verification is performed on [0-6]-level hoisting
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=0 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=1 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=3 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=4 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=5 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=6 | FileCheck %s --check-prefix=VERIFIER-ONLY
 
 // CHECK-DAG: #[[$DIV3:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 3)>
 // CHECK-DAG: #[[$DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)>
@@ -14,6 +23,7 @@
 //  CHECK-SAME:   %[[TA:[0-9a-z]+]]: tensor
 //  CHECK-SAME:   %[[TB:[0-9a-z]+]]: tensor
 //  CHECK-SAME:   %[[TC:[0-9a-z]+]]: tensor
+// VERIFIER-ONLY-LABEL: func @matmul_tensors
 func @matmul_tensors(
   %arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
   -> tensor<?x?xf32>
@@ -140,6 +150,7 @@ func @matmul_tensors(
 #map2 = affine_map<(d0, d1) -> (2, d0 - d1)>
 
 // CHECK-LABEL: func @dot
+// VERIFIER-ONLY-LABEL: func @dot
 func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
     -> tensor<f32>
 {
@@ -217,3 +228,63 @@ func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
   }
   return %4 : tensor<f32>
 }
+
+// -----
+
+// CHECK-LABEL: func @matmul_2d_tiling
+// VERIFIER-ONLY-LABEL: func @matmul_2d_tiling
+func @matmul_2d_tiling(%arg0: tensor<32x128xf32>, %arg1: tensor<128x64xf32>, %arg2: tensor<32x64xf32>) -> tensor<32x64xf32> {
+  %c128 = constant 128 : index
+  %c64 = constant 64 : index
+  %c32 = constant 32 : index
+  %c16 = constant 16 : index
+  %cst = constant 0.000000e+00 : f32
+  %c2 = constant 2 : index
+  %c4 = constant 4 : index
+  %c0 = constant 0 : index
+  %1 = scf.for %arg3 = %c0 to %c32 step %c16 iter_args(%arg4 = %arg2) -> (tensor<32x64xf32>) {
+    %2 = scf.for %arg5 = %c0 to %c64 step %c32 iter_args(%arg6 = %arg4) -> (tensor<32x64xf32>) {
+      %3 = scf.for %arg7 = %c0 to %c128 step %c32 iter_args(%arg8 = %arg6) -> (tensor<32x64xf32>) {
+        %4 = subtensor %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32>
+        %5 = subtensor %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32>
+        %6 = subtensor %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32>
+        %7 = scf.for %arg9 = %c0 to %c16 step %c2 iter_args(%arg10 = %6) -> (tensor<16x32xf32>) {
+          %10 = scf.for %arg11 = %c0 to %c32 step %c4 iter_args(%arg12 = %arg10) -> (tensor<16x32xf32>) {
+            %11 = scf.for %arg13 = %c0 to %c32 step %c16 iter_args(%arg14 = %arg12) -> (tensor<16x32xf32>) {
+              %12 = subtensor %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32>
+              %13 = tensor.cast %12 : tensor<2x16xf32> to tensor<?x?xf32>
+              %14 = subtensor %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32>
+              %15 = tensor.cast %14 : tensor<16x4xf32> to tensor<?x?xf32>
+              %16 = subtensor %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32>
+              %17 = tensor.cast %16 : tensor<2x4xf32> to tensor<?x?xf32>
+              %18 = linalg.pad_tensor %13 low[%c0, %c0] high[%c0, %c0]  {
+              ^bb0(%arg15: index, %arg16: index):  // no predecessors
+                linalg.yield %cst : f32
+              } : tensor<?x?xf32> to tensor<2x16xf32>
+              %19 = linalg.pad_tensor %15 low[%c0, %c0] high[%c0, %c0]  {
+              ^bb0(%arg15: index, %arg16: index):  // no predecessors
+                linalg.yield %cst : f32
+              } : tensor<?x?xf32> to tensor<16x4xf32>
+              %20 = linalg.pad_tensor %17 low[%c0, %c0] high[%c0, %c0]  {
+              ^bb0(%arg15: index, %arg16: index):  // no predecessors
+                linalg.yield %cst : f32
+              } : tensor<?x?xf32> to tensor<2x4xf32>
+              %21 = linalg.matmul ins(%18, %19 : tensor<2x16xf32>, tensor<16x4xf32>) outs(%20 : tensor<2x4xf32>) -> tensor<2x4xf32>
+              %22 = tensor.cast %21 : tensor<2x4xf32> to tensor<?x?xf32>
+              %23 = subtensor_insert %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor<?x?xf32> into tensor<16x32xf32>
+              scf.yield %23 : tensor<16x32xf32>
+            }
+            scf.yield %11 : tensor<16x32xf32>
+          }
+          scf.yield %10 : tensor<16x32xf32>
+        }
+        %8 = tensor.cast %7 : tensor<16x32xf32> to tensor<?x?xf32>
+        %9 = subtensor_insert %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor<?x?xf32> into tensor<32x64xf32>
+        scf.yield %9 : tensor<32x64xf32>
+      }
+      scf.yield %3 : tensor<32x64xf32>
+    }
+    scf.yield %2 : tensor<32x64xf32>
+  }
+  return %1 : tensor<32x64xf32>
+}

diff  --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
index 276a9f7c7fc3..fd8fb3bc6eff 100644
--- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
@@ -84,9 +84,9 @@ struct TestLinalgTransforms
   Option<bool> testTileAndPadPattern{
       *this, "test-tile-and-pad-pattern",
       llvm::cl::desc("Test tile and pad pattern"), llvm::cl::init(false)};
-  Option<bool> testHoistPadding2Levels{*this, "test-hoist-padding-2-level",
-                                       llvm::cl::desc("Test hoist padding"),
-                                       llvm::cl::init(false)};
+  Option<int> testHoistPadding{*this, "test-hoist-padding",
+                               llvm::cl::desc("Test hoist padding"),
+                               llvm::cl::init(0)};
 };
 } // end anonymous namespace
 
@@ -571,9 +571,9 @@ void TestLinalgTransforms::runOnFunction() {
     return applyAffineMinSCFCanonicalizationPatterns(getFunction());
   if (testTileAndPadPattern)
     return applyTileAndPadPattern(getFunction());
-  if (testHoistPadding2Levels) {
-    getFunction().walk([](linalg::PadTensorOp padTensorOp) {
-      (void)linalg::hoistPaddingOnTensors(padTensorOp, 2);
+  if (testHoistPadding) {
+    getFunction().walk([&](linalg::PadTensorOp padTensorOp) {
+      (void)linalg::hoistPaddingOnTensors(padTensorOp, testHoistPadding);
     });
   }
 }