[Mlir-commits] [mlir] 7716e55 - [mlir] Fixes to hoist padding
Nicolas Vasilache
llvmlistbot at llvm.org
Wed Mar 24 04:56:20 PDT 2021
Author: Nicolas Vasilache
Date: 2021-03-24T11:51:28Z
New Revision: 7716e5535c6b248b5faabd2d1af01415a78da8d7
URL: https://github.com/llvm/llvm-project/commit/7716e5535c6b248b5faabd2d1af01415a78da8d7
DIFF: https://github.com/llvm/llvm-project/commit/7716e5535c6b248b5faabd2d1af01415a78da8d7.diff
LOG: [mlir] Fixes to hoist padding
Fix the BlockAndValueMapping update, which was missing entries for the scf.for op's region iter args and results.
Skip cloning subtensor ops whose source is the packed tensor, as the logic for these is separate.
Add a filter to drop side-effecting ops.
Tests are beefed up to verify the IR is sound in all hoisting configurations for 2-level 3-D tiled matmul.
Differential Revision: https://reviews.llvm.org/D99255
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
mlir/test/Dialect/Linalg/hoist-padding.mlir
mlir/test/lib/Transforms/TestLinalgTransforms.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 3baf9b41fe7d..b4a2182cf23a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -793,7 +793,15 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
backwardSlice.insert(padTensorOp);
// Stack step 1. iteratively clone loops and push `packedTensor`.
for (Operation *op : backwardSlice) {
- if (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op)) {
+ // Specifically sit out in the subtensor(packedTensor) case: this is the
+ // piece we seek to replace.
+ if (auto subTensor = dyn_cast<SubTensorOp>(op))
+ if (bvm.lookupOrDefault(subTensor.source()) == packedTensor)
+ continue;
+ auto effects = dyn_cast<MemoryEffectOpInterface>(op);
+ bool hasNoEffects = !effects || effects.hasNoEffect();
+ if (hasNoEffects &&
+ (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op))) {
b.clone(*op, bvm);
continue;
}
@@ -808,8 +816,10 @@ LogicalResult mlir::linalg::hoistPaddingOnTensors(PadTensorOp &padTensorOp,
b.create<scf::ForOp>(loc, bvm.lookupOrDefault(forOp.lowerBound()),
bvm.lookupOrDefault(forOp.upperBound()),
bvm.lookupOrDefault(forOp.step()), packedTensor);
-
+ // Map the induction var, region args and results to the `clonedForOp`.
bvm.map(forOp.getInductionVar(), clonedForOp.getInductionVar());
+ bvm.map(forOp.getRegionIterArgs(), clonedForOp.getRegionIterArgs());
+ bvm.map(forOp.getResults(), clonedForOp.getResults());
assert(clonedForOp->getNumRegions() == 1);
clonedLoopIvs.push_back(clonedForOp.getInductionVar());
diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
index 2459d2af4546..248aa6414ad8 100644
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -1,4 +1,13 @@
-// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding-2-level -canonicalize | FileCheck %s
+// Specific structural checks are performed on 2-level hoisting
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=2 -canonicalize | FileCheck %s
+
+// IR verification is performed on [0-6]-level hoisting
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=0 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=1 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=3 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=4 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=5 | FileCheck %s --check-prefix=VERIFIER-ONLY
+// RUN: mlir-opt %s -split-input-file -test-linalg-transform-patterns=test-hoist-padding=6 | FileCheck %s --check-prefix=VERIFIER-ONLY
// CHECK-DAG: #[[$DIV3:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 3)>
// CHECK-DAG: #[[$DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)>
@@ -14,6 +23,7 @@
// CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor
// CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor
// CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor
+// VERIFIER-ONLY-LABEL: func @matmul_tensors
func @matmul_tensors(
%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
-> tensor<?x?xf32>
@@ -140,6 +150,7 @@ func @matmul_tensors(
#map2 = affine_map<(d0, d1) -> (2, d0 - d1)>
// CHECK-LABEL: func @dot
+// VERIFIER-ONLY-LABEL: func @dot
func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
-> tensor<f32>
{
@@ -217,3 +228,63 @@ func @dot(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>, %arg2: tensor<f32>)
}
return %4 : tensor<f32>
}
+
+// -----
+
+// CHECK-LABEL: func @matmul_2d_tiling
+// VERIFIER-ONLY-LABEL: func @matmul_2d_tiling
+func @matmul_2d_tiling(%arg0: tensor<32x128xf32>, %arg1: tensor<128x64xf32>, %arg2: tensor<32x64xf32>) -> tensor<32x64xf32> {
+ %c128 = constant 128 : index
+ %c64 = constant 64 : index
+ %c32 = constant 32 : index
+ %c16 = constant 16 : index
+ %cst = constant 0.000000e+00 : f32
+ %c2 = constant 2 : index
+ %c4 = constant 4 : index
+ %c0 = constant 0 : index
+ %1 = scf.for %arg3 = %c0 to %c32 step %c16 iter_args(%arg4 = %arg2) -> (tensor<32x64xf32>) {
+ %2 = scf.for %arg5 = %c0 to %c64 step %c32 iter_args(%arg6 = %arg4) -> (tensor<32x64xf32>) {
+ %3 = scf.for %arg7 = %c0 to %c128 step %c32 iter_args(%arg8 = %arg6) -> (tensor<32x64xf32>) {
+ %4 = subtensor %arg0[%arg3, %arg7] [16, 32] [1, 1] : tensor<32x128xf32> to tensor<16x32xf32>
+ %5 = subtensor %arg1[%arg7, %arg5] [32, 32] [1, 1] : tensor<128x64xf32> to tensor<32x32xf32>
+ %6 = subtensor %arg8[%arg3, %arg5] [16, 32] [1, 1] : tensor<32x64xf32> to tensor<16x32xf32>
+ %7 = scf.for %arg9 = %c0 to %c16 step %c2 iter_args(%arg10 = %6) -> (tensor<16x32xf32>) {
+ %10 = scf.for %arg11 = %c0 to %c32 step %c4 iter_args(%arg12 = %arg10) -> (tensor<16x32xf32>) {
+ %11 = scf.for %arg13 = %c0 to %c32 step %c16 iter_args(%arg14 = %arg12) -> (tensor<16x32xf32>) {
+ %12 = subtensor %4[%arg9, %arg13] [2, 16] [1, 1] : tensor<16x32xf32> to tensor<2x16xf32>
+ %13 = tensor.cast %12 : tensor<2x16xf32> to tensor<?x?xf32>
+ %14 = subtensor %5[%arg13, %arg11] [16, 4] [1, 1] : tensor<32x32xf32> to tensor<16x4xf32>
+ %15 = tensor.cast %14 : tensor<16x4xf32> to tensor<?x?xf32>
+ %16 = subtensor %arg14[%arg9, %arg11] [2, 4] [1, 1] : tensor<16x32xf32> to tensor<2x4xf32>
+ %17 = tensor.cast %16 : tensor<2x4xf32> to tensor<?x?xf32>
+ %18 = linalg.pad_tensor %13 low[%c0, %c0] high[%c0, %c0] {
+ ^bb0(%arg15: index, %arg16: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<2x16xf32>
+ %19 = linalg.pad_tensor %15 low[%c0, %c0] high[%c0, %c0] {
+ ^bb0(%arg15: index, %arg16: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<16x4xf32>
+ %20 = linalg.pad_tensor %17 low[%c0, %c0] high[%c0, %c0] {
+ ^bb0(%arg15: index, %arg16: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<2x4xf32>
+ %21 = linalg.matmul ins(%18, %19 : tensor<2x16xf32>, tensor<16x4xf32>) outs(%20 : tensor<2x4xf32>) -> tensor<2x4xf32>
+ %22 = tensor.cast %21 : tensor<2x4xf32> to tensor<?x?xf32>
+ %23 = subtensor_insert %22 into %arg14[%arg9, %arg11] [%c2, %c4] [1, 1] : tensor<?x?xf32> into tensor<16x32xf32>
+ scf.yield %23 : tensor<16x32xf32>
+ }
+ scf.yield %11 : tensor<16x32xf32>
+ }
+ scf.yield %10 : tensor<16x32xf32>
+ }
+ %8 = tensor.cast %7 : tensor<16x32xf32> to tensor<?x?xf32>
+ %9 = subtensor_insert %8 into %arg8[%arg3, %arg5] [%c16, %c32] [1, 1] : tensor<?x?xf32> into tensor<32x64xf32>
+ scf.yield %9 : tensor<32x64xf32>
+ }
+ scf.yield %3 : tensor<32x64xf32>
+ }
+ scf.yield %2 : tensor<32x64xf32>
+ }
+ return %1 : tensor<32x64xf32>
+}
diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
index 276a9f7c7fc3..fd8fb3bc6eff 100644
--- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp
@@ -84,9 +84,9 @@ struct TestLinalgTransforms
Option<bool> testTileAndPadPattern{
*this, "test-tile-and-pad-pattern",
llvm::cl::desc("Test tile and pad pattern"), llvm::cl::init(false)};
- Option<bool> testHoistPadding2Levels{*this, "test-hoist-padding-2-level",
- llvm::cl::desc("Test hoist padding"),
- llvm::cl::init(false)};
+ Option<int> testHoistPadding{*this, "test-hoist-padding",
+ llvm::cl::desc("Test hoist padding"),
+ llvm::cl::init(0)};
};
} // end anonymous namespace
@@ -571,9 +571,9 @@ void TestLinalgTransforms::runOnFunction() {
return applyAffineMinSCFCanonicalizationPatterns(getFunction());
if (testTileAndPadPattern)
return applyTileAndPadPattern(getFunction());
- if (testHoistPadding2Levels) {
- getFunction().walk([](linalg::PadTensorOp padTensorOp) {
- (void)linalg::hoistPaddingOnTensors(padTensorOp, 2);
+ if (testHoistPadding) {
+ getFunction().walk([&](linalg::PadTensorOp padTensorOp) {
+ (void)linalg::hoistPaddingOnTensors(padTensorOp, testHoistPadding);
});
}
}
More information about the Mlir-commits mailing list