[Mlir-commits] [mlir] 4b03906 - [mlir][linalg] Perform checks early in hoist padding.
Tobias Gysi
llvmlistbot at llvm.org
Thu Nov 25 02:41:20 PST 2021
Author: Tobias Gysi
Date: 2021-11-25T10:37:12Z
New Revision: 4b03906346a89b8c7e96b5ffe762183fb0fff7cd
URL: https://github.com/llvm/llvm-project/commit/4b03906346a89b8c7e96b5ffe762183fb0fff7cd
DIFF: https://github.com/llvm/llvm-project/commit/4b03906346a89b8c7e96b5ffe762183fb0fff7cd.diff
LOG: [mlir][linalg] Perform checks early in hoist padding.
Instead of checking for unexpected operations (any operation with a region other than scf::ForOp or `padTensorOp`, or any operation with a memory effect) while cloning the packing loop nest, perform the checks early. Update `dropNonIndexDependencies` to check for unexpected operations. Additionally, check that all of these operations have index-type operands only.
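For illustration, a minimal standalone sketch of the early check this change introduces (the helper name `isSupportedIndexOp` is hypothetical; the individual calls mirror those in the diff below):

```cpp
#include "mlir/IR/Operation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "llvm/ADT/STLExtras.h"

using namespace mlir;

// Sketch only (not part of the patch): reject ops that are not pure index
// computations, mirroring the checks moved into dropNonIndexDependencies.
static LogicalResult isSupportedIndexOp(Operation *op) {
  // All operands must have index type.
  if (llvm::any_of(op->getOperandTypes(),
                   [](Type type) { return !type.isIndex(); }))
    return failure();
  // The op must not have regions or memory effects.
  auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
  bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
  if (hasMemoryEffect || op->getNumRegions() != 0)
    return failure();
  return success();
}
```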
Depends On D114428
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D114438
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
mlir/test/Dialect/Linalg/pad-and-hoist.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index ba46a813d1ea..29cc48350938 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -43,7 +43,8 @@ using namespace mlir::linalg;
/// 2. Pad op does not have a constant padding value.
/// 3. There is no immediately enclosing scf::ForOp.
/// 4. The backward slice from the pad op to the scf::ForOp to hoist above
-/// contains an unknown op with a region.
+/// contains an unknown op with non index type operands, a region, or a
+/// memory effect.
/// 5. The backward slice from the pad op to the scf::ForOp to hoist above is
/// empty.
/// 6. The source tensor of pad op is not defined by an extract slice op.
@@ -80,7 +81,8 @@ struct HoistingAnalysis {
/// operands consumed by `padTensorOp` and `sliceOp` and drops the operations
/// not part of this index computation. Afterwards, the filtered
/// `backwardSlice` contains only the loops whose induction variable is used,
- /// directly or indirectly, to index the padded tensor.
+ /// directly or indirectly, to index the padded tensor. The method returns
+ /// failure if the filtered backward slice contains an unexpected operation.
///
/// Example:
/// ```
@@ -96,8 +98,8 @@ struct HoistingAnalysis {
/// ```
/// dropNonIndexDependencies(%padded_slice, %slice)
/// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
- void dropNonIndexDependencies(PadTensorOp padTensorOp,
- tensor::ExtractSliceOp sliceOp);
+ LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp,
+ tensor::ExtractSliceOp sliceOp);
/// Encodes whether the analysis is valid and hoisting can proceed.
bool valid;
@@ -209,18 +211,8 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
// Remove all ops in the backward slice that are not used to index the padded
// tensor. In particular, keep `padTensorOp`, `sliceOp`, and the loop and
// affine operations used for the index computation.
- dropNonIndexDependencies(padTensorOp, sliceOp);
-
- // Check if an op has a region it is either `padTensorOp`, a scf::ForOp, or a
- // LinalgOp.
- for (Operation *op : backwardSlice) {
- if (op != padTensorOp && op->getNumRegions() > 0 &&
- !isa<scf::ForOp, LinalgOp>(op)) {
- LLVM_DEBUG(DBGS() << "Unsupported op with region: " << *op
- << " -> skip\n");
- return;
- }
- }
+ if (failed(dropNonIndexDependencies(padTensorOp, sliceOp)))
+ return;
// Add only the loops part of the filtered `backwardSlice` to the packing
// loops. All other loops are not used to index the padded data and
@@ -239,8 +231,9 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
valid = true;
}
-void HoistingAnalysis::dropNonIndexDependencies(
- PadTensorOp padTensorOp, tensor::ExtractSliceOp sliceOp) {
+LogicalResult
+HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp,
+ tensor::ExtractSliceOp sliceOp) {
// Set of all values used for index computation.
SetVector<Value> indexEdges;
@@ -289,7 +282,7 @@ void HoistingAnalysis::dropNonIndexDependencies(
// Add the index operands of the loop if its induction variable is
// used for index computation.
if (auto forOp = dyn_cast<scf::ForOp>(op)) {
- if (indexEdges.contains(forOp.getInductionVar())) {
+ if (!hasIndexResult(op) && indexEdges.contains(forOp.getInductionVar())) {
addIndexOperandsToIndexEdges(op);
continue;
}
@@ -298,6 +291,21 @@ void HoistingAnalysis::dropNonIndexDependencies(
// used for index computation.
if (hasIndexResult(op)) {
addIndexOperandsToIndexEdges(op);
+ // Check the operands of the remaining operations all have index type.
+ if (llvm::any_of(op->getOperandTypes(),
+ [](Type type) { return !type.isIndex(); })) {
+ LLVM_DEBUG(DBGS() << "Unsupported op with non index type operands: "
+ << op << " -> skip\n");
+ return failure();
+ }
+ // Check the remaining operations do not have regions or memory effects.
+ auto effectInterface = dyn_cast<MemoryEffectOpInterface>(op);
+ bool hasMemoryEffect = effectInterface && !effectInterface.hasNoEffect();
+ if (hasMemoryEffect || op->getNumRegions() != 0) {
+ LLVM_DEBUG(DBGS() << "Unsupported op with region or memory effect: "
+ << op << " -> skip\n");
+ return failure();
+ }
continue;
}
// Remove all other operation not used by the index computation except for
@@ -305,6 +313,7 @@ void HoistingAnalysis::dropNonIndexDependencies(
if (!isa<arith::ConstantOp>(op))
backwardSlice.remove(op);
}
+ return success();
}
SmallVector<Value>
@@ -416,18 +425,13 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
if (auto sliceOp = dyn_cast<tensor::ExtractSliceOp>(op))
if (bvm.lookupOrDefault(sliceOp.source()) == packedTensor)
continue;
- auto effects = dyn_cast<MemoryEffectOpInterface>(op);
- bool hasNoEffects = !effects || effects.hasNoEffect();
- if (hasNoEffects &&
- (op->getNumRegions() == 0 || isa<linalg::PadTensorOp>(op))) {
+ // Clone all operations except scf::ForOp loops.
+ auto forOp = dyn_cast<scf::ForOp>(op);
+ if (!forOp) {
b.clone(*op, bvm);
continue;
}
- // TODO: support more cases as they appear.
- auto forOp = dyn_cast<scf::ForOp>(op);
- assert(forOp && llvm::is_contained(analysis.packingLoops, forOp) &&
- "expect an scf::ForOp that is a packing loop");
-
+ // Create a packing loop that takes `packedTensor` as iteration argument.
auto clonedForOp =
b.create<scf::ForOp>(loc, bvm.lookupOrDefault(forOp.lowerBound()),
bvm.lookupOrDefault(forOp.upperBound()),
diff --git a/mlir/test/Dialect/Linalg/pad-and-hoist.mlir b/mlir/test/Dialect/Linalg/pad-and-hoist.mlir
index d3f86d36504a..31a5469d03c6 100644
--- a/mlir/test/Dialect/Linalg/pad-and-hoist.mlir
+++ b/mlir/test/Dialect/Linalg/pad-and-hoist.mlir
@@ -436,3 +436,154 @@ func @non_constant_padding(%arg0: tensor<24x12xf32>,
return %0 : tensor<24x25xf32>
}
+// -----
+
+#map0 = affine_map<(d0) -> (5, -d0 + 24)>
+#map1 = affine_map<(d0) -> (7, -d0 + 25)>
+#map2 = affine_map<(d0) -> (-d0 + 5)>
+#map3 = affine_map<(d0) -> (-d0 + 7)>
+
+// CHECK: unexpected_operation
+// CHECK-DOUBLE: unexpected_operation
+// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]*]]: memref<?xindex>
+// CHECK-SAME: %[[ARG4:[0-9a-zA-Z]*]]: i32
+func @unexpected_operation(%arg0: tensor<24x12xf32>,
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>,
+ %arg3: memref<?xindex>,
+ %arg4: i32) -> tensor<24x25xf32> {
+ %cst = arith.constant 0.000000e+00 : f32
+ %c5 = arith.constant 5 : index
+ %c7 = arith.constant 7 : index
+ %c6 = arith.constant 6 : index
+ %c24 = arith.constant 24 : index
+ %c25 = arith.constant 25 : index
+ %c12 = arith.constant 12 : index
+ %c0 = arith.constant 0 : index
+
+ // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
+ %0 = scf.for %arg5 = %c0 to %c24 step %c5 iter_args(%arg6 = %arg2) -> (tensor<24x25xf32>) {
+
+ // CHECK-NEXT: scf.for %[[IV1:[0-9a-zA-Z]*]] =
+ %1 = scf.for %arg7 = %c0 to %c25 step %c7 iter_args(%arg8 = %arg6) -> (tensor<24x25xf32>) {
+
+ // CHECK-NEXT: scf.for %[[IV2:[0-9a-zA-Z]*]] =
+ %2 = scf.for %arg9 = %c0 to %c12 step %c6 iter_args(%arg10 = %arg8) -> (tensor<24x25xf32>) {
+ %3 = affine.min #map0(%arg5)
+ %4 = tensor.extract_slice %arg0[%arg5, %arg9] [%3, 6] [1, 1] : tensor<24x12xf32> to tensor<?x6xf32>
+ %5 = affine.min #map1(%arg7)
+ %6 = tensor.extract_slice %arg1[%arg9, %arg7] [6, %5] [1, 1] : tensor<12x25xf32> to tensor<6x?xf32>
+ %7 = tensor.extract_slice %arg10[%arg5, %arg7] [%3, %5] [1, 1] : tensor<24x25xf32> to tensor<?x?xf32>
+ %8 = affine.apply #map2(%3)
+
+ // Check cannot hoist due to unexpected operation with memory effect.
+ // CHECK: %[[IDX0:.*]] = memref.load %[[ARG3]]
+ // CHECK: %[[T0:.*]] = linalg.pad_tensor {{.*}}, %[[IDX0]]
+ %9 = memref.load %arg3[%c0] : memref<?xindex>
+ %10 = linalg.pad_tensor %4 nofold low[%c0, %c0] high[%8, %9] {
+ ^bb0(%arg11: index, %arg12: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<?x6xf32> to tensor<5x6xf32>
+ %11 = affine.apply #map3(%5)
+
+ // Check cannot hoist due to unexpected operation with non index operand.
+ // CHECK: %[[IDX1:.*]] = arith.index_cast %[[ARG4]]
+ // CHECK: %[[T1:.*]] = linalg.pad_tensor {{.*}}[%[[IDX1]]
+ %12 = arith.index_cast %arg4 : i32 to index
+ %13 = linalg.pad_tensor %6 nofold low[%c0, %c0] high[%12, %11] {
+ ^bb0(%arg11: index, %arg12: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<6x?xf32> to tensor<6x7xf32>
+ %14 = linalg.pad_tensor %7 low[%c0, %c0] high[%8, %11] {
+ ^bb0(%arg11: index, %arg12: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<5x7xf32>
+
+ // CHECK: = linalg.matmul ins(%[[T0]], %[[T1]]
+ %15 = linalg.matmul ins(%10, %13 : tensor<5x6xf32>, tensor<6x7xf32>) outs(%14 : tensor<5x7xf32>) -> tensor<5x7xf32>
+ %16 = tensor.extract_slice %15[0, 0] [%3, %5] [1, 1] : tensor<5x7xf32> to tensor<?x?xf32>
+ %17 = tensor.insert_slice %16 into %arg10[%arg5, %arg7] [%3, %5] [1, 1] : tensor<?x?xf32> into tensor<24x25xf32>
+ scf.yield %17 : tensor<24x25xf32>
+ }
+ scf.yield %2 : tensor<24x25xf32>
+ }
+ scf.yield %1 : tensor<24x25xf32>
+ }
+ return %0 : tensor<24x25xf32>
+}
+
+// -----
+
+#map0 = affine_map<(d0) -> (5, -d0 + 24)>
+#map1 = affine_map<(d0) -> (7, -d0 + 25)>
+#map2 = affine_map<(d0) -> (-d0 + 5)>
+#map3 = affine_map<(d0) -> (-d0 + 7)>
+
+// CHECK: unexpected_loop
+// CHECK-DOUBLE: unexpected_loop
+// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]*]]: index
+func @unexpected_loop(%arg0: tensor<24x12xf32>,
+ %arg1: tensor<12x25xf32>,
+ %arg2: tensor<24x25xf32>,
+ %arg3: index) -> tensor<24x25xf32> {
+ %c0 = arith.constant 0 : index
+ %c12 = arith.constant 12 : index
+ %c25 = arith.constant 25 : index
+ %c24 = arith.constant 24 : index
+ %c6 = arith.constant 6 : index
+ %c7 = arith.constant 7 : index
+ %c5 = arith.constant 5 : index
+ %cst = arith.constant 0.000000e+00 : f32
+
+ // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] =
+ %0 = scf.for %arg4 = %c0 to %c24 step %c5 iter_args(%arg5 = %arg2) -> (tensor<24x25xf32>) {
+
+ // CHECK-NEXT: scf.for %[[IV1:[0-9a-zA-Z]*]] =
+ %1 = scf.for %arg6 = %c0 to %c25 step %c7 iter_args(%arg7 = %arg5) -> (tensor<24x25xf32>) {
+
+ // Check the padding of the first input operand is hoisted.
+ // CHECK: = linalg.pad_tensor
+
+ // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] =
+ %2 = scf.for %arg8 = %c0 to %c12 step %c6 iter_args(%arg9 = %arg7) -> (tensor<24x25xf32>) {
+ %3 = affine.min #map0(%arg4)
+ %4 = tensor.extract_slice %arg0[%arg4, %arg8] [%3, 6] [1, 1] : tensor<24x12xf32> to tensor<?x6xf32>
+ %5 = affine.min #map1(%arg6)
+ %6 = tensor.extract_slice %arg1[%arg8, %arg6] [6, %5] [1, 1] : tensor<12x25xf32> to tensor<6x?xf32>
+ %7 = tensor.extract_slice %arg9[%arg4, %arg6] [%3, %5] [1, 1] : tensor<24x25xf32> to tensor<?x?xf32>
+ %8 = affine.apply #map2(%3)
+
+ // Check cannot hoist due to unexpected operation that has a region.
+ // CHECK: %[[IDX0:.*]] = scf.for {{.*}} step %[[ARG3]]
+ // CHECK: %[[T0:.*]] = linalg.pad_tensor {{.*}}, %[[IDX0]]
+ %9 = scf.for %arg10 = %c0 to %c24 step %arg3 iter_args(%arg11 = %c0) -> (index) {
+ %17 = arith.addi %arg3, %arg11 : index
+ scf.yield %17 : index
+ }
+ %10 = linalg.pad_tensor %4 nofold low[%c0, %c0] high[%8, %9] {
+ ^bb0(%arg10: index, %arg11: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<?x6xf32> to tensor<5x6xf32>
+ %11 = affine.apply #map3(%5)
+ %12 = linalg.pad_tensor %6 nofold low[%c0, %c0] high[%c0, %11] {
+ ^bb0(%arg10: index, %arg11: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<6x?xf32> to tensor<6x7xf32>
+ %13 = linalg.pad_tensor %7 low[%c0, %c0] high[%8, %11] {
+ ^bb0(%arg10: index, %arg11: index): // no predecessors
+ linalg.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<5x7xf32>
+
+ // CHECK: = linalg.matmul ins(%[[T0]]
+ %14 = linalg.matmul ins(%10, %12 : tensor<5x6xf32>, tensor<6x7xf32>) outs(%13 : tensor<5x7xf32>) -> tensor<5x7xf32>
+ %15 = tensor.extract_slice %14[0, 0] [%3, %5] [1, 1] : tensor<5x7xf32> to tensor<?x?xf32>
+ %16 = tensor.insert_slice %15 into %arg9[%arg4, %arg6] [%3, %5] [1, 1] : tensor<?x?xf32> into tensor<24x25xf32>
+ scf.yield %16 : tensor<24x25xf32>
+ }
+ scf.yield %2 : tensor<24x25xf32>
+ }
+ scf.yield %1 : tensor<24x25xf32>
+ }
+ return %0 : tensor<24x25xf32>
+}
+