[Mlir-commits] [mlir] 2e34599 - [mlir][tensor] Remove folding of tensor.extract_slice during tiling

Thomas Raoux llvmlistbot at llvm.org
Fri Aug 26 07:39:35 PDT 2022


Author: Thomas Raoux
Date: 2022-08-26T14:30:39Z
New Revision: 2e34599bfd01e5b20e09bd6af590a52d6a63a64c

URL: https://github.com/llvm/llvm-project/commit/2e34599bfd01e5b20e09bd6af590a52d6a63a64c
DIFF: https://github.com/llvm/llvm-project/commit/2e34599bfd01e5b20e09bd6af590a52d6a63a64c.diff

LOG: [mlir][tensor] Remove folding of tensor.extract_slice during tiling

Blindly folding tensor.extract_slice makes the bufferization
transformation harder. This kind of transformation should happen
separately, if needed, rather than within makeShape, which is called
during tiling.
Also remove makeComposedExtractSliceOp, as it is not tested outside of
this code.
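
For illustration, the removed helper composed chained slices into a
single op by adding their offsets, whereas tiling now emits the nested
extract_slice as-is and leaves any folding to a separate pass. A rough
sketch of the difference, adapted from the doc comment removed from
Utils.h below:

```
// Previously, tiling a slice of %arg0 could fold directly into one op
// (offsets composed: 3 + 0 = 3, 4 + 5 = 9):
%1 = tensor.extract_slice %arg0[3, 9] [3, 4] [1, 1]
    : tensor<64x64xf32> to tensor<3x4xf32>

// Now the nested slices are kept as written; a later canonicalization
// or dedicated pass may still fold them if desired:
%0 = tensor.extract_slice %arg0[3, 4] [3, 32] [1, 1]
    : tensor<64x64xf32> to tensor<3x32xf32>
%1 = tensor.extract_slice %0[0, 5] [3, 4] [1, 1]
    : tensor<3x32xf32> to tensor<3x4xf32>
```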

Differential Revision: https://reviews.llvm.org/D132666

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
    mlir/lib/Dialect/Linalg/Utils/Utils.cpp
    mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
    mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
    mlir/test/Dialect/Linalg/tile-tensors.mlir
    mlir/test/Dialect/Linalg/transform-op-fuse.mlir
    mlir/test/Dialect/Linalg/transform-op-split.mlir

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index c3806e035cc8f..382e8ce23a201 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -93,25 +93,6 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
 /// (boundsMap = affine.map<() -> (42)>)
 FailureOr<int64_t> getConstantUpperBoundForIndex(Value value);
 
-/// Create an ExtractSliceOp and, if `source` is defined by an ExtractSliceOp,
-/// fold it by adding the offsets.
-///
-/// Example:
-/// ```
-/// %0 = tensor.extract_slice %arg0[3, 4][3, 32][1, 1] : tensor<64x64xf32> to
-///                                                        tensor<3x32xf32>
-/// %1 = tensor.extract_slice %0[0, 5][3, 4][1, 1] : tensor<3x32xf32> to
-///                                                    tensor<3x4xf32>
-/// ```
-/// folds into:
-/// ```
-/// %1 = tensor.extract_slice %arg0[3, 9][3, 4][1, 1] : tensor<64x64xf32> to
-///                                                       tensor<3x4xf32>
-/// ```
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
-
 /// Create a tensor::PadOp that pads `source` to the size of the statically
 /// sized `type` whose static sizes are assumed to be greater than the dynamic
 /// `source` size. The padding introduces trailing `pad` values until the target

diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index a9c44f0ac8635..9a170ddbec90d 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -346,48 +346,6 @@ FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
   return *std::min_element(constantBounds.begin(), constantBounds.end());
 }
 
-tensor::ExtractSliceOp makeComposedExtractSliceOp(
-    OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
-  assert(source && "expect source to be nonzero");
-
-  // Do not fold if the producer is not an ExtractSliceOp.
-  auto producerOp = source.getDefiningOp<tensor::ExtractSliceOp>();
-  if (!producerOp)
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Do not fold if the producer is rank reducing or if there are any non-unit
-  // strides. Supporting non-unit strides complicates the offset computation
-  // since the consumer offsets need to be multiplied by the producer strides.
-  // TODO: support non-unit strides once there are use cases.
-  SmallVector<OpFoldResult> allStrides = producerOp.getMixedStrides();
-  allStrides.append(strides.begin(), strides.end());
-  bool hasNonUnitStride = any_of(allStrides, [](OpFoldResult ofr) {
-    return getConstantIntValue(ofr) != static_cast<int64_t>(1);
-  });
-  if (hasNonUnitStride ||
-      producerOp.getSourceType().getRank() !=
-          producerOp.getResult().getType().cast<ShapedType>().getRank())
-    return b.create<tensor::ExtractSliceOp>(loc, source, offsets, sizes,
-                                            strides);
-
-  // Fold the producer by adding the offests and extracting the slice directly
-  // from the producer source tensor.
-  SmallVector<OpFoldResult> foldedOffsets(offsets.begin(), offsets.end());
-  AffineExpr dim1, dim2;
-  bindDims(b.getContext(), dim1, dim2);
-  for (const auto &en : enumerate(producerOp.getMixedOffsets())) {
-    SmallVector<Value> offsetValues = {
-        getValueOrCreateConstantIndexOp(b, loc, foldedOffsets[en.index()]),
-        getValueOrCreateConstantIndexOp(b, loc, en.value())};
-    foldedOffsets[en.index()] =
-        makeComposedAffineApply(b, loc, dim1 + dim2, offsetValues).getResult();
-  }
-  return b.create<tensor::ExtractSliceOp>(loc, producerOp.getSource(),
-                                          foldedOffsets, sizes, strides);
-}
-
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold) {
   // Exit if `source` is not defined by an ExtractSliceOp.
@@ -777,8 +735,8 @@ static Value materializeTiledShape(OpBuilder &builder, Location loc,
                             sliceParams.sizes, sliceParams.strides);
                       })
                       .Case([&](RankedTensorType) {
-                        return makeComposedExtractSliceOp(
-                            builder, loc, valueToTile, sliceParams.offsets,
+                        return builder.create<tensor::ExtractSliceOp>(
+                            loc, valueToTile, sliceParams.offsets,
                             sliceParams.sizes, sliceParams.strides);
                       })
                       .Default([](ShapedType) -> Operation * {

diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
index ecafcadc2bdc4..df4bcd422c7a7 100644
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -47,13 +47,16 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
   // The canonicalizer is able to recover static shapes for linalg.generic
   // instances, use those to differentiate the quadrants.
 
+  // CHECK:      %[[SLICE_1_IN:.+]] = tensor.extract_slice %[[IN]][0, 0] [4, 34] [1, 1]
   // CHECK:      %[[SLICE_1:.+]] = tensor.extract_slice %[[OUT]][0, 0] [4, 34] [1, 1]
   // CHECK:      scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_1:.+]] = %[[SLICE_1]])
+  // CHECK:        %[[OUTSLICE_1_IN:.+]] = tensor.extract_slice %[[SLICE_1_IN]][%[[I1]], 0] [2, 34] [1, 1]
   // CHECK:        %[[OUTSLICE_1:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 34] [1, 1]
 
-  // CHECK:        %[[SLICE_2:.+]] = tensor.extract_slice %[[ITERARG_1]][%[[I1]], 0] [2, 16] [1, 1]
+  // CHECK:        %[[SLICE_2_IN:.+]] = tensor.extract_slice %[[OUTSLICE_1_IN]][0, 0] [2, 16] [1, 1]
+  // CHECK:        %[[SLICE_2:.+]] = tensor.extract_slice %[[OUTSLICE_1]][0, 0] [2, 16] [1, 1]
   // CHECK:        %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
-  // CHECK:          %[[INSLICE_2:.+]] = tensor.extract_slice %[[IN]][%[[I1]], %[[I2]]] [2, 8] [1, 1]
+  // CHECK:          %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
   // CHECK:          %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
   // CHECK:          %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
   // CHECK:          %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]

diff --git a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
index bf0e96d1f25dd..9d06888d136c4 100644
--- a/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-fuse-and-distribute.mlir
@@ -23,11 +23,13 @@ func.func @fill_matmul_tensors(
 //      CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
 //      CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
 //      CHECK:   %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
+//      CHECK:     %[[OUTSLICEA:.+]] = tensor.extract_slice %{{.*}}[%{{.*}}, 0] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:     %[[OUTSLICEB:.+]] = tensor.extract_slice %{{.*}}[0, %{{.*}}] [%{{.*}}, %{{.*}}] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
 //      CHECK:     %[[SLICE:.+]] = tensor.extract_slice %[[TC1]]
 //      CHECK:     %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[SLICE]]
 //      CHECK:     %[[sTD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[FILL]]) -> (tensor<?x?xf32>) {
-//      CHECK:       %[[sTA:.*]] = tensor.extract_slice %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
-//      CHECK:       %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTA:.*]] = tensor.extract_slice %[[OUTSLICEA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
+//      CHECK:       %[[sTB:.*]] = tensor.extract_slice %[[OUTSLICEB]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 //      CHECK:       %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
 //      CHECK:       %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<?x?xf32>, tensor<?x?xf32>)
 // CHECK-SAME:                                  outs(%[[sTC]] : tensor<?x?xf32>)  -> tensor<?x?xf32>

diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
index 7bf8e3062c896..cfd68e57c88af 100644
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -77,8 +77,6 @@ func.func @generic_op_tensors(
 // -----
 
 //  CHECK-DAG:  #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
-//  CHECK-DAG:  #[[MAP1:.*]] = affine_map<(d0) -> (d0 + 3)>
-//  CHECK-DAG:  #[[MAP2:.*]] = affine_map<(d0) -> (d0 + 4)>
 
 //      CHECK:  fold_extract_slice
 // CHECK-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x128xf32>
@@ -93,15 +91,15 @@ func.func @fold_extract_slice(
   %0 = tensor.dim %arg1, %c0 : tensor<?x42xf32>
   %1 = tensor.extract_slice %arg0[3, 4] [%0, 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
 
+  //      CHECK:   %[[E:.*]] = tensor.extract_slice %[[ARG0]][3, 4] [%[[DIM]], 42] [1, 1] : tensor<?x128xf32> to tensor<?x42xf32>
+
   //      CHECK:    scf.for %[[IV0:[0-9a-zA-Z]*]] =
   //      CHECK:      scf.for %[[IV1:[0-9a-zA-Z]*]] =
 
   // Fold the existing extract slice op into the one created by the tiling.
   //      CHECK:        %[[SIZE0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[DIM]]
-  //      CHECK:        %[[OFF0:.*]] = affine.apply #[[MAP1]](%[[IV0]]
-  //      CHECK:        %[[OFF1:.*]] = affine.apply #[[MAP2]](%[[IV1]]
-  //      CHECK:        %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
-  // CHECK-SAME:                                          %[[OFF0]], %[[OFF1]]
+  //      CHECK:        %[[T0:.*]] = tensor.extract_slice %[[E]]
+  // CHECK-SAME:                                          %[[IV0]], %[[IV1]]
   // CHECK-SAME:                                          %[[SIZE0]], 3
   // CHECK-SAME:                                          1, 1
   //      CHECK:        {{.*}} = linalg.generic {{.*}} ins(%[[T0]]

diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
index 8c133d75180a6..39227e6503f15 100644
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -66,11 +66,12 @@ func.func @interchange_reduction(%input: tensor<12x7x25xf32>) -> tensor<12x25xf3
 //   CHECK-DAG: %[[C7:.+]] = arith.constant 7 : index
 //       CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %[[C5]] iter_args(%[[FOR_ARG0:.+]] = %[[INIT]])
 //       CHECK:   scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
-//       CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-//       CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE0]] : tensor<?x?xf32>)
+//       CHECK:     %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
+//       CHECK:     %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
+//       CHECK:     %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor<?x?xf32>)
 //       CHECK:     %[[C4:.+]] = arith.constant 4 : index
 //       CHECK:     scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
-//       CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[INPUT]]
+//       CHECK:       %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 //       CHECK:       %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
 //       CHECK:       linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor<?x?x?xf32>) outs(%[[OUT_SLICE2]] : tensor<?x?xf32>)
 

diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
index ebf3b2545630c..8ae34bdcb6e1d 100644
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -153,14 +153,14 @@ func.func @two_d(%arg0: tensor<10x34xf32>,
   // CHECK:      %[[OUT_2:.+]] = tensor.extract_slice %[[PARTIAL_1]]
   // Note that `extract_slice` taking a slice from another `extract_slice` result
   // is folded to use the operand of the first `extract_slice`.
-  // CHECK:      %[[IN_21:.+]] = tensor.extract_slice %[[IN]]
-  // CHECK:      %[[OUT_21:.+]] = tensor.extract_slice %[[PARTIAL_1]]
+  // CHECK:      %[[IN_21:.+]] = tensor.extract_slice %[[IN_2]]
+  // CHECK:      %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]
   // CHECK:      %[[RES_21:.+]] = linalg.generic
   // CHECK-SAME:   ins(%[[IN_21]] : tensor<6x16xf32>)
   // CHECK-SAME:   outs(%[[OUT_21]] : tensor<6x16xf32>)
   // CHECK:      %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
   //
-  // CHECK:      %[[IN_22:.+]] = tensor.extract_slice %[[IN]]
+  // CHECK:      %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]
   // CHECK:      %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
   // CHECK:      %[[RES_22:.+]] = linalg.generic
   // CHECK-SAME:   ins(%[[IN_22]] : tensor<6x18xf32>)

