[all-commits] [llvm/llvm-project] 44861c: [mlir] [linalg] Check for dim shape to decide unit...

Sayan Saha via All-commits <all-commits at lists.llvm.org>
Tue May 28 06:22:07 PDT 2024


  Branch: refs/heads/main
  Home:   https://github.com/llvm/llvm-project
  Commit: 44861c7ac563f9e994305e22f2dca1c4f37265e4
      https://github.com/llvm/llvm-project/commit/44861c7ac563f9e994305e22f2dca1c4f37265e4
  Author: Sayan Saha <sayans at mathworks.com>
  Date:   2024-05-28 (Tue, 28 May 2024)

  Changed paths:
    M mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
    M mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir

  Log Message:
  -----------
  [mlir] [linalg] Check for dim shape to decide unit dim for each operand in dropUnitDims pass. (#93317)

Running `mlir-opt --linalg-fold-unit-extent-dims` on the following IR

```
#map = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>
#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>
module {
  func.func @main(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
    %cst = arith.constant dense<1.000000e+00> : tensor<1x1x1x1xf32>
    %0 = tensor.empty(%arg1) : tensor<?x1x61x1xf32>
    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %2 = arith.mulf %in, %in_0 : f32
      %3 = arith.addf %out, %2 : f32
      linalg.yield %3 : f32
    } -> tensor<?x1x61x1xf32>
    return %1 : tensor<?x1x61x1xf32>
  }
}
```

produces an incorrect tensor.expand_shape operation:

```
error: 'tensor.expand_shape' op expected dimension 0 of collapsed type to be dynamic since one or more of the corresponding dimensions in the expanded type is dynamic
    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%arg0, %cst : tensor<1x?x?x1xf32>, tensor<1x1x1x1xf32>) outs(%0 : tensor<?x1x61x1xf32>) {
         ^
/mathworks/devel/sandbox/sayans/geckWorks/g3294570/repro.mlir:8:10: note: see current operation: %5 = "tensor.expand_shape"(%4) <{reassociation = [[0, 1, 2, 3]]}> : (tensor<61xf32>) -> tensor<?x1x61x1xf32>
// -----// IR Dump After LinalgFoldUnitExtentDimsPass Failed (linalg-fold-unit-extent-dims) //----- //
#map = affine_map<(d0) -> (0, d0)>
#map1 = affine_map<(d0) -> ()>
#map2 = affine_map<(d0) -> (d0)>
"builtin.module"() ({
  "func.func"() <{function_type = (tensor<1x?x?x1xf32>, index) -> tensor<?x1x61x1xf32>, sym_name = "main"}> ({
  ^bb0(%arg0: tensor<1x?x?x1xf32>, %arg1: index):
    %0 = "arith.constant"() <{value = dense<1.000000e+00> : tensor<f32>}> : () -> tensor<f32>
    %1 = "tensor.collapse_shape"(%arg0) <{reassociation = [[0, 1], [2, 3]]}> : (tensor<1x?x?x1xf32>) -> tensor<?x?xf32>
    %2 = "tensor.empty"() : () -> tensor<61xf32>
    %3 = "tensor.empty"() : () -> tensor<61xf32>
    %4 = "linalg.generic"(%1, %0, %2, %3) <{indexing_maps = [#map, #map1, #map2, #map2], iterator_types = [#linalg.iterator_type<parallel>], operandSegmentSizes = array<i32: 3, 1>}> ({
    ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32):
      %6 = "arith.mulf"(%arg2, %arg3) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
      %7 = "arith.addf"(%arg4, %6) <{fastmath = #arith.fastmath<none>}> : (f32, f32) -> f32
      "linalg.yield"(%7) : (f32) -> ()
    }) : (tensor<?x?xf32>, tensor<f32>, tensor<61xf32>, tensor<61xf32>) -> tensor<61xf32>
    %5 = "tensor.expand_shape"(%4) <{reassociation = [[0, 1, 2, 3]]}> : (tensor<61xf32>) -> tensor<?x1x61x1xf32>
    "func.return"(%5) : (tensor<?x1x61x1xf32>) -> ()
  }) : () -> ()
}) : () -> ()
```

This happens because the dimension `d0` is first determined to be a unit
dim that can be dropped, based on the shape of the operand `%arg0` of the
`linalg.generic`. Later, when iterating over the `outs` operand, `d0` is
again treated as a unit dim even though the shape corresponding to it
there is `ShapedType::kDynamic`. For the `linalg.generic` to be valid,
the `d0` dimension of `outs` does need to be `1` at runtime, but the
current implementation never checks that the static shape is actually `1`
before dropping the dimension, so the `outs` operand collapses to
`tensor<61xf32>` in the example above.
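
To make the mismatch concrete, here is a minimal standalone C++ sketch
(all names are illustrative, not the pass's actual code) of the faulty
reasoning:

```
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for MLIR's ShapedType::kDynamic sentinel.
constexpr int64_t kDynamic = INT64_MIN;

int main() {
  // Where loop dim d0 lands in each operand, per the indexing maps above:
  // #map  sends d0 to result 0 of %arg0 : tensor<1x?x?x1xf32> -> size 1.
  // #map2 sends d0 to result 0 of outs  : tensor<?x1x61x1xf32> -> size ?.
  std::vector<int64_t> arg0Shape = {1, kDynamic, kDynamic, 1};
  std::vector<int64_t> outsShape = {kDynamic, 1, 61, 1};

  // Buggy logic: decide once, from %arg0 alone, that d0 is a droppable
  // unit dim.
  bool d0LooksUnit = (arg0Shape[0] == 1); // true

  // Reusing that decision for outs drops a *dynamic* dimension.
  if (d0LooksUnit && outsShape[0] != 1)
    std::cout << "d0 is dynamic in outs; dropping it is unsound\n";
  return 0;
}
```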

The fix is to also check that the operand's static shape along that
dimension is actually `1` before dropping it (see the sketch after the
IR below). The IR after the fix is:

```
#map = affine_map<()[s0, s1] -> (s0 * s1)>
#map1 = affine_map<(d0) -> (0, d0)>
#map2 = affine_map<(d0) -> ()>
module {
  func.func @main(%arg0: tensor<1x?x?x1xf32>, %arg1: index) -> tensor<?x1x61x1xf32> {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %cst = arith.constant dense<1.000000e+00> : tensor<f32>
    %collapsed = tensor.collapse_shape %arg0 [[0, 1], [2, 3]] : tensor<1x?x?x1xf32> into tensor<?x?xf32>
    %0 = tensor.empty(%arg1) : tensor<?x61xf32>
    %1 = affine.apply #map()[%arg1, %c1]
    %2 = tensor.empty(%1) : tensor<?x61xf32>
    %3 = linalg.generic {indexing_maps = [#map1, #map2, #map1, #map1], iterator_types = ["parallel"]} ins(%collapsed, %cst, %0 : tensor<?x?xf32>, tensor<f32>, tensor<?x61xf32>) outs(%2 : tensor<?x61xf32>) {
    ^bb0(%in: f32, %in_0: f32, %in_1: f32, %out: f32):
      %4 = arith.mulf %in, %in_0 : f32
      %5 = arith.addf %in_1, %4 : f32
      linalg.yield %5 : f32
    } -> tensor<?x61xf32>
    %expanded = tensor.expand_shape %3 [[0, 1], [2, 3]] output_shape [%c0, 1, 61, 1] : tensor<?x61xf32> into tensor<?x1x61x1xf32>
    return %expanded : tensor<?x1x61x1xf32>
  }
}
```
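
For reference, here is a minimal sketch of the shape check the fix adds,
in the spirit of DropUnitDims.cpp but with hypothetical names and
simplified types (the real pass works with AffineMap and ShapedType):

```
#include <cstdint>
#include <unordered_set>
#include <vector>

// Sketch: a dim of an operand may be dropped only if (a) its loop dim
// was classified as a unit dim, and (b) this operand's own size at that
// position is a static 1. Condition (b) is the fix; without it a dynamic
// size (ShapedType::kDynamic) could be dropped, as in the bug above.
bool canDropDim(const std::vector<int64_t> &operandShape, unsigned pos,
                unsigned loopDim,
                const std::unordered_set<unsigned> &unitLoopDims) {
  return unitLoopDims.count(loopDim) && operandShape[pos] == 1;
}
```

With that guard, the dynamic dimension of the `outs` operand survives as
the `?` in `tensor<?x61xf32>`, and the final `tensor.expand_shape` now
has a dynamic collapsed dimension, satisfying the verifier.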


