[Mlir-commits] [mlir] [mlir][linalg] fix segmentation fault in isContractionBody function (PR #108703)

Sat Sep 14 08:09:42 PDT 2024

BRUCE11111 wrote:

For example, see the first `linalg.generic` operation in the following IR:
```mlir
#map = affine_map<(d0, d1, d2) -> ()>
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map2 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0 + d3, d1 + d4, d2 + d5)>
#map3 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)>
#map4 = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2)>
module attributes {dlti.target_system_spec = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"L1_cache_size_in_bytes", 49152 : ui32>, #dlti.dl_entry<"L2_cache_size_in_bytes", 1310720 : ui64>, #dlti.dl_entry<"L3_cache_size_in_bytes", 50331648 : ui64>, #dlti.dl_entry<"num_threads", 1 : i32>, #dlti.dl_entry<"max_vector_width", 512 : i64>>>} {
  func.func @entry(%arg0: tensor<32x32x32xf32>, %arg1: tensor<4x4x4xf32>) -> tensor<29x29x29xf32> attributes {llvm.emit_c_interface} {
    %cst = arith.constant 0.000000e+00 : f32
    %0 = tensor.empty() : tensor<29x29x29xf32>
    %1 = scf.forall (%arg2) in (29) shared_outs(%arg3 = %0) -> (tensor<29x29x29xf32>) {
      %extracted_slice = tensor.extract_slice %arg3[%arg2, 0, 0] [1, 29, 29] [1, 1, 1] : tensor<29x29x29xf32> to tensor<1x29x29xf32>
      %3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst : f32) outs(%extracted_slice : tensor<1x29x29xf32>) {
      ^bb0(%in: f32, %out: f32):
        linalg.yield %in : f32
      } -> tensor<1x29x29xf32>
      scf.forall.in_parallel {
        tensor.parallel_insert_slice %3 into %arg3[%arg2, 0, 0] [1, 29, 29] [1, 1, 1] : tensor<1x29x29xf32> into tensor<29x29x29xf32>
      }
    }
    %2 = scf.forall (%arg2, %arg3, %arg4) in (29, 29, 29) shared_outs(%arg5 = %1) -> (tensor<29x29x29xf32>) {
      %extracted_slice = tensor.extract_slice %arg0[%arg2, %arg3, %arg4] [4, 4, 4] [1, 1, 1] : tensor<32x32x32xf32> to tensor<4x4x4xf32>
      %extracted_slice_0 = tensor.extract_slice %arg5[%arg2, %arg3, %arg4] [1, 1, 1] [1, 1, 1] : tensor<29x29x29xf32> to tensor<1x1x1xf32>
      %3 = linalg.generic {indexing_maps = [#map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "reduction"]} ins(%extracted_slice, %arg1 : tensor<4x4x4xf32>, tensor<4x4x4xf32>) outs(%extracted_slice_0 : tensor<1x1x1xf32>) {
      ^bb0(%in: f32, %in_1: f32, %out: f32):
        %4 = arith.mulf %in, %in_1 : f32
        %5 = arith.addf %out, %4 : f32
        linalg.yield %5 : f32
      } -> tensor<1x1x1xf32>
      scf.forall.in_parallel {
        tensor.parallel_insert_slice %3 into %arg5[%arg2, %arg3, %arg4] [1, 1, 1] [1, 1, 1] : tensor<1x1x1xf32> into tensor<29x29x29xf32>
      }
    }
    return %2 : tensor<29x29x29xf32>
  }
}
```

https://github.com/llvm/llvm-project/pull/108703