[Mlir-commits] [mlir] [mlir][linalg] Elementwise fusion for any `LinalgOp` (PR #144922)

Thu Jun 19 10:30:22 PDT 2025

srcarroll wrote:

I didn't expect any differences between implementing this for `linalg.generic` vs. `linalg.map`, but I am seeing a failure in a case with `linalg.map` that passes for the equivalent `linalg.generic` version.

map version
```
// Reproducer expressed with linalg.map. All ops are elementwise over
// single-element tensors; the function returns the two select results.
func.func @main() -> (tensor<1xi64>, tensor<1xi64>) {
    %cst_16 = arith.constant dense<0> : tensor<1xi64>
    %cst_17 = arith.constant dense<1> : tensor<1xi64>
    %cst_3 = arith.constant dense<26> : tensor<1xi64>
    %c0_i64 = arith.constant 0 : i64
    %c1_i64 = arith.constant 1 : i64
    %9 = tensor.empty() : tensor<1xi1>
    // %mapped_25: elementwise signed compare `%cst_17 < 0`, yielding i1.
    // The body captures %c0_i64 from the enclosing function.
    %mapped_25 = linalg.map ins(%cst_17 : tensor<1xi64>) outs(%9 : tensor<1xi1>)
      (%in: i64) {
        %255 = arith.cmpi slt, %in, %c0_i64 : i64
        linalg.yield %255 : i1
      }
    // %11 is used as the outs operand of every i64-producing op below.
    %11 = tensor.empty() : tensor<1xi64>
    // %mapped_27: elementwise `0 - %cst_17`.
    %mapped_27 = linalg.map ins(%cst_17 : tensor<1xi64>) outs(%11 : tensor<1xi64>)
      (%in: i64) {
        %255 = arith.subi %c0_i64, %in : i64
        linalg.yield %255 : i64
      }
    // %mapped_28: elementwise `%cst_3 + 1`.
    %mapped_28 = linalg.map ins(%cst_3 : tensor<1xi64>) outs(%11 : tensor<1xi64>)
      (%in: i64) {
        %255 = arith.addi %in, %c1_i64 : i64
        linalg.yield %255 : i64
      }
    // Both selects below take %mapped_25 as their first input, so that
    // producer has two consumers. Short-form body: presumably the three inputs
    // are passed to arith.select in order (condition, true value, false value).
    %mapped_29 = linalg.map { arith.select } ins(%mapped_25, %mapped_28, %cst_16 : tensor<1xi1>, tensor<1xi64>, tensor<1xi64>) outs(%11 : tensor<1xi64>)
    %mapped_30 = linalg.map { arith.select } ins(%mapped_25, %mapped_27, %cst_17 : tensor<1xi1>, tensor<1xi64>, tensor<1xi64>) outs(%11 : tensor<1xi64>)
    return %mapped_29, %mapped_30 : tensor<1xi64>, tensor<1xi64>
}
```

generic version
```
// Identity indexing map, used for every operand of every linalg.generic below.
#map = affine_map<(d0)->(d0)>
// Reproducer expressed with linalg.generic. Each op has a single "parallel"
// iterator and identity indexing maps; the function returns the two select
// results.
func.func @main() -> (tensor<1xi64>, tensor<1xi64>) {
    %cst_16 = arith.constant dense<0> : tensor<1xi64>
    %cst_17 = arith.constant dense<1> : tensor<1xi64>
    %cst_3 = arith.constant dense<26> : tensor<1xi64>
    %c0_i64 = arith.constant 0 : i64
    %c1_i64 = arith.constant 1 : i64
    %9 = tensor.empty() : tensor<1xi1>
    // %mapped_25: elementwise signed compare `%cst_17 < 0`, yielding i1.
    // The output block argument %b1 is not read by the body.
    %mapped_25 = linalg.generic {
      indexing_maps = [#map, #map],
      iterator_types = ["parallel"]}
      ins(%cst_17 : tensor<1xi64>) outs(%9 : tensor<1xi1>) {
    ^bb0(%b0: i64, %b1: i1):
        %255 = arith.cmpi slt, %b0, %c0_i64 : i64
        linalg.yield %255 : i1
    } -> tensor<1xi1>
    // %11 is used as the outs operand of every i64-producing op below.
    %11 = tensor.empty() : tensor<1xi64>
    // %mapped_27: elementwise `0 - %cst_17`.
    %mapped_27 = linalg.generic {
      indexing_maps = [#map, #map],
      iterator_types = ["parallel"]}
      ins(%cst_17 : tensor<1xi64>) outs(%11 : tensor<1xi64>) {
    ^bb0(%b0: i64, %b1: i64):
        %255 = arith.subi %c0_i64, %b0 : i64
        linalg.yield %255 : i64
    } -> tensor<1xi64>
    // %mapped_28: elementwise `%cst_3 + 1`.
    %mapped_28 = linalg.generic {
      indexing_maps = [#map, #map],
      iterator_types = ["parallel"]}
      ins(%cst_3 : tensor<1xi64>) outs(%11 : tensor<1xi64>) {
    ^bb0(%b0: i64, %b1: i64):
        %255 = arith.addi %b0, %c1_i64 : i64
        linalg.yield %255 : i64
    } -> tensor<1xi64>
    // %mapped_29: select(%mapped_25, %mapped_28, %cst_16). %mapped_25 is also
    // the condition of %mapped_30, so that producer has two consumers.
    %mapped_29 = linalg.generic {
      indexing_maps = [#map, #map, #map, #map],
      iterator_types = ["parallel"]}
      ins(%mapped_25, %mapped_28, %cst_16 : tensor<1xi1>, tensor<1xi64>, tensor<1xi64>) outs(%11 : tensor<1xi64>) {
    ^bb0(%b0: i1, %b1: i64, %b2: i64, %b3: i64):
        %255 = arith.select %b0, %b1, %b2 : i64
        linalg.yield %255 : i64
    } -> tensor<1xi64>
    // %mapped_30: select(%mapped_25, %mapped_27, %cst_17).
    %mapped_30 = linalg.generic {
      indexing_maps = [#map, #map, #map, #map],
      iterator_types = ["parallel"]}
      ins(%mapped_25, %mapped_27, %cst_17 : tensor<1xi1>, tensor<1xi64>, tensor<1xi64>) outs(%11 : tensor<1xi64>) {
    ^bb0(%b0: i1, %b1: i64, %b2: i64, %b3: i64):
        %255 = arith.select %b0, %b1, %b2 : i64
        linalg.yield %255 : i64
    } -> tensor<1xi64>
    return %mapped_29, %mapped_30 : tensor<1xi64>, tensor<1xi64>
}
```
Since `linalg-generalize-named-ops` doesn't convert `linalg.map`, I had to translate it by hand, so hopefully I didn't mess it up.

https://github.com/llvm/llvm-project/pull/144922