<table border="1" cellspacing="0" cellpadding="8">

    <tr>

        <th>Issue</th>

        <td>

            <a href="https://github.com/llvm/llvm-project/issues/151305">151305</a>

        </td>

    </tr>

    <tr>

        <th>Summary</th>

        <td>

            [MLIR][SVE] Unable to lower code with SVE transforms

        </td>

    </tr>

    <tr>

      <th>Labels</th>

      <td>

            mlir

      </td>

    </tr>

    <tr>

      <th>Assignees</th>

      <td>

      </td>

    </tr>

    <tr>

      <th>Reporter</th>

      <td>

          Dasor

      </td>

    </tr>

</table>

<pre>

    I'm trying to lower a simple matmul to SVE, as shown in the [test here](https://github.com/llvm/llvm-project/blob/main/mlir/test/Integration/Dialect/Linalg/CPU/ArmSVE/matmul.mlir). I'm using the same transform code, plus some more to minimize the flags passed to `mlir-opt`. This is the code:

```mlir

module {

  func.func @bare_matmul(%arg0: memref<*xf32> {tt.divisibility = 16 : i32}, %arg1: memref<*xf32> {tt.divisibility = 16 : i32}, %arg2: memref<*xf32> {tt.divisibility = 16 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}, %arg4: i32 {tt.divisibility = 16 : i32}, %arg5: i32 {tt.divisibility = 16 : i32}, %arg6: i32, %arg7: i32, %arg8: i32, %arg9: i32, %arg10: i32, %arg11: i32) {

 %cst = arith.constant 0.000000e+00 : f32

    %c128_i32 = arith.constant 128 : i32

    %0 = arith.muli %arg9, %c128_i32 : i32

    %1 = arith.index_cast %0 : i32 to index

    %2 = arith.muli %arg10, %c128_i32 : i32

    %3 = arith.index_cast %2 : i32 to index

    %4 = arith.index_cast %arg5 : i32 to index

    %5 = arith.muli %1, %4 : index

    %6 = arith.addi %5, %3 : index

    %reinterpret_cast = memref.reinterpret_cast %arg0 to offset: [%6], sizes: [128, 128], strides: [%4, 1] : memref<*xf32> to memref<128x128xf32, strided<[?, 1], offset: ?>>

    %alloc = memref.alloc() : memref<128x128xf32>

    memref.copy %reinterpret_cast, %alloc : memref<128x128xf32, strided<[?, 1], offset: ?>> to memref<128x128xf32>

 %7 = bufferization.to_tensor %alloc restrict writable : memref<128x128xf32> to tensor<128x128xf32>

    %8 = arith.index_cast %arg4 : i32 to index

 %9 = arith.muli %1, %8 : index

    %10 = arith.addi %9, %3 : index

 %reinterpret_cast_0 = memref.reinterpret_cast %arg1 to offset: [%10], sizes: [128, 128], strides: [%8, 1] : memref<*xf32> to memref<128x128xf32, strided<[?, 1], offset: ?>>

    %alloc_1 = memref.alloc() : memref<128x128xf32>

    memref.copy %reinterpret_cast_0, %alloc_1 : memref<128x128xf32, strided<[?, 1], offset: ?>> to memref<128x128xf32>

 %11 = bufferization.to_tensor %alloc_1 restrict writable : memref<128x128xf32> to tensor<128x128xf32>

    %12 = tensor.empty() : tensor<128x128xf32>

    %13 = linalg.fill ins(%cst : f32) outs(%12 : tensor<128x128xf32>) -> tensor<128x128xf32>

    %14 = linalg.matmul ins(%7, %11 : tensor<128x128xf32>, tensor<128x128xf32>) outs(%13 : tensor<128x128xf32>) -> tensor<128x128xf32>

    %reinterpret_cast_2 = memref.reinterpret_cast %arg2 to offset: [%10], sizes: [128, 128], strides: [%8, 1] : memref<*xf32> to memref<128x128xf32, strided<[?, 1], offset: ?>>

 bufferization.materialize_in_destination %14 in writable %reinterpret_cast_2 : (tensor<128x128xf32>, memref<128x128xf32, strided<[?, 1], offset: ?>>) -> ()

    return

  }

}

module attributes {transform.with_named_sequence} {

  transform.named_sequence @__tile_and_vectorize(%arg0: !transform.op<"func.func"> {transform.readonly}) {

    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.op<"func.func">) -> !transform.any_op

    %tiled_linalg_op, %loops:3 = transform.structured.tile_using_for %0 tile_sizes [2, [4], 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)

    transform.structured.vectorize %tiled_linalg_op vector_sizes [2, [4], 1] : !transform.any_op

 transform.apply_patterns to %arg0 {

 transform.apply_patterns.vector.reduction_to_contract

 transform.apply_patterns.vector.transfer_permutation_patterns

 transform.apply_patterns.vector.lower_masked_transfers

 transform.apply_patterns.vector.sink_ops

    } : !transform.op<"func.func">

    transform.apply_patterns to %arg0 {

 transform.apply_patterns.vector.lower_contraction

 transform.apply_patterns.vector.lower_outerproduct

    } : !transform.op<"func.func">

    transform.yield 

  }

 transform.named_sequence @opt(%arg0: !transform.op<"func.func"> {transform.consumed}) {

    transform.apply_cse to %arg0 : !transform.op<"func.func">

    %0 = transform.apply_registered_pass "canonicalize" to %arg0 : (!transform.op<"func.func">) -> !transform.op<"func.func">

    %1 = transform.apply_registered_pass "convert-vector-to-scf" to %0 : (!transform.op<"func.func">) -> !transform.op<"func.func">

    %2 = transform.apply_registered_pass "convert-linalg-to-loops" to %1 : (!transform.op<"func.func">) -> !transform.op<"func.func">

    %3 = transform.apply_registered_pass "arm-sve-legalize-vector-storage" to %2 : (!transform.op<"func.func">) -> !transform.op<"func.func">

    %4 = transform.apply_registered_pass "convert-vector-to-llvm" with options = {"enable-arm-sve" = true} to %3 : (!transform.op<"func.func">) -> !transform.op<"func.func">

 transform.yield 

  }

  transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.consumed}) {

    %0 = transform.bufferization.one_shot_bufferize %arg0 {bufferize_function_boundaries = true} : (!transform.any_op) -> !transform.any_op

    %1 = transform.structured.match ops{["func.func"]} in %0 : (!transform.any_op) -> !transform.op<"func.func">

 transform.foreach %1 : !transform.op<"func.func"> {

    ^bb0(%arg1: !transform.op<"func.func">):

      transform.include @__tile_and_vectorize failures(propagate) (%arg1) : (!transform.op<"func.func">) -> ()

 transform.include @opt failures(propagate) (%arg1) : (!transform.op<"func.func">) -> ()

    }

    transform.yield 

 }

}

```

However, when running `mlir-opt` like this:

```shell

mlir-opt file.mlir --transform-interpreter --test-transform-dialect-erase-schedule --test-lower-to-llvm

```

I get an error regarding the control flow dialect. It comes from the `convert-cf-to-llvm` pass, which the `test-lower-to-llvm` flag uses, and the error is:

```

/tmp/dumps/file.mlir:25:11: error: failed to legalize operation 'cf.br' that was explicitly marked illegal

    %14 = linalg.matmul ins(%7, %11 : tensor<128x128xf32>, tensor<128x128xf32>) outs(%13 : tensor<128x128xf32>) -> tensor<128x128xf32>

 ^

/tmp/dumps/file.mlir:25:11: note: see current operation: "cf.br"(%10, %31)[^bb1] : (index, tensor<128x128xf32>) -> ()

```

I'm using the latest LLVM version, specifically commit `5ae83b0ccd28`.

</pre>

<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzUWU9v4zoO_zTKRUhgy3H-HHJomwY7wFtgsYM3V0O26Vg7suSV5Lbpp19Ish2ncdJ0XqeLN8gkM5JI_UiRFClSrdleAGxQfI_i7YQ2ppRqs6Vaqkkq88PmGyLLCht1YGKPjcRcPoPCFGtW1RxwRU3VcDvx_ccjphrrUj5jJrApAaP43oA2uAQFKN4isiqNqTWK7hDZIbLbM1M26SyTFSI7zp-6n2mt5H8gM4jsUi5TRHYVZcL-cKYQ2VmmiOy-CQN7RQ2Tdm7LKPc0fzBB-R6R3cO__kRkd6eq7z8eHROLdeaZrLGXrNFOsBKwphVgo6jQhVQVzmQOuOaNxlpWgCupwIpZMcEq9gqOpOB0r3FNtYbcTXKmprI2iDxgUzKNmXbrLC8rdeA-i8B_HJDgrpJ5wwGj5T0K7jAuGpHN7BdG8yClChKPG5EVIjFV-wBFd7iCSkGBogdE7l6KiKDo0XIwZpazJ6ZZyjgzB4yiLQ4X2FKwiKDl1iLzbMLPYUM-h03UDn6QbP5rZPGvkS26wX5keTayOhtZn42EwflQ2A-tO1NAJM60cYCoYqacZVJoQ4XBwSxwfwCR-yBwWK3WrflgRxaSVeLkO6cNyaoXricIBiurhrMeugc44PeGMBwQMpHDS5JRi9mz9Eo2ErupIxUZ3y4M3t0vurgfubLf_CKVNYYrhPEI0LAFOfd0pwSLAQHNc0cQtwTRGIECJgyoWoFpQUXb1p9m53M-AFigsig0GMvQhm4SL1yAfcCavYJuh0MbMh7siXeTRrG8n7YyuHkUb_ElN7ZRrRsOyerF_i285XpuuV0f36No1_GyvwN40Q5Fj_bTy0w5l9lQUDfgAtz6FMhwx55DS5TJ-jCmwc6p2k0ucPs4_ouq8MAQiZdOprQpClDs1V1MMyMTA0JLdcSkwO6cGfysmKGpjf2XZba7egaX1IFIvLpm3_NR-0YkXl8x7tWYrYbBiHWvx6175GCS4AbjDseMOwx-ybpX_z_rTsLfZt9JMLRwt9EX2ngY3mTkSfgbzDz094ZfN4OqNoeBUt8n95cHd-nhrGCcYya0z6v8ReuvUbLGsjHtREiuMidrPHXw3918Pty8TZv77ZftkYbh9d0ergEZgI4-B_SZ6ZEbPJj8jT341KwrakAxytkrJEwkOWjDhJtqT5SJgXFfUJf1mtWVA_0s9N2heo9oz1CBaZRw_7GZbHDXfrtPW3hQYxRLGwPapcNdATR7ZqZMBK0gTzT8twGRAVpu-zrluPB0jS1bksQwDgkVefIEmZGKvcJp_YJIeGQga3empC99EOlLiH6RAppLwQ8uI-9z5GH6elyrjWoy0yjI7RlmJZa1tgTWpsiJC9qd4q2Vi4k-vWoP7RaMA7UPFlNxSGR9xGe1kSd-YzvhnZ1L6Yrh6DJ6p0ZXoiaFj64BdmPOjayT-Boivp-3ptE7y1sBWkwnZjIy_TAqyQeHO_Mblam3iBHNYD95k3ij-h6M1TU_JDU1BpTQNk70x-tN59LSFuBMQd5k1tkTI5NMCqNoZm4h9POgkhpU1RgXMPpFtzBwbyxJRfVPyJOO202UmomfibWq1vSWI8q6ZMlvj-wvK9CL0WmOSXE7kWxcHJX2BD5DlgMDnuNBHLwavtwLzl-LVrbcbirIz6LVW_kzDSeq_ZiEI7HPs1WwZ9qAgjypqdYYEZJRIQXL3HWGCDnf9S8EvHdRhjejlOIJlJl6c5gaOdVZcYT7FVjJR7H68GWx-pDeow2_AO3b2-MiWqqqqX6CKYe9s4FOxdpIRfcDkyBfgHr-6_bgH4oJtvkJlrUNLNpxs05GCAibjk1bae1Cv1PjshcvYfRbJbwWdt5Lm7rZxL94XwxC_sa7MeaMRInTRFcKSHQpTdINwzDW94OJldPdZqlsRE4VA32i3Zvyjit50tsocTWPO9H8MIcb
jxHXcNx0mIVUQLNy6Ne33QqdfPFjmgb9mYY3h3qy9r0DjE8vECYy3uQXE25cUMYbBbYmrJWs6Z4acIZxhNCWzh_3hK7GGEUja_Mlm-OjW41f9ic1T9d28cXPP-QzPIGyOeVzCQKrRggm9rjty7guziLAnP0E18o5b9_oEji3ZVS7HBeMg-su4em0RzPtS0Jw46DNYDL3TaspKKphqrMSXEnWrnO5UB_yzkT4hvdgMBUYlJIKK9hTlXfdLJd2SY4LLp9x3vXGHtycX5_JCjQulKzcoA-4i6ALuFnR77wIsCmp8T29RTCCbRG4XhhuNGhMRT7YZkx19t9kZ6oakV3eVPbK3PXaQ9EdiVF051sijol7maG2VHAtyPYCw7IG1ZXjy6yYpQqRpYf6TDWGl5qzjBl-wBVVPyHHjDvav-2rDIofP6A6IQ3YXw2As0YpEOaoMu95pNUaaRF2T4uR9U7XE35M02FJ6d93r0t86qVvjXbQdeXUtYetBeEnUNo1ch8wvNDMHlomq4r5V45FEFNYRWmQZTlZoUUwyTdRvo7WdAKbcBlHq2VIFmRSbuJFSBbRPI3joMjXRZxFWVyEEMGKrlfBcj5hGxKQOFhGQRiQ5ZzMotVinadLEi_X8xTiEM0DqCjjM4trJtV-wrRuYBPGYRTEE05T4Hrj7yDfS7bXz0RtXOs6bfYazQPOtNFHDoYZ7prs__zj27_tZRXff__xaPX6p0tXjp1113Z2qc33H4_HoKYnjeKbD7fPHXJrIi34pw35XwAAAP__rxaoSg">