<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/107476>107476</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [MLIR][linalg][vectorization] failure for an nd extract case
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            mlir
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          nirvedhmeshram
      </td>
    </tr>
</table>

<pre>
    For the following IR:
```
#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1, d2) -> (d0 + d1 + d2)>
func.func @gather_failure(%arg0: tensor<8x128x768xf32>, %arg2: tensor<8x1xf32>, %arg3 : index) -> tensor<8x1xf32> {
  %c0 = arith.constant 0 : index
  %1 = linalg.generic {
    indexing_maps = [#map], 
    iterator_types = ["parallel", "parallel"]
  } outs(%arg2 : tensor<8x1xf32>) {
 ^bb0(%arg5: f32):
      %2 = linalg.index 0 : index
      %3 = linalg.index 1 : index
      %4 = affine.apply #map1(%arg3, %3, %arg3)
 %extracted = tensor.extract %arg0[%2, %c0, %4] : tensor<8x128x768xf32>
 linalg.yield %extracted : f32
  } -> tensor<8x1xf32>
  return %1 : tensor<8x1xf32>
}

module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg2: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["linalg.generic"]} in %arg2 : (!transform.any_op) -> !transform.any_op
    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
    %2 = transform.structured.vectorize_children_and_apply_patterns %1 {vectorize_nd_extract} : (!transform.any_op) -> !transform.any_op
 transform.yield
  }
}
```
Run:
```
mlir-opt -transform-interpreter -split-input-file test_gather_core.mlir 
```
It gives the error:
```
within split at test_gather_core.mlir:1 offset :13:18: error: 'vector.shape_cast' op source/result number of elements must match
    %extracted = tensor.extract %arg0[%2, %c0, %4] : tensor<8x128x768xf32>
                 ^
within split at test_gather_core.mlir:1 offset :13:18: note: see current operation: %7 = "vector.shape_cast"(%5) : (vector<8x1xindex>) -> vector<1xindex>
```
With `verify-each=0`, you can see the following output IR:
```
#map = affine_map<(d0, d1, d2) -> (d1, d2)>
#map1 = affine_map<(d0, d1) -> (d0, d1)>
"builtin.module"() ({
 "func.func"() <{function_type = (tensor<8x128x768xf32>, tensor<8x1xf32>, index) -> tensor<8x1xf32>, sym_name = "gather_failure"}> ({
 ^bb0(%arg1: tensor<8x128x768xf32>, %arg2: tensor<8x1xf32>, %arg3: index):
 %3 = "arith.constant"() <{value = 0.000000e+00 : f32}> : () -> f32
 %4 = "arith.constant"() <{value = 0 : i32}> : () -> i32
    %5 = "arith.constant"() <{value = 0 : index}> : () -> index
    %6 = "arith.constant"() <{value = dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>}> : () -> vector<8xindex>
    %7 = "vector.broadcast"(%6) : (vector<8xindex>) -> vector<1x8xindex>
    %8 = "vector.transpose"(%7) <{permutation = array<i64: 1, 0>}> : (vector<1x8xindex>) -> vector<8x1xindex>
    %9 = "arith.addi"(%arg3, %arg3) <{overflowFlags = #arith.overflow<none>}> : (index, index) -> index
    %10 = "vector.shape_cast"(%8) : (vector<8x1xindex>) -> vector<1xindex>
    %11 = "vector.extractelement"(%10, %4) : (vector<1xindex>, i32) -> index
    %12 = "vector.transfer_read"(%arg1, %11, %5, %9, %3) <{in_bounds = [true, true], operandSegmentSizes = array<i32: 1, 3, 1, 0>, permutation_map = #map}> : (tensor<8x128x768xf32>, index, index, index, f32) -> vector<8x1xf32>
    %13 = "vector.transfer_write"(%12, %arg2, %5, %5) <{in_bounds = [true, true], operandSegmentSizes = array<i32: 1, 1, 2, 0>, permutation_map = #map1}> : (vector<8x1xf32>, tensor<8x1xf32>, index, index) -> tensor<8x1xf32>
    "func.return"(%13) : (tensor<8x1xf32>) -> ()
  }) : () -> ()
  ```


</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy8WN2O4joSfhpzY4ESJyFwwcUAjTTS7s05DxCZpAJeJXbWdujmPP3KP_kFune2Z09rlGSciqvqq7_PUKXYhQPsULJHyXFBW30VcseZvEFxrUFdJa0XZ1Hcdychsb4CLkVViXfGL_jnHyg4ouAHWgf-n_sviWraYBQdMS1LxiGraYOiAyKbIkDkgIsQkS1eougNT9dQ9DbeIvxijwMuyHQnjMgeF6G7kfGOZcvzlblgFAcXqq8gs5KyqpWAyAaRhMpLgKIfWANXQqLosPkIyeYjXW8-yoiYfcgBOzkyl3uQiLARYbyAj97AJx9glO6dedh8mAfOYcn0dZULrjTlGgejvQZZh03FOK0uqwtwkCwfb4fdF4xfDHLKSpsYW2BRcrS2DrIaJNVCZvrewEiYNFTSqoIKEeK8m6wkx96i9IhFq1WPJcEvQdqO7ETJ2_kc9F8l5iMjZyL3Y7DPukzGLlvvnmHjhaNH4fClcDxKtBVtmuqOfQb2lkU-uNEoyMZK7wZJ4ENLmmso7F7O8ZVfxF1-GUwTj2SSB_4hRsnxAa5p7jk13p07g6qY63S4jcLxKuk6GQm6lbzLpeexcsWY-jC7ay2KtgJMtZbs3GqTLuleS8pVKWS9emf6mnFaQ5Ep-HcLPAdjzSg1B9mpmKnMLOvfZjVlfMgnYyIi4fAx5fdMNFPlEmgheHU3Jk_yzMXZ1dcgrrRsc91KKFY11fkVi0aZb2zqT2vLp3t6xIzjcYpbCx_MGrWlx3djk8KZSRfQWUMlcG2dszane6ZERTUUWSlFndGzuDlQv6-fvIbkBrkWkv0FWX5lVSGBZ5QXma2PrKFag-TK-5DuB2leZD4xv23jsGxzfpTe89ScjiDZ8qfrdcXkUjQaL_udl4xrkI0EDRIvVVMxvWS8afWyZBVgDUpnfl7kQsLKbIGfbv5T4wu7gbJTEqQU8qmYKRDGsdWEqX6uAUU_QizKUoE2CIaRuWwMlm5jC2rqMF-pK20gy6nSiKRYNFiJVuaAyEmCaiuNeVufQWJRYqigBq4VrlulsU36STr8HV1s_oeSt9-CDBcazF0B4LyVpoawaMxgY4I7xJLUzTZCnkBHXLNJbOdwWeukfE90k8NNMJu3_dvRu1cRx2gd3ECy8r4Eml9RdDTv8V20OKfc2jwlV6LVTau_xbEe-NGw9otM6yu2Rs4tqzTjKzccOiy3tvKHYU9IT8NGItEBpXuzZuJkGYgP0uZTLvaCgH1JuoyQutd2RnXZMCeExDQW5_BLrhL-PrY4JosD7-lZDCJkygnn4N1o1TpfglVg_wCRfRD0tMB707ViD85AGHoO9CuqHJ16uTsb6IhtLcn_qMDi8kLFlMshkqx_VUkBXIFZTPY2s22J2N5mmV5sLom5rM0lNRzAGzLqDX31PzfzmeTY6HlTOktBi0lPWj_vSZ91pBe6NjNddg42QvU1m6QDTA3IutW2f_qTiaR3FB3YOjbGWKyCB7-fG_EEjHnb9CZupzGkRcF640ZM3DNwb6q4gSwr8X6q6MUfYEjkNuheoejABYcHe33lzVvHY3KFwdfTY_P96dFpC2fautnsRnivMRwG8BPVY8UHW5Of-EeeJUcJMjO8ehyD0KsMu4fE37fDMamLDOPZWbS86I-VWrZgG7i5u5OoHdO8-BMuxrU_2V_-ENonXET6hIv6Mg28V6M0zbqp6A-6k0B_2qznWTB6KEeoTWI6pzUGkegVhu-S6aHKQjKeEFMQk_8feEN7-xq88EVZT0fY52P4v5nHI_gcP3An0wGqaJTYL35R6OjJcCTH3Rlw1oxnQjNaZa-LYhcV22hLF7ALU5IkabQOo8V1d46jOF2fC5qe4816DTSJA0KLMg3KuNiEyYLtSEDiYBskhARBsFmlJZBNEsZhEkOwWVMUB1BTVq2q6lavhLwsmFIt
7MIgjdP1oqJnqNTOHUAt27XHzoXcGfnlub0oFAcVU1oNO2imK_sD3j__8fMPkxPJ3p1d3XN3MHM0ODliz3JwKSSmHPMCdwQ_pwoWrax2V60bZZgIOSFyujB9bc-rXNSInIxaf1s2UvwLco3IyXqhEDl5R2478p8AAAD__yeIvZw">