[Mlir-commits] [mlir] [mlir][vector] Refine vectorisation of tensor.extract (PR #109580)

Mon Sep 23 12:46:00 PDT 2024

================
@@ -307,6 +307,84 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// Reading a 1D column vector (hence a candidate for a contiguous load), but given
+// %1, it's a gather load.
+
+#map = affine_map<(d0, d1) -> (d0, d1)>
+func.func @index_from_output_column_vector_gather_load(%src: tensor<8x128xf32>) -> tensor<8x1xf32> {
+  %c0 = arith.constant 0 : index  // fixed column index used by the extract below
+  %0 = tensor.empty() : tensor<8x1xf32>  // 8x1 init tensor for the generic's output
+  %res = linalg.generic {
+    indexing_maps = [#map],  // single (output) operand, accessed through #map
+    iterator_types = ["parallel", "parallel"]
+  } outs(%0 : tensor<8x1xf32>) {
+  ^bb0(%arg1: f32):
+      %1 = linalg.index 0 : index  // iteration index along dim 0
+    %extracted = tensor.extract %src[%1, %c0] : tensor<8x128xf32>  // reads %src[%1, 0]: the row varies, the column is fixed
+      linalg.yield %extracted : f32  // extracted element becomes the output element
+  } -> tensor<8x1xf32>
+  return %res : tensor<8x1xf32>
+}
+
+// CHECK-LABEL:   func.func @index_from_output_column_vector_gather_load(
+// CHECK-SAME:      %[[SRC:.*]]: tensor<8x128xf32>) -> tensor<8x1xf32> {
+// CHECK:           %[[C128:.*]] = arith.constant dense<128> : vector<1x8xindex>
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[PASS_THRU:.*]] = arith.constant dense<0.000000e+00> : vector<8x1xf32>
+// CHECK:           %[[MASK:.*]] = arith.constant dense<true> : vector<8x1xi1>
+// CHECK:           %[[IDX_VEC:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex>
+// CHECK:           %[[OUT:.*]] = tensor.empty() : tensor<8x1xf32>
+// CHECK:           %[[B:.*]] = vector.broadcast %[[IDX_VEC]] : vector<8xindex> to vector<1x8xindex>
+// CHECK:           %[[MUL:.*]] = arith.muli %[[B]], %[[C128]] : vector<1x8xindex>
+// CHECK:           %[[TR:.*]] = vector.transpose %[[MUL]], [1, 0] : vector<1x8xindex> to vector<8x1xindex>
+// CHECK:           %[[GATHER:.*]] = vector.gather %[[SRC]]{{\[}}%[[C0]], %[[C0]]] {{\[}}%[[TR]]], %[[MASK]], %[[PASS_THRU]] : tensor<8x128xf32>, vector<8x1xindex>, vector<8x1xi1>, vector<8x1xf32> into vector<8x1xf32>
+// CHECK:           %[[RES:.*]] = vector.transfer_write %[[GATHER]], %[[OUT]]{{\[}}%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x1xf32>, tensor<8x1xf32>
+// CHECK:           return %[[RES]] : tensor<8x1xf32>
+
+// Same as above, but the access indices have been swapped and hence this is a
+// contiguous load. Currently not supported, and lowered as vector.gather
+// instead.
+// TODO: Make sure that this is lowered as a contiguous load.
+
----------------
dcaballe wrote:

Missing split-input-file marker?

https://github.com/llvm/llvm-project/pull/109580