[Mlir-commits] [mlir] andrzej/update collapse inner 6 (PR #96218)

Thu Jun 20 10:34:10 PDT 2024

https://github.com/banach-space created https://github.com/llvm/llvm-project/pull/96218

- **[mlir][vector] Update tests for collapse 4/n (nfc)**
- **[mlir][vector] Restrict DropInnerMostUnitDimsTransferWrite**


>From b14d305ce8822f04aed0f411a7d94351f4d1846f Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 20 Jun 2024 14:33:43 +0100
Subject: [PATCH 1/2] [mlir][vector] Update tests for collapse 4/n (nfc)

The main goal of this PR (and subsequent PRs) is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

There are quite a few cases to consider, hence this is split into multiple
PRs. In this PR, `@outer_dyn_drop_inner_most_dim` is replaced with:
  * `@contiguous_inner_most_dynamic_outer`

I am also adding a similar test for scalable vectors. In addition,
  * `@drop_two_inner_most_dim` and
    `@drop_two_inner_most_dim_scalable_inner_dim`,

are renamed as `@contiguous_inner_most` and
`@contiguous_inner_most_scalable_inner_dim`, respectively, to match
their counterparts for xfer_read.

NOTE: This PR is limited to tests for `vector.transfer_write`

This is a follow-up for: #94490, #94604, #94906
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 71 ++++++++++++-------
 1 file changed, 46 insertions(+), 25 deletions(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 5183205db1b47..686b4a0b60c2a 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s -test-vector-transfer-collapse-inner-most-dims -split-input-file | FileCheck %s
 
+// TODO: Unify how memref and vectors are named
+
 //-----------------------------------------------------------------------------
 // 1. vector.transfer_read
 //-----------------------------------------------------------------------------
@@ -254,14 +256,14 @@ func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>) -> vector<
 // 2. vector.transfer_write
 //-----------------------------------------------------------------------------
 
-func.func @drop_two_inner_most_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
+func.func @contiguous_inner_most(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x16x1x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x16x1x1xf32>, memref<1x512x16x1x1xf32>
   return
 }
-// CHECK:      func.func @drop_two_inner_most_dim
+// CHECK:      func.func @contiguous_inner_most
 // CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
@@ -276,14 +278,14 @@ func.func @drop_two_inner_most_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vecto
 // dim scalable. Note that this example only makes sense when "16 = [16]" (i.e.
 // vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
 
-func.func @drop_two_inner_most_dim_scalable_inner_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x[16]x1x1xf32>, %arg2: index) {
+func.func @contiguous_inner_most_scalable_inner_dim(%arg0: memref<1x512x16x1x1xf32>, %arg1: vector<1x16x[16]x1x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0, %c0]
     {in_bounds = [true, true, true, true, true]}
     : vector<1x16x[16]x1x1xf32>, memref<1x512x16x1x1xf32>
   return
 }
-// CHECK:      func.func @drop_two_inner_most_dim_scalable_inner_dim
+// CHECK:      func.func @contiguous_inner_most_scalable_inner_dim
 // CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
 // CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
@@ -325,6 +327,46 @@ func.func @negative_scalable_one_trailing_dim(%arg0: memref<1x512x16x1x1xf32>, %
 
 // -----
 
+func.func @contiguous_inner_most_dynamic_outer(%a: index, %b: index, %arg0: memref<?x?x16x1xf32>, %arg1: vector<8x1xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%a, %b, %c0, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<?x?x16x1xf32>
+  return
+}
+// CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer(
+// CHECK-SAME:      %[[IDX_0:.*]]: index, %[[IDX_1:.*]]: index,
+// CHECK-SAME:      %[[MEM:.*]]: memref<?x?x16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<8x1xf32>) {
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIM0:.*]] = memref.dim %[[MEM]], %[[C0]] : memref<?x?x16x1xf32>
+// CHECK:           %[[DIM1:.*]] = memref.dim %[[MEM]], %[[C1]] : memref<?x?x16x1xf32>
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0, 0, 0] {{\[}}%[[DIM0]], %[[DIM1]], 16, 1] [1, 1, 1, 1] : memref<?x?x16x1xf32> to memref<?x?x16xf32, strided<[?, 16, 1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX_0]], %[[IDX_1]], %[[C0]]] {in_bounds = [true]} : vector<8xf32>, memref<?x?x16xf32, strided<[?, 16, 1], offset: ?>>
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b: index, %arg0: memref<?x?x16x1xf32>, %arg1: vector<[8]x1xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%a, %b, %c0, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<?x?x16x1xf32>
+  return
+}
+// CHECK-LABEL: func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(
+// CHECK-SAME:      %[[IDX_0:.*]]: index, %[[IDX_1:.*]]: index,
+// CHECK-SAME:      %[[MEM:.*]]: memref<?x?x16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[8]x1xf32>) {
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIM0:.*]] = memref.dim %[[MEM]], %[[C0]] : memref<?x?x16x1xf32>
+// CHECK:           %[[DIM1:.*]] = memref.dim %[[MEM]], %[[C1]] : memref<?x?x16x1xf32>
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0, 0, 0] {{\[}}%[[DIM0]], %[[DIM1]], 16, 1] [1, 1, 1, 1] : memref<?x?x16x1xf32> to memref<?x?x16xf32, strided<[?, 16, 1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX_0]], %[[IDX_1]], %[[C0]]] {in_bounds = [true]} : vector<[8]xf32>, memref<?x?x16xf32, strided<[?, 16, 1], offset: ?>>
+
+// -----
+
 func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0]
@@ -345,27 +387,6 @@ func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16,
 
 // -----
 
-func.func @outer_dyn_drop_inner_most_dim(%arg0: memref<?x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
-  %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0, %c0]
-    {in_bounds = [true, true, true, true]}
-    : vector<1x16x16x1xf32>, memref<?x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>
-  return
-}
-// CHECK:      func.func @outer_dyn_drop_inner_most_dim
-// CHECK-SAME:   %[[DEST:[a-zA-Z0-9]+]]
-// CHECK-SAME:   %[[VEC:[a-zA-Z0-9]+]]
-// CHECK-SAME:   %[[IDX:[a-zA-Z0-9]+]]
-//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
-//  CHECK-DAG:   %[[D0:.+]] = memref.dim %[[SRC]], %[[C0]]
-// CHECK:        %[[SUBVIEW:.+]] = memref.subview %[[DEST]][0, 0, 0, 0] [%[[D0]], 512, 16, 1]
-// CHECK-SAME:     memref<?x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>> to memref<?x512x16xf32, strided<[8192, 16, 1], offset: ?>>
-// CHECK:        %[[CAST:.+]] = vector.shape_cast %[[VEC]] : vector<1x16x16x1xf32> to vector<1x16x16xf32>
-// CHECK:        vector.transfer_write %[[CAST]], %[[SUBVIEW]]
-// CHECK-SAME:     [%[[IDX]], %[[C0]], %[[C0]]]
-
-// -----
-
 func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]

>From b2e8f75b87a334e5edc6bff88b1b847bb27bc2bb Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 20 Jun 2024 14:34:30 +0100
Subject: [PATCH 2/2] [mlir][vector] Restrict
 DropInnerMostUnitDimsTransferWrite

Restrict `DropInnerMostUnitDimsTransferWrite` so that it fails when one
of the indices to be dropped could be != 0, e.g.

```mlir
func.func @negative_example(
    %arg0: memref<16x1xf32>,
    %arg1: vector<8x1xf32>,
    %idx_1: index,
    %idx_2: index) {

  %c0 = arith.constant 0 : index
  vector.transfer_write %arg1, %arg0[%idx_1, %idx_2] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
  return
}
```

This is an edge case that could represent an out-of-bounds access,
though that will depend on the actual value of `%idx_2`. Importantly,
_without this change_ it would be transformed as follows:
```mlir
func.func @negative_example(
    %arg0: memref<16x1xf32>,
    %arg1: vector<8x1xf32>,
    %idx_1: index,
    %idx_2: index) {

  %subview = memref.subview %arg0[0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
  %0 = vector.shape_cast %arg1 : vector<8x1xf32> to vector<8xf32>
  vector.transfer_write %0, %subview[%idx_1] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
  return
}
```

This is incorrect - `%idx_2` is ignored. Hence the extra restriction to
avoid such cases.

NOTE: This PR is limited to `vector.transfer_write`. Similar patch for
`vector.transfer_read`: #94904
---
 .../Vector/Transforms/VectorTransforms.cpp    |  5 ++++
 ...tor-transfer-collapse-inner-most-dims.mlir | 27 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index b824508728ac8..890cfe2746dae 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1395,6 +1395,11 @@ class DropInnerMostUnitDimsTransferWrite
     if (dimsToDrop == 0)
       return failure();
 
+    // Make sure that the indices to be dropped are equal to 0.
+    // TODO: Deal with cases when the indices are not 0.
+    if (!llvm::all_of(writeOp.getIndices().take_back(dimsToDrop), isZeroIndex))
+      return failure();
+
     auto resultTargetVecType =
         VectorType::get(targetType.getShape().drop_back(dimsToDrop),
                         targetType.getElementType(),
diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 686b4a0b60c2a..df1ae547bcdfa 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -367,6 +367,33 @@ func.func @contiguous_inner_most_dynamic_outer_scalable_inner_dim(%a: index, %b:
 
 // -----
 
+func.func @contiguous_inner_most_non_zero_idxs(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idxs(
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<8x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) {
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
+
+// The index to be dropped is != 0 - this is currently not supported.
+
+func.func @negative_contiguous_inner_most_dim_non_zero_idxs(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<8x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idxs
+// CHECK-NOT:     memref.subview
+// CHECK-NOT:     vector.shape_cast
+// CHECK:         vector.transfer_write
+
+// -----
+
 func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%c0, %arg2, %c0, %c0]