[Mlir-commits] [mlir] [mlir][vector] Update tests for collapse 5/n (nfc) (PR #96227)

Mon Jul 15 01:56:08 PDT 2024

https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/96227

>From 871a7eea134730966c21c992487481e4d9b122fc Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 20 Jun 2024 19:14:23 +0100
Subject: [PATCH 1/2] [mlir][vector] Update tests for collapse 5/n (nfc)

The main goal of this PR (and subsequent PRs), is to add more tests with
scalable vectors to:
  * vector-transfer-collapse-inner-most-dims.mlir

There's quite a few cases to consider, hence this is split into multiple
PRs. In this PR, I am simply adding more tests for
`vector.transfer_write` so that for every test for `xfer_read`, there's
a corresponding test for `xfer_write`.

This is a follow-up for: #94490, #94604, #94906, #96214
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 3c0414c83ed68..981c488c90283 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -449,6 +449,101 @@ func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0:
 // CHECK-NOT:     memref.shape_cast
 // CHECK:         vector.transfer_write
 
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_non_zero_idxs_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idxs_scalable(
+// CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[8]x1xf32>,
+// CHECK-SAME:      %[[IDX:.*]]: index) {
+// CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
+
+// -----
+
+func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<4x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<4x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  return
+}
+
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview(
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<4x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1xf32> to vector<4xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_scalable(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0] {in_bounds = [true, true]} : vector<[4]x1xf32>, memref<40x1xf32, strided<[1, 1], offset: ?>>
+  return
+}
+
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_scalable
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[4]x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0] [40, 1] [1, 1] : memref<40x1xf32, strided<[1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1xf32> to vector<[4]xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// -----
+
+func.func @contiguous_inner_most_dim_with_subview_2d(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<4x1x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<4x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_2d(
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<4x1x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<4x1x1xf32> to vector<4xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<4xf32>, memref<40xf32, strided<[1], offset: ?>>
+
+// Same as the top example within this split, but with the outer vector
+// dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
+// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+
+func.func @contiguous_inner_most_dim_with_subview_2d_scalable(%A: memref<1000x1x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1x1xf32>) {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = memref.subview %A[%i, 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  vector.transfer_write %vec, %0[%ii, %c0, %c0] {in_bounds = [true, true, true]} : vector<[4]x1x1xf32>, memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+  return
+}
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_2d_scalable
+// CHECK-SAME:      %[[MEM:.*]]: memref<1000x1x1xf32>,
+// CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[4]x1x1xf32>) {
+// CHECK:           %[[SV_1:.*]] = memref.subview %[[MEM]]{{\[}}%[[IDX_1]], 0, 0] [40, 1, 1] [1, 1, 1] : memref<1000x1x1xf32> to memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>>
+// CHECK:           %[[SV_2:.*]] = memref.subview %[[SV_1]][0, 0, 0] [40, 1, 1] [1, 1, 1] : memref<40x1x1xf32, strided<[1, 1, 1], offset: ?>> to memref<40xf32, strided<[1], offset: ?>>
+// CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[4]x1x1xf32> to vector<[4]xf32>
+// CHECK:           vector.transfer_write %[[SC]], %[[SV_2]]{{\[}}%[[IDX_2]]] {in_bounds = [true]} : vector<[4]xf32>, memref<40xf32, strided<[1], offset: ?>>
+
 // -----
 
 func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16, 1, 1], offset: ?>>, %arg1: vector<1x16x16x1xf32>, %arg2: index) {
@@ -471,6 +566,30 @@ func.func @drop_inner_most_dim(%arg0: memref<1x512x16x1xf32, strided<[8192, 16,
 
 // -----
 
+// NOTE: This is an out-of-bounds access.
+
+func.func @negative_non_unit_inner_vec_dim(%arg0: memref<4x1xf32>, %vec: vector<4x8xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x8xf32>, memref<4x1xf32>
+  return
+}
+//      CHECK: func.func @negative_non_unit_inner_vec_dim
+//  CHECK-NOT:   memref.subview
+//      CHECK:   vector.transfer_write
+
+// -----
+
+func.func @negative_non_unit_inner_memref_dim(%arg0: memref<4x8xf32>, %vec: vector<4x1xf32>) {
+  %c0 = arith.constant 0 : index
+  vector.transfer_write %vec, %arg0[%c0, %c0] : vector<4x1xf32>, memref<4x8xf32>
+  return
+}
+//      CHECK: func.func @negative_non_unit_inner_memref_dim
+//  CHECK-NOT:   memref.subview
+//      CHECK:   vector.transfer_write
+
+// -----
+
 func.func @non_unit_strides(%arg0: memref<512x16x1xf32, strided<[8192, 16, 4], offset: ?>>, %arg1: vector<16x16x1xf32>, %arg2: index) {
   %c0 = arith.constant 0 : index
   vector.transfer_write %arg1, %arg0[%arg2, %c0, %c0]

>From 1f5c0aa73629f4b94781b4e2885c2a50b51f8902 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Mon, 15 Jul 2024 09:55:43 +0100
Subject: [PATCH 2/2] fixup! [mlir][vector] Update tests for collapse 5/n (nfc)

Move things around a bit, use consistent names.
---
 ...tor-transfer-collapse-inner-most-dims.mlir | 43 ++++++++++---------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
index 981c488c90283..beeb1362b7ab3 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-collapse-inner-most-dims.mlir
@@ -438,36 +438,39 @@ func.func @contiguous_inner_most_dim_non_zero_idx_in_bounds(%arg0: memref<16x1xf
 // CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<8x1xf32> to vector<8xf32>
 // CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<8xf32>, memref<16xf32, strided<[1]>>
 
-// The index to be dropped is unknown and "out of bounds" - not safe to
-// collapse.
-func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
-  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
-  return
-}
-// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds
-// CHECK-NOT:     memref.subview
-// CHECK-NOT:     memref.shape_cast
-// CHECK:         vector.transfer_write
-
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "8 = [8]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_non_zero_idxs_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
-  %c0 = arith.constant 0 : index
-  vector.transfer_write %arg1, %arg0[%i, %c0] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
+// TODO: Add a similar test for xfer_write
+
+func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(%arg0: memref<16x1xf32>, %arg1: vector<[8]x1xf32>, %i: index) {
+  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, true]} : vector<[8]x1xf32>, memref<16x1xf32>
   return
 }
-// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idxs_scalable(
+// CHECK-LABEL:   func.func @contiguous_inner_most_non_zero_idx_in_bounds_scalable(
 // CHECK-SAME:      %[[MEM:.*]]: memref<16x1xf32>,
-// CHECK-SAME:      %[[VEC:.*]]: vector<[8]x1xf32>,
+// CHECK-SAME:      %[[VEC:.*]]: vector<[8]x1xf32>
 // CHECK-SAME:      %[[IDX:.*]]: index) {
 // CHECK:           %[[SV:.*]] = memref.subview %[[MEM]][0, 0] [16, 1] [1, 1] : memref<16x1xf32> to memref<16xf32, strided<[1]>>
 // CHECK:           %[[SC:.*]] = vector.shape_cast %[[VEC]] : vector<[8]x1xf32> to vector<[8]xf32>
 // CHECK:           vector.transfer_write %[[SC]], %[[SV]]{{\[}}%[[IDX]]] {in_bounds = [true]} : vector<[8]xf32>, memref<16xf32, strided<[1]>>
 
+// The index to be dropped is unknown and "out of bounds" - not safe to
+// collapse.
+func.func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds(%arg0: memref<16x1xf32>, %arg1: vector<8x1xf32>, %i: index) {
+  vector.transfer_write %arg1, %arg0[%i, %i] {in_bounds = [true, false]} : vector<8x1xf32>, memref<16x1xf32>
+  return
+}
+// CHECK-LABEL: func @negative_contiguous_inner_most_dim_non_zero_idx_out_of_bounds
+// CHECK-NOT:     memref.subview
+// CHECK-NOT:     memref.shape_cast
+// CHECK:         vector.transfer_write
+
 // -----
 
+// Verify that the transformation does work even when the input is a "subview"
+
 func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<4x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
@@ -487,9 +490,9 @@ func.func @contiguous_inner_most_dim_with_subview(%A: memref<1000x1xf32>, %i:ind
 
 // Same as the top example within this split, but with the outer vector
 // dim scalable. Note that this example only makes sense when "4 = [4]" (i.e.
-// vscale = 1). This is assumed (implicitly) via the `in_bounds` attribute.
+// vscale = 1). This is assumed via the `in_bounds` attribute.
 
-func.func @contiguous_inner_most_dim_with_subview_scalable(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
+func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim(%A: memref<1000x1xf32>, %i:index, %ii:index, %vec: vector<[4]x1xf32>) {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
   %0 = memref.subview %A[%i, 0] [40, 1] [1, 1] : memref<1000x1xf32> to memref<40x1xf32, strided<[1, 1], offset: ?>>
@@ -497,7 +500,7 @@ func.func @contiguous_inner_most_dim_with_subview_scalable(%A: memref<1000x1xf32
   return
 }
 
-// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_scalable
+// CHECK-LABEL:   func.func @contiguous_inner_most_dim_with_subview_scalable_inner_dim
 // CHECK-SAME:      %[[MEM:.*]]: memref<1000x1xf32>,
 // CHECK-SAME:      %[[IDX_1:.*]]: index, %[[IDX_2:.*]]: index,
 // CHECK-SAME:      %[[VEC:.*]]: vector<[4]x1xf32>) {