[Mlir-commits] [mlir] [mlir][ArmSME] Move tests out of `vector-ops-to-llvm.mlir` (PR #89905)

Wed Apr 24 02:58:50 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Benjamin Maxwell (MacDue)

<details>
<summary>Changes</summary>

These tests were effectively integration tests written as unit tests: they checked too many passes at once to be useful and were brittle to any change.

This patch moves these tests to the `vector -> ArmSME` conversion tests. The rest of the lowerings are already checked (e.g. in ArmSME to SCF tests).

---

Patch is 26.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89905.diff


2 Files Affected:

- (modified) mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir (+216) 
- (modified) mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir (-357) 


``````````diff

diff --git a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
index d3f02c6288a240..9368a52729e54f 100644
--- a/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
+++ b/mlir/test/Conversion/VectorToArmSME/vector-to-arm-sme.mlir
@@ -620,3 +620,219 @@ func.func @vector_print_tile(%tile: vector<[4]x[4]xf32>)
 // CHECK-NEXT:      scf.for %[[TILE_SLICE_INDEX:.*]] = %[[C0]] to %[[NUM_TILE_SLICES]] step %[[C1]] {
 // CHECK-NEXT:        %[[TILE_SLICE:.*]] = arm_sme.move_tile_slice_to_vector %[[TILE]][%[[TILE_SLICE_INDEX]]] : vector<[4]xf32> from vector<[4]x[4]xf32>
 // CHECK-NEXT:        vector.print %[[TILE_SLICE]] : vector<[4]xf32>
+
+//===----------------------------------------------------------------------===//
+// vector.load
+//===----------------------------------------------------------------------===//
+
+// -----
+
+// CHECK-LABEL: @vector_load_i8_with_offset(
+// CHECK-SAME:                              %[[MEMREF:.*]]: memref<?x?xi8>)
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[C123:.*]] = arith.constant 123 : index
+// CHECK: arm_sme.tile_load %[[MEMREF]][%[[C123]], %[[C0]]] : memref<?x?xi8>, vector<[16]x[16]xi8>
+func.func @vector_load_i8_with_offset(%arg0 : memref<?x?xi8>) -> vector<[16]x[16]xi8> {
+  %c0 = arith.constant 0 : index
+  %c123 = arith.constant 123 : index
+  %tile = vector.load %arg0[%c123, %c0] : memref<?x?xi8>, vector<[16]x[16]xi8>
+  return %tile : vector<[16]x[16]xi8>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_i8_from_rank_1_memref(
+// CHECK-SAME:                                     %[[MEMREF:.*]]: memref<?xi8>)
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: arm_sme.tile_load %[[MEMREF]][%[[C0]]] : memref<?xi8>, vector<[16]x[16]xi8>
+func.func @vector_load_i8_from_rank_1_memref(%arg0 : memref<?xi8>) -> vector<[16]x[16]xi8> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0] : memref<?xi8>, vector<[16]x[16]xi8>
+  return %tile : vector<[16]x[16]xi8>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_i16(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi16>, vector<[8]x[8]xi16>
+func.func @vector_load_i16(%arg0 : memref<?x?xi16>) -> vector<[8]x[8]xi16> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
+  return %tile : vector<[8]x[8]xi16>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_i32(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi32>, vector<[4]x[4]xi32>
+func.func @vector_load_i32(%arg0 : memref<?x?xi32>) -> vector<[4]x[4]xi32> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi32>, vector<[4]x[4]xi32>
+  return %tile : vector<[4]x[4]xi32>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_i64(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi64>, vector<[2]x[2]xi64>
+func.func @vector_load_i64(%arg0 : memref<?x?xi64>) -> vector<[2]x[2]xi64> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi64>, vector<[2]x[2]xi64>
+  return %tile : vector<[2]x[2]xi64>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_f16(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xf16>, vector<[8]x[8]xf16>
+func.func @vector_load_f16(%arg0 : memref<?x?xf16>) -> vector<[8]x[8]xf16> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf16>, vector<[8]x[8]xf16>
+  return %tile : vector<[8]x[8]xf16>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_bf16(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
+func.func @vector_load_bf16(%arg0 : memref<?x?xbf16>) -> vector<[8]x[8]xbf16> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
+  return %tile : vector<[8]x[8]xbf16>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_f32(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xf32>, vector<[4]x[4]xf32>
+func.func @vector_load_f32(%arg0 : memref<?x?xf32>) -> vector<[4]x[4]xf32> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf32>, vector<[4]x[4]xf32>
+  return %tile : vector<[4]x[4]xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_f64(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xf64>, vector<[2]x[2]xf64>
+func.func @vector_load_f64(%arg0 : memref<?x?xf64>) -> vector<[2]x[2]xf64> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf64>, vector<[2]x[2]xf64>
+  return %tile : vector<[2]x[2]xf64>
+}
+
+// -----
+
+// CHECK-LABEL: @vector_load_i128(
+// CHECK: arm_sme.tile_load {{.*}} : memref<?x?xi128>, vector<[1]x[1]xi128>
+func.func @vector_load_i128(%arg0 : memref<?x?xi128>) -> vector<[1]x[1]xi128> {
+  %c0 = arith.constant 0 : index
+  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi128>, vector<[1]x[1]xi128>
+  return %tile : vector<[1]x[1]xi128>
+}
+
+
+//===----------------------------------------------------------------------===//
+// vector.store
+//===----------------------------------------------------------------------===//
+
+// -----
+
+// CHECK-LABEL: @vector_store_i8(
+// CHECK-SAME:                   %[[MEMREF:.*]]: memref<?x?xi8>) {
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[TILE:.*]] = arm_sme.get_tile : vector<[16]x[16]xi8>
+// CHECK: arm_sme.tile_store %[[TILE]], %[[MEMREF]][%[[C0]], %[[C0]]] : memref<?x?xi8>, vector<[16]x[16]xi8>
+func.func @vector_store_i8(%arg0 : memref<?x?xi8>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[16]x[16]xi8>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi8>, vector<[16]x[16]xi8>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @vector_store_i16
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi16>, vector<[8]x[8]xi16>
+func.func @vector_store_i16(%arg0 : memref<?x?xi16>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[8]x[8]xi16>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @vector_store_i32
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi32>, vector<[4]x[4]xi32>
+func.func @vector_store_i32(%arg0 : memref<?x?xi32>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[4]x[4]xi32>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi32>, vector<[4]x[4]xi32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @vector_store_i64
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi64>, vector<[2]x[2]xi64>
+func.func @vector_store_i64(%arg0 : memref<?x?xi64>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[2]x[2]xi64>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi64>, vector<[2]x[2]xi64>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @vector_store_f16
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf16>, vector<[8]x[8]xf16>
+func.func @vector_store_f16(%arg0 : memref<?x?xf16>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[8]x[8]xf16>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xf16>, vector<[8]x[8]xf16>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @vector_store_bf16
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xbf16>, vector<[8]x[8]xbf16>
+func.func @vector_store_bf16(%arg0 : memref<?x?xbf16>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[8]x[8]xbf16>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
+  return
+}
+// -----
+
+// CHECK-LABEL: @vector_store_f32
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf32>, vector<[4]x[4]xf32>
+func.func @vector_store_f32(%arg0 : memref<?x?xf32>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[4]x[4]xf32>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xf32>, vector<[4]x[4]xf32>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @vector_store_f64
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xf64>, vector<[2]x[2]xf64>
+func.func @vector_store_f64(%arg0 : memref<?x?xf64>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[2]x[2]xf64>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xf64>, vector<[2]x[2]xf64>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: @vector_store_i128
+// CHECK: arm_sme.tile_store {{.*}} : memref<?x?xi128>, vector<[1]x[1]xi128>
+func.func @vector_store_i128(%arg0 : memref<?x?xi128>) {
+  %c0 = arith.constant 0 : index
+  %tile = arm_sme.get_tile : vector<[1]x[1]xi128>
+  vector.store %tile, %arg0[%c0, %c0] : memref<?x?xi128>, vector<[1]x[1]xi128>
+  return
+}
diff --git a/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir b/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir
index 17a070999c20a0..9c07f2d0f655cd 100644
--- a/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir
+++ b/mlir/test/Dialect/ArmSME/vector-ops-to-llvm.mlir
@@ -1,362 +1,5 @@
 // RUN: mlir-opt %s -convert-vector-to-arm-sme -convert-arith-to-arm-sme -allocate-arm-sme-tiles -convert-arm-sme-to-scf -convert-arm-sme-to-llvm -cse -canonicalize -split-input-file -allow-unregistered-dialect -verify-diagnostics | FileCheck %s
 
-//===----------------------------------------------------------------------===//
-// vector.transfer_write
-//===----------------------------------------------------------------------===//
-
-// CHECK-LABEL: @transfer_write_2d_zero_i8(
-// CHECK-SAME:                             %[[ARG0:.*]]: memref<?x?xi8>)
-// CHECK-DAG:  %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : memref<?x?xi8> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-DAG:  %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG:  %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG:  %[[MIN_SVL_B:.*]] = arith.constant 16 : index
-// CHECK-DAG:  %[[PTRUE_ALL:.*]] = arith.constant dense<true> : vector<[16]xi1>
-// CHECK-DAG:  %[[C0_I64:.*]] = builtin.unrealized_conversion_cast %[[C0]] : index to i64
-// CHECK-DAG:  "arm_sme.intr.zero"() <{tile_mask = 255 : i32}> : () -> ()
-// CHECK-DAG:  %[[VSCALE:.*]] = vector.vscale
-// CHECK-NEXT: %[[SVL_B:.*]] = arith.muli %[[VSCALE]], %[[MIN_SVL_B]] : index
-// CHECK-NEXT: scf.for %[[TILE_SLICE:.*]] = %[[C0]] to %[[SVL_B]] step %[[C1]] {
-// CHECK:        %[[TILE_SLICE_I64:.*]] = builtin.unrealized_conversion_cast %[[TILE_SLICE]] : index to i64
-// CHECK-NEXT:   %[[ALIGNED_BASE:.*]] = llvm.extractvalue %[[MEM_DESC]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-NEXT:   %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM_DESC]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-NEXT:   %[[OFF0:.*]] = llvm.mul %[[TILE_SLICE_I64]], %[[STRIDE0]]  : i64
-// CHECK-NEXT:   %[[OFF1:.*]] = llvm.add %[[OFF0]], %[[C0_I64]]  : i64
-// CHECK-NEXT:   %[[GEP:.*]] = llvm.getelementptr %[[ALIGNED_BASE]]{{\[}}%[[OFF1]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8
-// CHECK-NEXT:   %[[TILE_SLICE_I32:.*]] = arith.index_castui %[[TILE_SLICE]] : index to i32
-// CHECK-NEXT:   "arm_sme.intr.st1b.horiz"(%[[PTRUE_ALL]], %[[GEP]], %[[TILE_SLICE_I32]]) <{tile_id = 0 : i32}> : (vector<[16]xi1>, !llvm.ptr, i32) -> ()
-func.func @transfer_write_2d_zero_i8(%arg0 : memref<?x?xi8>) {
-  %c0 = arith.constant 0 : index
-  %cst = arith.constant dense<0> : vector<[16]x[16]xi8>
-  vector.transfer_write %cst, %arg0[%c0, %c0] {in_bounds = [true, true]} : vector<[16]x[16]xi8>, memref<?x?xi8>
-  return
-}
-
-//===----------------------------------------------------------------------===//
-// vector.load
-//===----------------------------------------------------------------------===//
-
-// -----
-
-// Load an 8-bit tile from a rank 2 memref with a non-zero offset for the first
-// memref index. This verifies the offset is preserved when materializing the
-// loop of tile slice loads.
-
-// CHECK-LABEL: @vector_load_i8_with_offset(
-// CHECK-SAME:                              %[[ARG0:.*]]: memref<?x?xi8>)
-// CHECK-DAG:  %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : memref<?x?xi8> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-DAG:  %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG:  %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG:  %[[C123:.*]] = arith.constant 123 : index
-// CHECK-DAG:  %[[MIN_SVL_B:.*]] = arith.constant 16 : index
-// CHECK-DAG:  %[[PTRUE_ALL:.*]] = arith.constant dense<true> : vector<[16]xi1>
-// CHECK-DAG:  %[[C0_I64:.*]] = builtin.unrealized_conversion_cast %[[C0]] : index to i64
-// CHECK-DAG:  %[[VSCALE:.*]] = vector.vscale
-// CHECK-NEXT: %[[SVL_B:.*]] = arith.muli %[[VSCALE]], %[[MIN_SVL_B]] : index
-// CHECK-NEXT: scf.for %[[TILE_SLICE:.*]] = %[[C0]] to %[[SVL_B]] step %[[C1]] {
-// CHECK-NEXT:   %[[TILE_SLICE_PLUS_OFF0:.*]] = arith.addi %[[TILE_SLICE]], %[[C123]] : index
-// CHECK-NEXT:   %[[TILE_SLICE_PLUS_OFF0_I64:.*]] = builtin.unrealized_conversion_cast %[[TILE_SLICE_PLUS_OFF0]] : index to i64
-// CHECK-NEXT:   %[[ALIGNED_BASE:.*]] = llvm.extractvalue %[[MEM_DESC]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-NEXT:   %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM_DESC]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-NEXT:   %[[OFF0:.*]] = llvm.mul %[[TILE_SLICE_PLUS_OFF0_I64]], %[[STRIDE0]]  : i64
-// CHECK-NEXT:   %[[OFF1:.*]] = llvm.add %[[OFF0]], %[[C0_I64]]  : i64
-// CHECK-NEXT:   %[[GEP:.*]] = llvm.getelementptr %[[ALIGNED_BASE]]{{\[}}%[[OFF1]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8
-// CHECK-NEXT:   %[[TILE_SLICE_I32:.*]] = arith.index_castui %[[TILE_SLICE]] : index to i32
-// CHECK-NEXT:   "arm_sme.intr.ld1b.horiz"(%[[PTRUE_ALL]], %[[GEP]], %[[TILE_SLICE_I32]]) <{tile_id = 0 : i32}> : (vector<[16]xi1>, !llvm.ptr, i32) -> ()
-// CHECK-NEXT: }
-func.func @vector_load_i8_with_offset(%arg0 : memref<?x?xi8>) -> vector<[16]x[16]xi8> {
-  %c0 = arith.constant 0 : index
-  %c123 = arith.constant 123 : index
-  %tile = vector.load %arg0[%c123, %c0] : memref<?x?xi8>, vector<[16]x[16]xi8>
-  return %tile : vector<[16]x[16]xi8>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_i8_from_rank_1_memref(
-// CHECK-SAME:                                     %[[ARG0:.*]]: memref<?xi8>)
-// CHECK-DAG:  %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : memref<?xi8> to !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-// CHECK-DAG:  %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG:  %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG:  %[[MIN_SVL_B:.*]] = arith.constant 16 : index
-// CHECK-DAG:  %[[PTRUE_ALL:.*]] = arith.constant dense<true> : vector<[16]xi1>
-// CHECK-DAG:  %[[VSCALE:.*]] = vector.vscale
-// CHECK-NEXT: %[[SVL_B:.*]] = arith.muli %[[VSCALE]], %[[MIN_SVL_B]] : index
-// CHECK-NEXT: scf.for %[[TILE_SLICE:.*]] = %[[C0]] to %[[SVL_B]] step %[[C1]] {
-// CHECK-NEXT:   %[[TILE_SLICE_IDX:.*]] = arith.muli %[[TILE_SLICE]], %[[SVL_B]] : index
-// CHECK-NEXT:   %[[TILE_SLICE_IDX_I64:.*]] = builtin.unrealized_conversion_cast %[[TILE_SLICE_IDX]] : index to i64
-// CHECK-NEXT:   %[[ALIGNED_BASE:.*]] = llvm.extractvalue %[[MEM_DESC]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
-// CHECK-NEXT:   %[[GEP:.*]] = llvm.getelementptr %[[ALIGNED_BASE]]{{\[}}%[[TILE_SLICE_IDX_I64]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8
-// CHECK-NEXT:   %[[TILE_SLICE_I32:.*]] = arith.index_castui %[[TILE_SLICE]] : index to i32
-// CHECK-NEXT:   "arm_sme.intr.ld1b.horiz"(%[[PTRUE_ALL]], %[[GEP]], %[[TILE_SLICE_I32]]) <{tile_id = 0 : i32}> : (vector<[16]xi1>, !llvm.ptr, i32) -> ()
-// CHECK-NEXT: }
-func.func @vector_load_i8_from_rank_1_memref(%arg0 : memref<?xi8>) -> vector<[16]x[16]xi8> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0] : memref<?xi8>, vector<[16]x[16]xi8>
-  return %tile : vector<[16]x[16]xi8>
-}
-
-
-// -----
-
-// CHECK-LABEL: @vector_load_i16(
-// CHECK-SAME:                   %[[ARG0:.*]]: memref<?x?xi16>)
-// CHECK-DAG: %[[MIN_SVL_H:.*]] = arith.constant 8 : index
-// CHECK:     %[[SVL_H:.*]] = arith.muli %{{.*}}, %[[MIN_SVL_H]] : index
-// CHECK:       arm_sme.intr.ld1h.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_i16(%arg0 : memref<?x?xi16>) -> vector<[8]x[8]xi16> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
-  return %tile : vector<[8]x[8]xi16>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_i32(
-// CHECK-SAME:                   %[[ARG0:.*]]: memref<?x?xi32>)
-// CHECK-DAG: %[[MIN_SVL_S:.*]] = arith.constant 4 : index
-// CHECK:     %[[SVL_S:.*]] = arith.muli %{{.*}}, %[[MIN_SVL_S]] : index
-// CHECK:       arm_sme.intr.ld1w.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_i32(%arg0 : memref<?x?xi32>) -> vector<[4]x[4]xi32> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi32>, vector<[4]x[4]xi32>
-  return %tile : vector<[4]x[4]xi32>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_i64(
-// CHECK-SAME:                   %[[ARG0:.*]]: memref<?x?xi64>)
-// CHECK-DAG: %[[MIN_SVL_D:.*]] = arith.constant 2 : index
-// CHECK:     %[[SVL_D:.*]] = arith.muli %{{.*}}, %[[MIN_SVL_D]] : index
-// CHECK:       arm_sme.intr.ld1d.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_i64(%arg0 : memref<?x?xi64>) -> vector<[2]x[2]xi64> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi64>, vector<[2]x[2]xi64>
-  return %tile : vector<[2]x[2]xi64>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_f16(
-// CHECK-SAME:                   %[[ARG0:.*]]: memref<?x?xf16>)
-// CHECK-DAG: %[[MIN_SVL_H:.*]] = arith.constant 8 : index
-// CHECK:     %[[SVL_H:.*]] = arith.muli %{{.*}}, %[[MIN_SVL_H]] : index
-// CHECK:       arm_sme.intr.ld1h.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_f16(%arg0 : memref<?x?xf16>) -> vector<[8]x[8]xf16> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf16>, vector<[8]x[8]xf16>
-  return %tile : vector<[8]x[8]xf16>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_bf16(
-// CHECK-SAME:                    %[[ARG0:.*]]: memref<?x?xbf16>)
-// CHECK-DAG: %[[MIN_SVL_H:.*]] = arith.constant 8 : index
-// CHECK:     %[[SVL_H:.*]] = arith.muli %{{.*}}, %[[MIN_SVL_H]] : index
-// CHECK:       arm_sme.intr.ld1h.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_bf16(%arg0 : memref<?x?xbf16>) -> vector<[8]x[8]xbf16> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xbf16>, vector<[8]x[8]xbf16>
-  return %tile : vector<[8]x[8]xbf16>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_f32(
-// CHECK-SAME:                   %[[ARG0:.*]]: memref<?x?xf32>)
-// CHECK-DAG: %[[MIN_SVL_S:.*]] = arith.constant 4 : index
-// CHECK:     %[[SVL_S:.*]] = arith.muli %{{.*}}, %[[MIN_SVL_S]] : index
-// CHECK:       arm_sme.intr.ld1w.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_f32(%arg0 : memref<?x?xf32>) -> vector<[4]x[4]xf32> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf32>, vector<[4]x[4]xf32>
-  return %tile : vector<[4]x[4]xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_f64(
-// CHECK-SAME:                   %[[ARG0:.*]]: memref<?x?xf64>)
-// CHECK-DAG: %[[MIN_SVL_D:.*]] = arith.constant 2 : index
-// CHECK:     %[[SVL_D:.*]] = arith.muli %{{.*}}, %[[MIN_SVL_D]] : index
-// CHECK:       arm_sme.intr.ld1d.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_f64(%arg0 : memref<?x?xf64>) -> vector<[2]x[2]xf64> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xf64>, vector<[2]x[2]xf64>
-  return %tile : vector<[2]x[2]xf64>
-}
-
-// -----
-
-// CHECK-LABEL: @vector_load_i128(
-// CHECK-SAME:                    %[[ARG0:.*]]: memref<?x?xi128>)
-// CHECK:       arm_sme.intr.ld1q.horiz
-// CHECK-SAME:  tile_id = 0
-func.func @vector_load_i128(%arg0 : memref<?x?xi128>) -> vector<[1]x[1]xi128> {
-  %c0 = arith.constant 0 : index
-  %tile = vector.load %arg0[%c0, %c0] : memref<?x?xi128>, vector<[1]x[1]xi128>
-  return %tile : vector<[1]x[1]xi128>
-}
-
-//===----------------------------------------------------------------------===//
-// vector.store
-//===----------------------------------------------------------------------===//
-
-// -----
-
-// CHECK-LABEL: @vector_store_i8(
-// CHECK-SAME:                   %[[ARG0:.*]]: memref<?x?xi8>)
-// CHECK-DAG:  %[[MEM_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : memref<?x?xi8> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK-DAG:  %[[C0:.*]] = ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/89905