[Mlir-commits] [mlir] c8d1388 - [mlir][ArmSME] Add tests for Streaming SVE

Tue Apr 25 00:51:56 PDT 2023

Author: Cullen Rhodes
Date: 2023-04-25T07:51:43Z
New Revision: c8d1388e6c8bd57299d5801f170719218f735c4c

URL: https://github.com/llvm/llvm-project/commit/c8d1388e6c8bd57299d5801f170719218f735c4c
DIFF: https://github.com/llvm/llvm-project/commit/c8d1388e6c8bd57299d5801f170719218f735c4c.diff

LOG: [mlir][ArmSME] Add tests for Streaming SVE

This patch adds a couple of tests for targeting Arm Streaming SVE (SSVE)
mode, part of the Arm Scalable Matrix Extension (SME).

SSVE is enabled in the backend at the function boundary by specifying
the `aarch64_pstate_sm_enabled` attribute, as documented here [1]. SSVE
can be targeted from MLIR by specifying this in the passthrough
attributes [2] and compiling with

  -mattr=+sme,+sve -force-streaming-compatible-sve

The passthrough will propagate to the backend where `smstart/smstop`
will be emitted around the call to the SSVE function.

The set of legal instructions changes in SSVE;
`-force-streaming-compatible-sve` avoids the use of NEON entirely and
instead lowers to (streaming-compatible) SVE. In the future, the
behaviour this flag controls will be hooked up to the function
attribute, so that simply specifying the attribute should lead to
correct code generation.

Two tests are added:

  * A basic LLVMIR test verifying the attribute is passed through.
  * An integration test calling an SSVE function.

The integration test can be run with QEMU.

[1] https://llvm.org/docs/AArch64SME.html
[2] https://mlir.llvm.org/docs/Dialects/LLVM/#attribute-pass-through

Reviewed By: awarzynski, aartbik

Differential Revision: https://reviews.llvm.org/D148111

Added: 
    mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
    mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
    mlir/test/Target/LLVMIR/arm-ssve.mlir

Modified: 
    mlir/test/CMakeLists.txt
    mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
    mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
    mlir/test/lit.site.cfg.py.in

Removed: 
    


################################################################################
diff  --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index f0fc972a4a257..048291bd5b8d8 100644

--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -29,6 +29,7 @@ if (MLIR_INCLUDE_INTEGRATION_TESTS)
   option(MLIR_RUN_CUDA_TENSOR_CORE_TESTS "Run CUDA Tensor core WMMA tests.")
   option(MLIR_RUN_CUDA_SM80_TESTS "Run CUDA A100 tests.")
   option(MLIR_RUN_ARM_SVE_TESTS "Run Arm SVE tests.")
+  option(MLIR_RUN_ARM_SME_TESTS "Run Arm SME tests.")
 
 
   # The native target may not be enabled when cross compiling, raise an error.
@@ -52,6 +53,7 @@ llvm_canonicalize_cmake_booleans(
   MLIR_RUN_CUDA_TENSOR_CORE_TESTS
   MLIR_RUN_X86VECTOR_TESTS
   MLIR_RUN_ARM_SVE_TESTS
+  MLIR_RUN_ARM_SME_TESTS
   MLIR_RUN_CUDA_SM80_TESTS
   )
 

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
index b0a8a6c732441..b85fbff3ac9e0 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/lit.local.cfg
@@ -1,4 +1,5 @@
 import sys
+from lit.llvm import llvm_config
 
 # FIXME: %mlir_native_utils_lib_dir is set incorrectly on Windows
 if sys.platform == 'win32':
@@ -18,6 +19,15 @@ if config.mlir_run_arm_sve_tests:
         config.substitutions.append(('%mlir_native_utils_lib_dir', config.mlir_lib_dir))
 
     if config.arm_emulator_executable:
+        if not config.arm_emulator_lli_executable:
+            # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+            # when running under an emulator. If the user didn't specify an lli
+            # executable, use absolute path %llvm_tools_dir/lli.
+            # TODO(c-rhodes): This logic is duplicated across several Lit files
+            # and needs refactoring.
+            lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                                search_paths=[config.llvm_tools_dir],
+                                                use_installed=False)
         # Run test in emulator (qemu or armie).
         emulation_cmd = config.arm_emulator_executable
         if config.arm_emulator_options:

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
new file mode 100644
index 0000000000000..9de2b0b96e3b5
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/lit.local.cfg
@@ -0,0 +1,36 @@
+import sys
+from lit.llvm import llvm_config
+
+# ArmSME tests must be enabled via build flag.
+if not config.mlir_run_arm_sme_tests:
+    config.unsupported = True
+
+# No JIT on win32.
+if sys.platform == 'win32':
+    config.unsupported = True
+
+lli_cmd = 'lli'
+if config.arm_emulator_lli_executable:
+    lli_cmd = config.arm_emulator_lli_executable
+
+config.substitutions.append(('%mlir_native_utils_lib_dir',
+    config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))
+
+if config.arm_emulator_executable:
+    if not config.arm_emulator_lli_executable:
+        # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+        # when running under an emulator. If the user didn't specify an lli
+        # executable, use absolute path %llvm_tools_dir/lli.
+        # TODO(c-rhodes): This logic is duplicated across several Lit files and
+        # needs refactoring.
+        lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                            search_paths=[config.llvm_tools_dir],
+                                            use_installed=False)
+    # Run test in emulator (QEMU)
+    emulation_cmd = config.arm_emulator_executable
+    if config.arm_emulator_options:
+        emulation_cmd = emulation_cmd + ' ' + config.arm_emulator_options
+    emulation_cmd = emulation_cmd + ' ' + lli_cmd
+    config.substitutions.append(('%lli', emulation_cmd))
+else:
+    config.substitutions.append(('%lli', lli_cmd))

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
new file mode 100644
index 0000000000000..bf8741f556e53
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-ssve.mlir
@@ -0,0 +1,65 @@
+// RUN: mlir-opt %s -test-lower-to-llvm | \
+// RUN: mlir-translate -mlir-to-llvmir | \
+// RUN: %lli --march=aarch64 --mattr="+sve,+sme" \
+// RUN:      -force-streaming-compatible-sve \
+// RUN:      --entry-function=entry \
+// RUN:      --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// NOTE: To run this test, your CPU must support SME.
+
+// VLA memcopy in streaming mode.
+func.func @streaming_kernel_copy(%src : memref<?xi64>, %dst : memref<?xi64>, %size : index) attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  %c0 = arith.constant 0 : index
+  %c2 = arith.constant 2 : index
+  %vscale = vector.vscale
+  %step = arith.muli %c2, %vscale : index
+  scf.for %i = %c0 to %size step %step {
+    %0 = vector.load %src[%i] : memref<?xi64>, vector<[2]xi64>
+    vector.store %0, %dst[%i] : memref<?xi64>, vector<[2]xi64>
+  }
+  return
+}
+
+func.func @entry() -> i32 {
+  %i0 = arith.constant 0: i64
+  %r0 = arith.constant 0: i32
+  %c0 = arith.constant 0: index
+  %c4 = arith.constant 4: index
+  %c32 = arith.constant 32: index
+
+  // Set up memory.
+  %a = memref.alloc()      : memref<32xi64>
+  %a_copy = memref.alloc() : memref<32xi64>
+  %a_data = arith.constant dense<[1 , 2,  3 , 4 , 5,  6,  7,  8,
+                                  9, 10, 11, 12, 13, 14, 15, 16,
+                                  17, 18, 19, 20, 21, 22, 23, 24,
+                                  25, 26, 27, 28, 29, 30, 31, 32]> : vector<32xi64>
+  vector.transfer_write %a_data, %a[%c0] : vector<32xi64>, memref<32xi64>
+
+  // Call kernel.
+  %0 = memref.cast %a : memref<32xi64> to memref<?xi64>
+  %1 = memref.cast %a_copy : memref<32xi64> to memref<?xi64>
+  call @streaming_kernel_copy(%0, %1, %c32) : (memref<?xi64>, memref<?xi64>, index) -> ()
+
+  // Print and verify.
+  //
+  // CHECK:      ( 1, 2, 3, 4 )
+  // CHECK-NEXT: ( 5, 6, 7, 8 )
+  // CHECK-NEXT: ( 9, 10, 11, 12 )
+  // CHECK-NEXT: ( 13, 14, 15, 16 )
+  // CHECK-NEXT: ( 17, 18, 19, 20 )
+  // CHECK-NEXT: ( 21, 22, 23, 24 )
+  // CHECK-NEXT: ( 25, 26, 27, 28 )
+  // CHECK-NEXT: ( 29, 30, 31, 32 )
+  scf.for %i = %c0 to %c32 step %c4 {
+    %cv = vector.transfer_read %a_copy[%i], %i0 : memref<32xi64>, vector<4xi64>
+    vector.print %cv : vector<4xi64>
+  }
+
+  // Release resources.
+  memref.dealloc %a      : memref<32xi64>
+  memref.dealloc %a_copy : memref<32xi64>
+
+  return %r0 : i32
+}

diff  --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
index 077904cc44df0..c637d6f3bb2c2 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSVE/lit.local.cfg
@@ -1,4 +1,5 @@
 import sys
+from lit.llvm import llvm_config
 
 # ArmSVE tests must be enabled via build flag.
 if not config.mlir_run_arm_sve_tests:
@@ -16,6 +17,15 @@ config.substitutions.append(('%mlir_native_utils_lib_dir',
     config.arm_emulator_utils_lib_dir or config.mlir_lib_dir))
 
 if config.arm_emulator_executable:
+    if not config.arm_emulator_lli_executable:
+        # Top-level lit config adds llvm_tools_dir to PATH but this is lost
+        # when running under an emulator. If the user didn't specify an lli
+        # executable, use absolute path %llvm_tools_dir/lli.
+        # TODO(c-rhodes): This logic is duplicated across several Lit files and
+        # needs refactoring.
+        lli_cmd = llvm_config.use_llvm_tool('lli', search_env='LLI', required=True,
+                                            search_paths=[config.llvm_tools_dir],
+                                            use_installed=False)
     # Run test in emulator (qemu or armie)
     emulation_cmd = config.arm_emulator_executable
     if config.arm_emulator_options:

diff  --git a/mlir/test/Target/LLVMIR/arm-ssve.mlir b/mlir/test/Target/LLVMIR/arm-ssve.mlir
new file mode 100644
index 0000000000000..91bf3e6daf517
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/arm-ssve.mlir
@@ -0,0 +1,11 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+// Attribute to enable streaming-mode.
+
+// CHECK-LABEL: @streaming_callee
+// CHECK: #[[ATTR:[0-9]*]]
+llvm.func @streaming_callee() attributes {passthrough = ["aarch64_pstate_sm_enabled"]} {
+  llvm.return
+}
+
+// CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_enabled" }

diff  --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 1013dc8464adc..bf3f59b4c755e 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -36,6 +36,7 @@ config.enable_bindings_python = @MLIR_ENABLE_BINDINGS_PYTHON@
 config.intel_sde_executable = "@INTEL_SDE_EXECUTABLE@"
 config.mlir_run_amx_tests = @MLIR_RUN_AMX_TESTS@
 config.mlir_run_arm_sve_tests = @MLIR_RUN_ARM_SVE_TESTS@
+config.mlir_run_arm_sme_tests = @MLIR_RUN_ARM_SME_TESTS@
 config.mlir_run_x86vector_tests = @MLIR_RUN_X86VECTOR_TESTS@
 config.mlir_run_riscv_vector_tests = "@MLIR_RUN_RISCV_VECTOR_TESTS@"
 config.mlir_run_cuda_tensor_core_tests = @MLIR_RUN_CUDA_TENSOR_CORE_TESTS@