[Mlir-commits] [mlir] 783ac3b - [mlir][ArmSME] Make use of backend function attributes for enabling ZA storage (#71044)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Nov 14 04:50:42 PST 2023
Author: Benjamin Maxwell
Date: 2023-11-14T12:50:38Z
New Revision: 783ac3b6fb70ce88182a4dee1db0d3f5fb93953c
URL: https://github.com/llvm/llvm-project/commit/783ac3b6fb70ce88182a4dee1db0d3f5fb93953c
DIFF: https://github.com/llvm/llvm-project/commit/783ac3b6fb70ce88182a4dee1db0d3f5fb93953c.diff
LOG: [mlir][ArmSME] Make use of backend function attributes for enabling ZA storage (#71044)
Previously, we were inserting za.enable/disable intrinsics for functions
with the "arm_za" attribute (at the MLIR level), rather than using the
backend attributes. This was done to avoid a dependency on the SME ABI
functions from compiler-rt (which have only recently been implemented).
Doing things this way did have correctness issues; for example, calling
a streaming-mode function from another streaming-mode function (both
with ZA enabled) would lead to ZA being disabled after returning to the
caller (where it should still be enabled). Fixing issues like this would
require re-doing the ABI work already done in the backend within MLIR.
Instead, this patch switches to use the "arm_new_za" (backend) attribute
for enabling ZA for an MLIR function. For the integration tests, this
requires some way of linking the SME ABI functions. This is done via the
`%arm_sme_abi_shlib` lit substitution. By default, this expands to a
stub implementation of the SME ABI functions, but this can be overridden
by providing the `ARM_SME_ABI_ROUTINES_SHLIB` CMake cache variable
(pointing it at an alternative implementation). For now, the ArmSME
integration tests pass with just stubs, as we don't make use of nested
ZA-enabled calls.
A future patch may add an option to compiler-rt to build the SME
builtins into a standalone shared library to allow easily
building/testing with the actual implementation.
Added:
mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
Modified:
mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
mlir/include/mlir/Dialect/ArmSME/Transforms/CMakeLists.txt
mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h
mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td
mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp
mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp
mlir/lib/ExecutionEngine/CMakeLists.txt
mlir/lib/Target/LLVMIR/ModuleImport.cpp
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
mlir/test/CMakeLists.txt
mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir
mlir/test/Dialect/ArmSME/enable-arm-za.mlir
mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir
mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir
mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
mlir/test/Target/LLVMIR/arm-sme.mlir
mlir/test/lit.cfg.py
mlir/test/lit.site.cfg.py.in
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
index c86a73812a5899c..bcf2466b13a739f 100644
--- a/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/ArmSME/IR/ArmSMEIntrinsicOps.td
@@ -161,7 +161,4 @@ def LLVM_aarch64_sme_write_vert : LLVM_aarch64_sme_write<"vert">;
def LLVM_aarch64_sme_read_horiz : LLVM_aarch64_sme_read<"horiz">;
def LLVM_aarch64_sme_read_vert : LLVM_aarch64_sme_read<"vert">;
-def LLVM_aarch64_sme_za_enable : ArmSME_IntrOp<"za.enable">;
-def LLVM_aarch64_sme_za_disable : ArmSME_IntrOp<"za.disable">;
-
#endif // ARMSME_INTRINSIC_OPS
diff --git a/mlir/include/mlir/Dialect/ArmSME/Transforms/CMakeLists.txt b/mlir/include/mlir/Dialect/ArmSME/Transforms/CMakeLists.txt
index e2738b0fc404d63..38f48757b7749b7 100644
--- a/mlir/include/mlir/Dialect/ArmSME/Transforms/CMakeLists.txt
+++ b/mlir/include/mlir/Dialect/ArmSME/Transforms/CMakeLists.txt
@@ -1,5 +1,7 @@
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls -name ArmSME)
+mlir_tablegen(PassesEnums.h.inc -gen-enum-decls)
+mlir_tablegen(PassesEnums.cpp.inc -gen-enum-defs)
add_public_tablegen_target(MLIRArmSMETransformsIncGen)
add_mlir_doc(Passes ArmSMEPasses ./ -gen-pass-doc)
diff --git a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h
index ab5c179f2dd7790..6f7617f5411c57f 100644
--- a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.h
@@ -10,6 +10,7 @@
#define MLIR_DIALECT_ARMSME_TRANSFORMS_PASSES_H
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/ArmSME/Transforms/PassesEnums.h.inc"
#include "mlir/Pass/Pass.h"
namespace mlir {
@@ -20,19 +21,13 @@ namespace arm_sme {
//===----------------------------------------------------------------------===//
// The EnableArmStreaming pass.
//===----------------------------------------------------------------------===//
-// Options for Armv9 Streaming SVE mode. By default, streaming-mode is part of
-// the function interface (ABI) and the caller manages PSTATE.SM on entry/exit.
-// In a locally streaming function PSTATE.SM is kept internal and the callee
-// manages it on entry/exit.
-enum class ArmStreaming { Default = 0, Locally = 1 };
-
#define GEN_PASS_DECL
#include "mlir/Dialect/ArmSME/Transforms/Passes.h.inc"
/// Pass to enable Armv9 Streaming SVE mode.
-std::unique_ptr<Pass>
-createEnableArmStreamingPass(const ArmStreaming mode = ArmStreaming::Default,
- const bool enableZA = false);
+std::unique_ptr<Pass> createEnableArmStreamingPass(
+ const ArmStreamingMode = ArmStreamingMode::Streaming,
+ const ArmZaMode = ArmZaMode::Disabled);
/// Pass that replaces 'arm_sme.get_tile_id' ops with actual tiles.
std::unique_ptr<Pass> createTileAllocationPass();
diff --git a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td
index 3fa1b43eb9e67e0..3253b47e62abddb 100644
--- a/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/ArmSME/Transforms/Passes.td
@@ -10,6 +10,32 @@
#define MLIR_DIALECT_ARMSME_TRANSFORMS_PASSES_TD
include "mlir/Pass/PassBase.td"
+include "mlir/IR/EnumAttr.td"
+
+def ArmStreamingMode : I32EnumAttr<"ArmStreamingMode", "Armv9 Streaming SVE mode",
+ [
+ I32EnumAttrCase<"Disabled", 0, "disabled">,
+ // Streaming: Streaming-mode is part of the function interface (ABI).
+ I32EnumAttrCase<"Streaming", 1, "arm_streaming">,
+ // StreamingLocally: PSTATE.SM is kept internal and the callee manages it
+ // on entry/exit.
+ I32EnumAttrCase<"StreamingLocally", 2, "arm_locally_streaming">,
+ ]>{
+ let cppNamespace = "mlir::arm_sme";
+ let genSpecializedAttr = 0;
+}
+
+// TODO: Add other ZA modes.
+// https://arm-software.github.io/acle/main/acle.html#sme-attributes-relating-to-za
+def ArmZaMode : I32EnumAttr<"ArmZaMode", "Armv9 ZA storage mode",
+ [
+ I32EnumAttrCase<"Disabled", 0, "disabled">,
+ // A function's ZA state is created on entry and destroyed on exit.
+ I32EnumAttrCase<"NewZA", 1, "arm_new_za">,
+ ]>{
+ let cppNamespace = "mlir::arm_sme";
+ let genSpecializedAttr = 0;
+}
def EnableArmStreaming
: Pass<"enable-arm-streaming", "mlir::func::FuncOp"> {
@@ -22,19 +48,32 @@ def EnableArmStreaming
}];
let constructor = "mlir::arm_sme::createEnableArmStreamingPass()";
let options = [
- Option<"mode", "mode", "mlir::arm_sme::ArmStreaming",
- /*default=*/"mlir::arm_sme::ArmStreaming::Default",
+ Option<"streamingMode", "streaming-mode", "mlir::arm_sme::ArmStreamingMode",
+ /*default=*/"mlir::arm_sme::ArmStreamingMode::Streaming",
"Select how streaming-mode is managed at the function-level.",
[{::llvm::cl::values(
- clEnumValN(mlir::arm_sme::ArmStreaming::Default, "default",
- "Streaming mode is part of the function interface "
- "(ABI), caller manages PSTATE.SM on entry/exit."),
- clEnumValN(mlir::arm_sme::ArmStreaming::Locally, "locally",
- "Streaming mode is internal to the function, callee "
- "manages PSTATE.SM on entry/exit.")
+ clEnumValN(mlir::arm_sme::ArmStreamingMode::Disabled,
+ "disabled", "Streaming mode is disabled."),
+ clEnumValN(mlir::arm_sme::ArmStreamingMode::Streaming,
+ "streaming",
+ "Streaming mode is part of the function interface "
+ "(ABI), caller manages PSTATE.SM on entry/exit."),
+ clEnumValN(mlir::arm_sme::ArmStreamingMode::StreamingLocally,
+ "streaming-locally",
+ "Streaming mode is internal to the function, callee "
+ "manages PSTATE.SM on entry/exit.")
)}]>,
- Option<"enableZA", "enable-za", "bool", /*default=*/"false",
- "Enable ZA storage array.">,
+ Option<"zaMode", "za-mode", "mlir::arm_sme::ArmZaMode",
+ /*default=*/"mlir::arm_sme::ArmZaMode::Disabled",
+ "Select how ZA-storage is managed at the function-level.",
+ [{::llvm::cl::values(
+ clEnumValN(mlir::arm_sme::ArmZaMode::Disabled,
+ "disabled", "ZA storage is disabled."),
+ clEnumValN(mlir::arm_sme::ArmZaMode::NewZA,
+ "new-za",
+ "The function has ZA state. The ZA state is "
+ "created on entry and destroyed on exit.")
+ )}]>
];
let dependentDialects = ["func::FuncDialect"];
}
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 9fa6f23ce4de2dd..88f4f81735372b9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1387,6 +1387,7 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [
DefaultValuedAttr<Visibility, "mlir::LLVM::Visibility::Default">:$visibility_,
OptionalAttr<UnitAttr>:$arm_streaming,
OptionalAttr<UnitAttr>:$arm_locally_streaming,
+ OptionalAttr<UnitAttr>:$arm_new_za,
OptionalAttr<StrAttr>:$section,
OptionalAttr<UnnamedAddr>:$unnamed_addr,
OptionalAttr<I64Attr>:$alignment,
diff --git a/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp b/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp
index 1d3a090e861013b..c3a1a1c9a3fb49e 100644
--- a/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp
+++ b/mlir/lib/Dialect/ArmSME/Transforms/EnableArmStreaming.cpp
@@ -34,6 +34,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/ArmSME/Transforms/Passes.h"
+#include "mlir/Dialect/ArmSME/Transforms/PassesEnums.cpp.inc"
#include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -48,46 +49,38 @@ namespace arm_sme {
using namespace mlir;
using namespace mlir::arm_sme;
+namespace {
-static constexpr char kArmStreamingAttr[] = "arm_streaming";
-static constexpr char kArmLocallyStreamingAttr[] = "arm_locally_streaming";
-static constexpr char kArmZAAttr[] = "arm_za";
-static constexpr char kEnableArmStreamingIgnoreAttr[] =
- "enable_arm_streaming_ignore";
+constexpr StringLiteral
+ kEnableArmStreamingIgnoreAttr("enable_arm_streaming_ignore");
-namespace {
struct EnableArmStreamingPass
: public arm_sme::impl::EnableArmStreamingBase<EnableArmStreamingPass> {
- EnableArmStreamingPass(ArmStreaming mode, bool enableZA) {
- this->mode = mode;
- this->enableZA = enableZA;
+ EnableArmStreamingPass(ArmStreamingMode streamingMode, ArmZaMode zaMode) {
+ this->streamingMode = streamingMode;
+ this->zaMode = zaMode;
}
void runOnOperation() override {
- if (getOperation()->getAttr(kEnableArmStreamingIgnoreAttr))
+ auto op = getOperation();
+ if (op->getAttr(kEnableArmStreamingIgnoreAttr) ||
+ streamingMode == ArmStreamingMode::Disabled)
return;
- StringRef attr;
- switch (mode) {
- case ArmStreaming::Default:
- attr = kArmStreamingAttr;
- break;
- case ArmStreaming::Locally:
- attr = kArmLocallyStreamingAttr;
- break;
- }
- getOperation()->setAttr(attr, UnitAttr::get(&getContext()));
+
+ auto unitAttr = UnitAttr::get(&getContext());
+
+ op->setAttr(stringifyArmStreamingMode(streamingMode), unitAttr);
// The pass currently only supports enabling ZA when in streaming-mode, but
// ZA can be accessed by the SME LDR, STR and ZERO instructions when not in
// streaming-mode (see section B1.1.1, IDGNQM of spec [1]). It may be worth
// supporting this later.
- if (enableZA)
- getOperation()->setAttr(kArmZAAttr, UnitAttr::get(&getContext()));
+ if (zaMode != ArmZaMode::Disabled)
+ op->setAttr(stringifyArmZaMode(zaMode), unitAttr);
}
};
} // namespace
-std::unique_ptr<Pass>
-mlir::arm_sme::createEnableArmStreamingPass(const ArmStreaming mode,
- const bool enableZA) {
- return std::make_unique<EnableArmStreamingPass>(mode, enableZA);
+std::unique_ptr<Pass> mlir::arm_sme::createEnableArmStreamingPass(
+ const ArmStreamingMode streamingMode, const ArmZaMode zaMode) {
+ return std::make_unique<EnableArmStreamingPass>(streamingMode, zaMode);
}
diff --git a/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp
index d1a54658a595bf3..6078b3f2c5e4708 100644
--- a/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/ArmSME/Transforms/LegalizeForLLVMExport.cpp
@@ -21,33 +21,6 @@ using namespace mlir;
using namespace mlir::arm_sme;
namespace {
-/// Insert 'llvm.aarch64.sme.za.enable' intrinsic at the start of 'func.func'
-/// ops to enable the ZA storage array.
-struct EnableZAPattern : public OpRewritePattern<func::FuncOp> {
- using OpRewritePattern::OpRewritePattern;
- LogicalResult matchAndRewrite(func::FuncOp op,
- PatternRewriter &rewriter) const final {
- OpBuilder::InsertionGuard g(rewriter);
- rewriter.setInsertionPointToStart(&op.front());
- rewriter.create<arm_sme::aarch64_sme_za_enable>(op->getLoc());
- rewriter.updateRootInPlace(op, [] {});
- return success();
- }
-};
-
-/// Insert 'llvm.aarch64.sme.za.disable' intrinsic before 'func.return' ops to
-/// disable the ZA storage array.
-struct DisableZAPattern : public OpRewritePattern<func::ReturnOp> {
- using OpRewritePattern::OpRewritePattern;
- LogicalResult matchAndRewrite(func::ReturnOp op,
- PatternRewriter &rewriter) const final {
- OpBuilder::InsertionGuard g(rewriter);
- rewriter.setInsertionPoint(op);
- rewriter.create<arm_sme::aarch64_sme_za_disable>(op->getLoc());
- rewriter.updateRootInPlace(op, [] {});
- return success();
- }
-};
/// Lower 'arm_sme.zero' to SME intrinsics.
///
@@ -678,39 +651,13 @@ void mlir::configureArmSMELegalizeForExportTarget(
arm_sme::aarch64_sme_st1w_vert, arm_sme::aarch64_sme_st1d_vert,
arm_sme::aarch64_sme_st1q_vert, arm_sme::aarch64_sme_read_horiz,
arm_sme::aarch64_sme_read_vert, arm_sme::aarch64_sme_write_horiz,
- arm_sme::aarch64_sme_write_vert, arm_sme::aarch64_sme_mopa,
- arm_sme::aarch64_sme_za_enable, arm_sme::aarch64_sme_za_disable>();
+ arm_sme::aarch64_sme_write_vert, arm_sme::aarch64_sme_mopa>();
target.addLegalOp<GetTileID>();
target.addIllegalOp<vector::OuterProductOp>();
-
- // Mark 'func.func' ops as legal if either:
- // 1. no 'arm_za' function attribute is present.
- // 2. the 'arm_za' function attribute is present and the first op in the
- // function is an 'arm_sme::aarch64_sme_za_enable' intrinsic.
- target.addDynamicallyLegalOp<func::FuncOp>([&](func::FuncOp funcOp) {
- if (funcOp.isDeclaration())
- return true;
- auto firstOp = funcOp.getBody().front().begin();
- return !funcOp->hasAttr("arm_za") ||
- isa<arm_sme::aarch64_sme_za_enable>(firstOp);
- });
-
- // Mark 'func.return' ops as legal if either:
- // 1. no 'arm_za' function attribute is present.
- // 2. the 'arm_za' function attribute is present and there's a preceding
- // 'arm_sme::aarch64_sme_za_disable' intrinsic.
- target.addDynamicallyLegalOp<func::ReturnOp>([&](func::ReturnOp returnOp) {
- bool hasDisableZA = false;
- auto funcOp = returnOp->getParentOp();
- funcOp->walk<WalkOrder::PreOrder>(
- [&](arm_sme::aarch64_sme_za_disable op) { hasDisableZA = true; });
- return !funcOp->hasAttr("arm_za") || hasDisableZA;
- });
}
void mlir::populateArmSMELegalizeForLLVMExportPatterns(
LLVMTypeConverter &converter, RewritePatternSet &patterns) {
- patterns.add<DisableZAPattern, EnableZAPattern>(patterns.getContext());
patterns.add<
LoadTileSliceToArmSMELowering, MoveTileSliceToVectorArmSMELowering,
MoveVectorToTileSliceToArmSMELowering, StoreTileSliceToArmSMELowering,
diff --git a/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp b/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
new file mode 100644
index 000000000000000..f9f64ad5e5ac81c
--- /dev/null
+++ b/mlir/lib/ExecutionEngine/ArmSMEStubs.cpp
@@ -0,0 +1,48 @@
+//===- ArmSMEStubs.cpp - ArmSME ABI routine stubs -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Compiler.h"
+#include <cstdint>
+#include <iostream>
+
+// The actual implementation of these routines is in:
+// compiler-rt/lib/builtins/aarch64/sme-abi.S. These stubs allow the current
+// ArmSME tests to run without depending on compiler-rt. This works as we don't
+// rely on nested ZA-enabled calls at the moment. The use of these stubs can be
+// overridden by setting the ARM_SME_ABI_ROUTINES_SHLIB CMake cache variable to
+// a path to an alternate implementation.
+
+extern "C" {
+
+bool LLVM_ATTRIBUTE_WEAK __aarch64_sme_accessible() {
+ // The ArmSME tests are run within an emulator so we assume SME is available.
+ return true;
+}
+
+struct sme_state {
+ int64_t x0;
+ int64_t x1;
+};
+
+sme_state LLVM_ATTRIBUTE_WEAK __arm_sme_state() {
+ std::cerr << "[warning] __arm_sme_state() stubbed!\n";
+ return sme_state{};
+}
+
+void LLVM_ATTRIBUTE_WEAK __arm_tpidr2_restore() {
+ std::cerr << "[warning] __arm_tpidr2_restore() stubbed!\n";
+}
+
+void LLVM_ATTRIBUTE_WEAK __arm_tpidr2_save() {
+ std::cerr << "[warning] __arm_tpidr2_save() stubbed!\n";
+}
+
+void LLVM_ATTRIBUTE_WEAK __arm_za_disable() {
+ std::cerr << "[warning] __arm_za_disable() stubbed!\n";
+}
+}
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
index fdc797763ae3a41..fe139661f2bbb5a 100644
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -2,6 +2,7 @@
# is a big dependency which most don't need.
set(LLVM_OPTIONAL_SOURCES
+ ArmSMEStubs.cpp
AsyncRuntime.cpp
CRunnerUtils.cpp
CudaRuntimeWrappers.cpp
@@ -177,6 +178,10 @@ if(LLVM_ENABLE_PIC)
target_link_options(mlir_async_runtime PRIVATE "-Wl,-exclude-libs,ALL")
endif()
+ add_mlir_library(mlir_arm_sme_abi_stubs
+ SHARED
+ ArmSMEStubs.cpp)
+
if(MLIR_ENABLE_CUDA_RUNNER)
# Configure CUDA support. Using check_language first allows us to give a
# custom error message.
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index dd2da66caf428bf..75e806650f3117e 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -1573,6 +1573,15 @@ static void processMemoryEffects(llvm::Function *func, LLVMFuncOp funcOp) {
funcOp.setMemoryAttr(memAttr);
}
+// List of LLVM IR attributes that map to an explicit attribute on the MLIR
+// LLVMFuncOp.
+static constexpr std::array ExplicitAttributes{
+ StringLiteral("aarch64_pstate_sm_enabled"),
+ StringLiteral("aarch64_pstate_sm_body"),
+ StringLiteral("aarch64_pstate_za_new"),
+ StringLiteral("vscale_range"),
+};
+
static void processPassthroughAttrs(llvm::Function *func, LLVMFuncOp funcOp) {
MLIRContext *context = funcOp.getContext();
SmallVector<Attribute> passthroughs;
@@ -1598,11 +1607,8 @@ static void processPassthroughAttrs(llvm::Function *func, LLVMFuncOp funcOp) {
attrName = llvm::Attribute::getNameFromAttrKind(attr.getKindAsEnum());
auto keyAttr = StringAttr::get(context, attrName);
- // Skip the aarch64_pstate_sm_<body|enabled> since the LLVMFuncOp has an
- // explicit attribute.
- // Also skip the vscale_range, it is also an explicit attribute.
- if (attrName == "aarch64_pstate_sm_enabled" ||
- attrName == "aarch64_pstate_sm_body" || attrName == "vscale_range")
+ // Skip attributes that map to an explicit attribute on the LLVMFuncOp.
+ if (llvm::is_contained(ExplicitAttributes, attrName))
continue;
if (attr.isStringAttribute()) {
@@ -1642,6 +1648,10 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func,
funcOp.setArmStreaming(true);
else if (func->hasFnAttribute("aarch64_pstate_sm_body"))
funcOp.setArmLocallyStreaming(true);
+
+ if (func->hasFnAttribute("aarch64_pstate_za_new"))
+ funcOp.setArmNewZa(true);
+
llvm::Attribute attr = func->getFnAttribute(llvm::Attribute::VScaleRange);
if (attr.isValid()) {
MLIRContext *context = funcOp.getContext();
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index 388ae61958b78b9..911c7141e45d5f2 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -890,6 +890,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {
else if (func.getArmLocallyStreaming())
llvmFunc->addFnAttr("aarch64_pstate_sm_body");
+ if (func.getArmNewZa())
+ llvmFunc->addFnAttr("aarch64_pstate_za_new");
+
if (auto attr = func.getVscaleRange())
llvmFunc->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
getLLVMContext(), attr->getMinRange().getInt(),
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index d81f3c4b1e20c5a..e4343095578c1f0 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -28,6 +28,8 @@ if (MLIR_INCLUDE_INTEGRATION_TESTS)
"If arch-specific Arm integration tests run emulated, find Arm native utility libraries in this directory.")
set(MLIR_GPU_COMPILATION_TEST_FORMAT "fatbin" CACHE STRING
"The GPU compilation format used by the tests.")
+ set(ARM_SME_ABI_ROUTINES_SHLIB "" CACHE STRING
+ "Path to a shared library containing Arm SME ABI routines, required for Arm SME integration tests.")
option(MLIR_RUN_AMX_TESTS "Run AMX tests.")
option(MLIR_RUN_X86VECTOR_TESTS "Run X86Vector tests.")
option(MLIR_RUN_CUDA_TENSOR_CORE_TESTS "Run CUDA Tensor core WMMA tests.")
@@ -139,6 +141,10 @@ if(MLIR_ENABLE_ROCM_RUNNER)
list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)
endif()
+if (MLIR_RUN_ARM_SME_TESTS AND NOT ARM_SME_ABI_ROUTINES_SHLIB)
+ list(APPEND MLIR_TEST_DEPENDS mlir_arm_sme_abi_stubs)
+endif()
+
list(APPEND MLIR_TEST_DEPENDS MLIRUnitTests)
if(LLVM_BUILD_EXAMPLES)
diff --git a/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir b/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir
index e7bbe8c0047687d..70119b08c3e91aa 100644
--- a/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir
+++ b/mlir/test/Dialect/ArmSME/enable-arm-streaming.mlir
@@ -1,13 +1,13 @@
// RUN: mlir-opt %s -enable-arm-streaming -verify-diagnostics | FileCheck %s
-// RUN: mlir-opt %s -enable-arm-streaming=mode=locally -verify-diagnostics | FileCheck %s -check-prefix=CHECK-LOCALLY
-// RUN: mlir-opt %s -enable-arm-streaming=enable-za -verify-diagnostics | FileCheck %s -check-prefix=CHECK-ENABLE-ZA
+// RUN: mlir-opt %s -enable-arm-streaming=streaming-mode=streaming-locally -verify-diagnostics | FileCheck %s -check-prefix=CHECK-LOCALLY
+// RUN: mlir-opt %s -enable-arm-streaming=za-mode=new-za -verify-diagnostics | FileCheck %s -check-prefix=CHECK-ENABLE-ZA
// CHECK-LABEL: @arm_streaming
// CHECK-SAME: attributes {arm_streaming}
// CHECK-LOCALLY-LABEL: @arm_streaming
// CHECK-LOCALLY-SAME: attributes {arm_locally_streaming}
// CHECK-ENABLE-ZA-LABEL: @arm_streaming
-// CHECK-ENABLE-ZA-SAME: attributes {arm_streaming, arm_za}
+// CHECK-ENABLE-ZA-SAME: attributes {arm_new_za, arm_streaming}
func.func @arm_streaming() { return }
// CHECK-LABEL: @not_arm_streaming
diff --git a/mlir/test/Dialect/ArmSME/enable-arm-za.mlir b/mlir/test/Dialect/ArmSME/enable-arm-za.mlir
index d415b19f6fa94cf..0f31278eefd1550 100644
--- a/mlir/test/Dialect/ArmSME/enable-arm-za.mlir
+++ b/mlir/test/Dialect/ArmSME/enable-arm-za.mlir
@@ -1,18 +1,16 @@
-// RUN: mlir-opt %s -enable-arm-streaming=enable-za -convert-vector-to-llvm="enable-arm-sme" | FileCheck %s -check-prefix=ENABLE-ZA
+// RUN: mlir-opt %s -enable-arm-streaming=za-mode=new-za -convert-vector-to-llvm="enable-arm-sme" | FileCheck %s -check-prefix=ENABLE-ZA
// RUN: mlir-opt %s -enable-arm-streaming -convert-vector-to-llvm="enable-arm-sme" | FileCheck %s -check-prefix=DISABLE-ZA
// RUN: mlir-opt %s -convert-vector-to-llvm="enable-arm-sme" | FileCheck %s -check-prefix=NO-ARM-STREAMING
// CHECK-LABEL: @declaration
func.func private @declaration()
-// CHECK-LABEL: @arm_za
-func.func @arm_za() {
- // ENABLE-ZA: arm_sme.intr.za.enable
- // ENABLE-ZA-NEXT: arm_sme.intr.za.disable
- // ENABLE-ZA-NEXT: return
- // DISABLE-ZA-NOT: arm_sme.intr.za.enable
- // DISABLE-ZA-NOT: arm_sme.intr.za.disable
- // NO-ARM-STREAMING-NOT: arm_sme.intr.za.enable
- // NO-ARM-STREAMING-NOT: arm_sme.intr.za.disable
- return
-}
+// ENABLE-ZA-LABEL: @arm_new_za
+// ENABLE-ZA-SAME: attributes {arm_new_za, arm_streaming}
+// DISABLE-ZA-LABEL: @arm_new_za
+// DISABLE-ZA-NOT: arm_new_za
+// DISABLE-ZA-SAME: attributes {arm_streaming}
+// NO-ARM-STREAMING-LABEL: @arm_new_za
+// NO-ARM-STREAMING-NOT: arm_new_za
+// NO-ARM-STREAMING-NOT: arm_streaming
+func.func @arm_new_za() { return }
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir
index 131cbc05a9857e0..efe4da7d3c50c6f 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/fill-2d.mlir
@@ -3,14 +3,14 @@
// RUN: -test-transform-dialect-erase-schedule \
// RUN: -lower-vector-mask \
// RUN: -one-shot-bufferize="bufferize-function-boundaries" \
-// RUN: -enable-arm-streaming="mode=locally enable-za" \
+// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// RUN: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// RUN: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// RUN: -allocate-arm-sme-tiles -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -e=entry -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
-// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
+// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib | \
// RUN: FileCheck %s
func.func @entry() {
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
index 28179fed31eca4b..ab74f0100474263 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul-transpose-a.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s \
// RUN: -transform-interpreter -test-transform-dialect-erase-schedule \
// RUN: -one-shot-bufferize="bufferize-function-boundaries" -canonicalize \
-// RUN: -enable-arm-streaming="mode=locally enable-za" \
+// RUN: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// RUN: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// RUN: -convert-vector-to-scf -cse -arm-sve-legalize-vector-storage \
// RUN: -convert-vector-to-llvm=enable-arm-sme \
@@ -10,7 +10,7 @@
// RUN: %mcr_aarch64_cmd \
// RUN: -e=main -entry-point-result=void \
// RUN: -march=aarch64 -mattr="+sve,+sme" \
-// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
+// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib | \
// RUN: FileCheck %s
func.func @matmul_transpose_a(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>, %C : tensor<?x?xf32>) {
@@ -21,7 +21,7 @@ func.func @matmul_transpose_a(%A : tensor<?x?xf32>, %B : tensor<?x?xf32>, %C : t
return
}
-func.func @main() {
+func.func @main() attributes { enable_arm_streaming_ignore } {
%c0 = arith.constant 0 : i32
%c7 = arith.constant 7 : index
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir
index 1d6125a0d7999f5..32e7e6b79ce09b9 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/load-store-128-bit-tile.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = test_load_store_zaq0
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile} | %{run} | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir
index eda4d9a090f8d40..44cf23f41b63254 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-load-vertical.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = entry
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile} | %{run} | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir
index ae5ad9cc2a5e90c..f1ecf768ebe83db 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f32.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = test_outerproduct_no_accumulator_4x4xf32
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm -o %t
// DEFINE: %{run} = %mcr_aarch64_cmd %t \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir
index 36ce896a4c1bd90..5c907bb1675e462 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = test_outerproduct_no_accumulator_2x2xf64
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm -o %t
// DEFINE: %{run} = %mcr_aarch64_cmd %t \
// DEFINE: -march=aarch64 -mattr=+sve,+sme-f64f64 \
// DEFINE: -e %{entry_point} -entry-point-result=void \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
index 48725d9ea03f94c..ccc08289570afc5 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = entry
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile} | %{run} | FileCheck %s
@@ -134,7 +134,13 @@ func.func @initialize_memory(%d0 : index, %d1 : index) -> memref<?x?xf32> {
return %A : memref<?x?xf32>
}
-func.func @entry() {
+// The enable-arm-streaming pass makes this a streaming function, so the vscale it returns reflects the streaming vector length (SVL).
+func.func @get_svl() -> index {
+ %vscale = vector.vscale
+ return %vscale : index
+}
+
+func.func @entry() attributes { enable_arm_streaming_ignore } {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
@@ -142,8 +148,8 @@ func.func @entry() {
// Allocate enough memory to load a 32-bit tile plus a tiny bit more to test
// non-zero offsets while remaining inbounds.
- %vscale = vector.vscale
- %svl_s = arith.muli %c4, %vscale : index
+ %svl = call @get_svl() : () -> index
+ %svl_s = arith.muli %c4, %svl : index
%svl_s_plus_two = arith.addi %svl_s, %c2 : index
%A = call @initialize_memory(%svl_s_plus_two, %svl_s_plus_two) : (index, index) -> memref<?x?xf32>
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir
index 49c513badb7b071..f35f83dcec0daa2 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-write-2d.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = entry
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile} | %{run} | FileCheck %s
@@ -96,7 +96,13 @@ func.func @initialize_memory(%d0 : index, %d1 : index) -> memref<?x?xf32> {
return %A : memref<?x?xf32>
}
-func.func @entry() {
+// The enable-arm-streaming pass makes this a streaming function, so the vscale it returns reflects the streaming vector length (SVL).
+func.func @get_svl() -> index {
+ %vscale = vector.vscale
+ return %vscale : index
+}
+
+func.func @entry() attributes { enable_arm_streaming_ignore } {
%c0 = arith.constant 0 : index
%c2 = arith.constant 2 : index
%c4 = arith.constant 4 : index
@@ -105,8 +111,8 @@ func.func @entry() {
//
// Allocate enough memory to load a 32-bit tile plus a tiny bit more to test
// non-zero offsets while remaining inbounds.
- %vscale = vector.vscale
- %svl_s = arith.muli %c4, %vscale : index
+ %svl = call @get_svl() : () -> index
+ %svl_s = arith.muli %c4, %svl : index
%svl_s_plus_two = arith.addi %svl_s, %c2 : index
%A = call @initialize_memory(%svl_s_plus_two, %svl_s_plus_two) : (index, index) -> memref<?x?xf32>
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir
index 65b930115e88895..39b5ef2ade4b0c0 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transpose.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = entry
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=void \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile} | %{run} | FileCheck %s
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir
index 92031586b8cfc91..baf2046722b9e0c 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/tile_fill.mlir
@@ -1,11 +1,11 @@
-// RUN: mlir-opt %s -enable-arm-streaming="mode=locally enable-za" \
+// RUN: mlir-opt %s -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// RUN: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// RUN: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// RUN: -allocate-arm-sme-tiles -test-lower-to-llvm | \
// RUN: %mcr_aarch64_cmd \
// RUN: -march=aarch64 -mattr=+sve,+sme \
// RUN: -e entry -entry-point-result=i32 \
-// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils | \
+// RUN: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib | \
// RUN: FileCheck %s
// Integration test demonstrating filling a 32-bit element ZA tile with a
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir
index adf1d365cb99823..8878dca8bdcb6b1 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-load-store.mlir
@@ -1,13 +1,13 @@
// DEFINE: %{entry_point} = za0_d_f64
// DEFINE: %{compile} = mlir-opt %s \
-// DEFINE: -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" -cse -canonicalize \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=i32 \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile} | %{run} | FileCheck %s --check-prefix=CHECK-ZA0_D
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
index 455405d923bd664..a890aaa6f309d15 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/vector-ops.mlir
@@ -1,12 +1,12 @@
// DEFINE: %{entry_point} = entry
-// DEFINE: %{compile} = mlir-opt %s -enable-arm-streaming="mode=locally enable-za" \
+// DEFINE: %{compile} = mlir-opt %s -enable-arm-streaming="streaming-mode=streaming-locally za-mode=new-za" \
// DEFINE: -convert-vector-to-arm-sme -convert-arm-sme-to-scf \
// DEFINE: -convert-vector-to-llvm="enable-arm-sme" \
// DEFINE: -allocate-arm-sme-tiles -test-lower-to-llvm
// DEFINE: %{run} = %mcr_aarch64_cmd \
// DEFINE: -march=aarch64 -mattr=+sve,+sme \
// DEFINE: -e %{entry_point} -entry-point-result=i32 \
-// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
+// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%arm_sme_abi_shlib
// RUN: %{compile} | %{run} | FileCheck %s
diff --git a/mlir/test/Target/LLVMIR/arm-sme.mlir b/mlir/test/Target/LLVMIR/arm-sme.mlir
index 27c94d9aeac8bf4..aa0389e888b60d6 100644
--- a/mlir/test/Target/LLVMIR/arm-sme.mlir
+++ b/mlir/test/Target/LLVMIR/arm-sme.mlir
@@ -220,17 +220,6 @@ llvm.func @arm_sme_store(%nxv1i1 : vector<[1]xi1>,
// -----
-// CHECK-LABEL: @arm_sme_toggle_za
-llvm.func @arm_sme_toggle_za() {
- // CHECK: call void @llvm.aarch64.sme.za.enable()
- "arm_sme.intr.za.enable"() : () -> ()
- // CHECK: call void @llvm.aarch64.sme.za.disable()
- "arm_sme.intr.za.disable"() : () -> ()
- llvm.return
-}
-
-// -----
-
// CHECK-LABEL: @arm_sme_vector_to_tile_horiz
llvm.func @arm_sme_vector_to_tile_horiz(%tileslice : i32,
%nxv16i1 : vector<[16]xi1>,
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index da8488373862c36..87bbe51e95d4c9d 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -54,10 +54,9 @@
config.substitutions.append(("%host_cc", config.host_cc))
-# Searches for a runtime library with the given name and returns a tool
-# substitution of the same name and the found path.
+# Searches for a runtime library with the given name and returns the found path.
# Correctly handles the platforms shared library directory and naming conventions.
-def add_runtime(name):
+def find_runtime(name):
path = ""
for prefix in ["", "lib"]:
path = os.path.join(
@@ -65,7 +64,13 @@ def add_runtime(name):
)
if os.path.isfile(path):
break
- return ToolSubst(f"%{name}", path)
+ return path
+
+
+# Searches for a runtime library with the given name and returns a tool
+# substitution of the same name and the found path.
+def add_runtime(name):
+ return ToolSubst(f"%{name}", find_runtime(name))
llvm_config.with_system_environment(["HOME", "INCLUDE", "LIB", "TMP", "TEMP"])
@@ -126,6 +131,15 @@ def add_runtime(name):
if config.enable_cuda_runner:
tools.extend([add_runtime("mlir_cuda_runtime")])
+if config.mlir_run_arm_sme_tests:
+ config.substitutions.append(
+ (
+ "%arm_sme_abi_shlib",
+ # Use passed Arm SME ABI routines, if not present default to stubs.
+ config.arm_sme_abi_routines_shlib or find_runtime("mlir_arm_sme_abi_stubs"),
+ )
+ )
+
# The following tools are optional
tools.extend(
[
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 2de40ba5e8e57e6..146e8443f5c98e5 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -56,6 +56,7 @@ config.arm_emulator_options = "@ARM_EMULATOR_OPTIONS@"
config.arm_emulator_mlir_cpu_runner_executable = "@ARM_EMULATOR_MLIR_CPU_RUNNER_EXECUTABLE@"
config.arm_emulator_lli_executable = "@ARM_EMULATOR_LLI_EXECUTABLE@"
config.arm_emulator_utils_lib_dir = "@ARM_EMULATOR_UTILS_LIB_DIR@"
+config.arm_sme_abi_routines_shlib = "@ARM_SME_ABI_ROUTINES_SHLIB@"
config.riscv_vector_emulator_executable = "@RISCV_VECTOR_EMULATOR_EXECUTABLE@"
config.riscv_vector_emulator_options = "@RISCV_VECTOR_EMULATOR_OPTIONS@"
config.riscv_emulator_lli_executable = "@RISCV_EMULATOR_LLI_EXECUTABLE@"
More information about the Mlir-commits
mailing list