[llvm] [AArch64] Consider runtime mode when deciding to use SVE for fixed-length vectors. (PR #96081)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 20 02:53:23 PDT 2024
https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/96081
>From d1ec6a1a843aa86144fa99635edb99151536f238 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 17 Jun 2024 17:28:43 +0100
Subject: [PATCH 1/2] [AArch64] Consider runtime mode when deciding to use SVE
for fixed-length vectors.
This also fixes the case where an SVE div is incorrectly assumed to be
available in non-streaming mode with SME.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 26 +-
llvm/lib/Target/AArch64/AArch64Subtarget.h | 12 +-
.../Target/AArch64/AArch64TargetMachine.cpp | 8 +-
.../AArch64/intrinsic-cttz-elts-sve.ll | 316 ++++++++++++------
.../AArch64/sme-intrinsics-mova-extract.ll | 2 +-
.../AArch64/sme-intrinsics-mova-insert.ll | 2 +-
...streaming-mode-fixed-length-and-combine.ll | 2 +-
...treaming-mode-fixed-length-bit-counting.ll | 2 +-
...sve-streaming-mode-fixed-length-bitcast.ll | 2 +-
...e-streaming-mode-fixed-length-bitselect.ll | 2 +-
...treaming-mode-fixed-length-build-vector.ll | 2 +-
.../sve-streaming-mode-fixed-length-concat.ll | 2 +-
...e-streaming-mode-fixed-length-ext-loads.ll | 2 +-
...ing-mode-fixed-length-extract-subvector.ll | 2 +-
...ng-mode-fixed-length-extract-vector-elt.ll | 2 +-
...e-streaming-mode-fixed-length-fcopysign.ll | 2 +-
...ve-streaming-mode-fixed-length-fp-arith.ll | 2 +-
...streaming-mode-fixed-length-fp-compares.ll | 2 +-
...-streaming-mode-fixed-length-fp-convert.ll | 2 +-
...aming-mode-fixed-length-fp-extend-trunc.ll | 2 +-
...e-streaming-mode-fixed-length-fp-minmax.ll | 2 +-
...streaming-mode-fixed-length-fp-rounding.ll | 2 +-
...e-streaming-mode-fixed-length-fp-select.ll | 2 +-
...-streaming-mode-fixed-length-fp-vselect.ll | 2 +-
...e-streaming-mode-fixed-length-int-arith.ll | 2 +-
...treaming-mode-fixed-length-int-compares.ll | 2 +-
...sve-streaming-mode-fixed-length-int-div.ll | 5 +-
...streaming-mode-fixed-length-int-extends.ll | 2 +-
...eaming-mode-fixed-length-int-immediates.ll | 2 +-
...sve-streaming-mode-fixed-length-int-log.ll | 2 +-
...-streaming-mode-fixed-length-int-minmax.ll | 2 +-
...sve-streaming-mode-fixed-length-int-mul.ll | 3 +-
...ve-streaming-mode-fixed-length-int-mulh.ll | 2 +-
...-streaming-mode-fixed-length-int-reduce.ll | 2 +-
...sve-streaming-mode-fixed-length-int-rem.ll | 2 +-
...-streaming-mode-fixed-length-int-select.ll | 2 +-
...-streaming-mode-fixed-length-int-shifts.ll | 2 +-
...streaming-mode-fixed-length-int-vselect.ll | 2 +-
...-streaming-mode-fixed-length-ld2-alloca.ll | 2 +-
...reaming-mode-fixed-length-limit-duplane.ll | 2 +-
.../sve-streaming-mode-fixed-length-loads.ll | 2 +-
...-streaming-mode-fixed-length-log-reduce.ll | 2 +-
...mode-fixed-length-masked-gather-scatter.ll | 91 ++---
...eaming-mode-fixed-length-optimize-ptrue.ll | 2 +-
...streaming-mode-fixed-length-permute-rev.ll | 2 +-
...g-mode-fixed-length-permute-zip-uzp-trn.ll | 2 +-
.../sve-streaming-mode-fixed-length-ptest.ll | 2 +-
.../sve-streaming-mode-fixed-length-rev.ll | 2 +-
...e-streaming-mode-fixed-length-sdiv-pow2.ll | 2 +-
...sve-streaming-mode-fixed-length-shuffle.ll | 2 +-
.../sve-streaming-mode-fixed-length-stores.ll | 2 +-
...e-streaming-mode-fixed-length-subvector.ll | 2 +-
...treaming-mode-fixed-length-trunc-stores.ll | 2 +-
.../sve-streaming-mode-fixed-length-trunc.ll | 2 +-
...eaming-mode-fixed-length-vector-shuffle.ll | 2 +-
.../sve-streaming-mode-test-register-mov.ll | 2 +-
56 files changed, 350 insertions(+), 209 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c790209cc221f..a4fa25ffdd6ff 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1418,7 +1418,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget->hasSVEorSME()) {
+ if (Subtarget->isSVEorStreamingSVEAvailable()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
@@ -1528,14 +1528,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
- // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
- for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
- MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+ // NEON doesn't support masked loads/stores, but SME and SVE do.
+ for (auto VT :
+ {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+ MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+ MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
+
+ // NEON doesn't support masked gathers/scatters, but SVE does.
+ if (Subtarget->isSVEAvailable()) {
+ for (auto VT :
+ {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+ MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+ MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
}
// Firstly, exclude all scalable vector extending loads/truncating stores,
@@ -6986,7 +6996,7 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
// NEON-sized vectors can be emulated using SVE instructions.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
- return Subtarget->hasSVEorSME();
+ return Subtarget->isSVEorStreamingSVEAvailable();
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 7ef7a89b5749f..5e1a370778914 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -185,6 +185,12 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
(hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
}
+ /// Returns true if the target has access to either the full range of SVE instructions,
+ /// or the streaming-compatible subset of SVE instructions.
+ bool isSVEorStreamingSVEAvailable() const {
+ return hasSVE() || hasSMEFA64() || (hasSME() && isStreaming());
+ }
+
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0, because
// we don't yet support streaming-compatible codegen support that we trust
@@ -374,11 +380,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
}
bool useSVEForFixedLengthVectors() const {
- if (!isNeonAvailable())
- return hasSVEorSME();
+ if (!isSVEorStreamingSVEAvailable())
+ return false;
// Prefer NEON unless larger SVE registers are available.
- return hasSVEorSME() && getMinSVEVectorSizeInBits() >= 256;
+ return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
}
bool useSVEForFixedLengthVectors(EVT VT) const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7de9071476e7f..f94fa037a42c4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -187,6 +187,11 @@ static cl::opt<unsigned> SVEVectorBitsMinOpt(
"with zero meaning no minimum size is assumed."),
cl::init(0), cl::Hidden);
+static cl::opt<bool> ForceStreaming(
+ "force-streaming",
+ cl::desc("Force the use of streaming code for all functions"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> ForceStreamingCompatible(
"force-streaming-compatible",
cl::desc("Force the use of streaming-compatible code for all functions"),
@@ -412,7 +417,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
bool HasMinSize = F.hasMinSize();
- bool IsStreaming = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
+ bool IsStreaming = ForceStreaming ||
+ F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
F.hasFnAttribute("aarch64_pstate_sm_body");
bool IsStreamingCompatible =
F.hasFnAttribute("aarch64_pstate_sm_compatible") ||
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 9c72afd84fa7c..cdf2a962f9322 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
; WITH VSCALE RANGE
@@ -362,145 +362,261 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
; FIXED-WIDTH VECTOR TYPES
define i32 @ctz_v16i1(<16 x i1> %a) {
-; CHECK-LABEL: ctz_v16i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v16i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl16
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v16i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl16
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v16i1_poison(<16 x i1> %a) {
-; CHECK-LABEL: ctz_v16i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v16i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl16
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v16i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl16
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
ret i32 %res
}
define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
-; CHECK-LABEL: add_i64_ctz_v16i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.16b, v0.16b, #7
-; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: incp x0, p0.b
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: add_i64_ctz_v16i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl16
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: incp x0, p0.b
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: add_i64_ctz_v16i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl16
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: incp x0, p0.b
+; STREAMING-NEXT: ret
%res = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> %a, i1 1)
%add = add i64 %res, %b
ret i64 %add
}
define i32 @ctz_v8i1(<8 x i1> %a) {
-; CHECK-LABEL: ctz_v8i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.8b, v0.8b, #7
-; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v8i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.8b, v0.8b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl8
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.8b, v0.8b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v8i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl8
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v8i1_poison(<8 x i1> %a) {
-; CHECK-LABEL: ctz_v8i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.8b, v0.8b, #7
-; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.b
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v8i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.8b, v0.8b, #7
+; NONSTREAMING-NEXT: ptrue p0.b, vl8
+; NONSTREAMING-NEXT: ptrue p1.b
+; NONSTREAMING-NEXT: cmlt v0.8b, v0.8b, #0
+; NONSTREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.b
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v8i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.b, vl8
+; STREAMING-NEXT: lsl z0.b, z0.b, #7
+; STREAMING-NEXT: ptrue p1.b
+; STREAMING-NEXT: asr z0.b, z0.b, #7
+; STREAMING-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.b
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 1)
ret i32 %res
}
define i32 @ctz_v4i1(<4 x i1> %a) {
-; CHECK-LABEL: ctz_v4i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v4i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.4h, v0.4h, #15
+; NONSTREAMING-NEXT: ptrue p0.h, vl4
+; NONSTREAMING-NEXT: ptrue p1.h
+; NONSTREAMING-NEXT: cmlt v0.4h, v0.4h, #0
+; NONSTREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.h
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v4i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.h, vl4
+; STREAMING-NEXT: lsl z0.h, z0.h, #15
+; STREAMING-NEXT: ptrue p1.h
+; STREAMING-NEXT: asr z0.h, z0.h, #15
+; STREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.h
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v4i1_poison(<4 x i1> %a) {
-; CHECK-LABEL: ctz_v4i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.4h, v0.4h, #15
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.h
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v4i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.4h, v0.4h, #15
+; NONSTREAMING-NEXT: ptrue p0.h, vl4
+; NONSTREAMING-NEXT: ptrue p1.h
+; NONSTREAMING-NEXT: cmlt v0.4h, v0.4h, #0
+; NONSTREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.h
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v4i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.h, vl4
+; STREAMING-NEXT: lsl z0.h, z0.h, #15
+; STREAMING-NEXT: ptrue p1.h
+; STREAMING-NEXT: asr z0.h, z0.h, #15
+; STREAMING-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.h
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 1)
ret i32 %res
}
define i32 @ctz_v2i1(<2 x i1> %a) {
-; CHECK-LABEL: ctz_v2i1:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v2i1:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.2s, v0.2s, #31
+; NONSTREAMING-NEXT: ptrue p0.s, vl2
+; NONSTREAMING-NEXT: ptrue p1.s
+; NONSTREAMING-NEXT: cmlt v0.2s, v0.2s, #0
+; NONSTREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.s
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v2i1:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.s, vl2
+; STREAMING-NEXT: lsl z0.s, z0.s, #31
+; STREAMING-NEXT: ptrue p1.s
+; STREAMING-NEXT: asr z0.s, z0.s, #31
+; STREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.s
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 0)
ret i32 %res
}
define i32 @ctz_v2i1_poison(<2 x i1> %a) {
-; CHECK-LABEL: ctz_v2i1_poison:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: brkb p0.b, p1/z, p0.b
-; CHECK-NEXT: cntp x0, p0, p0.s
-; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT: ret
+; NONSTREAMING-LABEL: ctz_v2i1_poison:
+; NONSTREAMING: // %bb.0:
+; NONSTREAMING-NEXT: shl v0.2s, v0.2s, #31
+; NONSTREAMING-NEXT: ptrue p0.s, vl2
+; NONSTREAMING-NEXT: ptrue p1.s
+; NONSTREAMING-NEXT: cmlt v0.2s, v0.2s, #0
+; NONSTREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT: cntp x0, p0, p0.s
+; NONSTREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT: ret
+;
+; STREAMING-LABEL: ctz_v2i1_poison:
+; STREAMING: // %bb.0:
+; STREAMING-NEXT: // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT: ptrue p0.s, vl2
+; STREAMING-NEXT: lsl z0.s, z0.s, #31
+; STREAMING-NEXT: ptrue p1.s
+; STREAMING-NEXT: asr z0.s, z0.s, #31
+; STREAMING-NEXT: cmpne p0.s, p0/z, z0.s, #0
+; STREAMING-NEXT: brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT: cntp x0, p0, p0.s
+; STREAMING-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT: ret
%res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)
ret i32 %res
}
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
index 25f3540766618..48fbd14bd8540 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
define <vscale x 16 x i8> @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %tileslice) {
; CHECK-LABEL: extract_row_b:
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
index 15c33e343c48f..8711a0388e34c 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
define void @insert_row_b(i32 %tileslice, <vscale x 16 x i1> %pg,
; CHECK-LABEL: insert_row_b:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
index 4cdb175f55c9c..617b560713c3a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index f662140327135..2a83f13251d76 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
index 41065b3602003..b9264ad5f77c3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index d65e87d5b9756..d3c446c9904b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index 318a9cf7d738b..b8a2e0e0f4bd4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index a845c3cbdc2b6..4b6285b2732fe 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 2cdd4374a56c5..50a05cb4b1e27 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index b7b34cfa1517c..2665696308463 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
index 0a1831a94d8fe..cf308e6c4395f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index a8d01ec7ce0b4..dad53b31db0b0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index e84acfc8504a9..f2c882c370eab 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index 776b6918923ae..200ffb60a7928 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
index 2c08977320e84..5f6b60a767f9d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index 9878910763a75..c96189b960268 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index 775cac272cde9..84aea185917fa 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 344aac5b19838..03bc39a6ef3ee 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index daa9b51cc827b..bcc446d9d1a41 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 69661049bcb6f..035c76b569298 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index a2875ffef2e88..4360f3a12014a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index 0b4316686fff6..ba20de65a253a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 43c67382c9d82..ecc44927526e8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -1,9 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
-; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefixes=NEON-NOSVE
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=NONEON-NOSVE
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index d3ac1445e1086..25a6ea490c163 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
index 1f5bb5f5486af..1335bb769821f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 3137a7bc7ad27..687dd9445f387 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index 4775a965b70d7..1bca7dd09d9b7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
index ba479fc3bbe2d..05869281eb59f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=SVE2
-; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=SVE2
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index 6198926c0b438..b0fdce9a93bd3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
; This test only tests the legal types for a given vector width, as mulh nodes
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 7bdb4599707b0..92a67cba55f7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index dd6d2dcacd616..b4641172f8b06 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 5cee1360f6f3c..4ac156c42fda0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 2778e93416a74..d0f99211e80fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index af15d5f67ad15..41eb731fd66df 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 66d544d0acbf5..270f05a806b82 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index 3b83f982b6bfc..3627390b5edfa 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index c97a3c2e721a3..504db6df18ee5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
index 9e1edb817c459..d4565c4b69c77 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index 75c4536c97306..8ca598902b94c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
-; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefix=SME
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-SVE-NOGATHER
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -87,24 +86,27 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
;
; NONEON-NOSVE-LABEL: masked_gather_v2i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #16
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT: sub sp, sp, #144
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144
; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-NOSVE-NEXT: index z0.d, #1, #1
-; NONEON-NOSVE-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-NOSVE-NEXT: and z0.d, z1.d, z0.d
; NONEON-NOSVE-NEXT: ldr q1, [x1]
-; NONEON-NOSVE-NEXT: uaddv d0, p0, z0.d
-; NONEON-NOSVE-NEXT: ptrue p0.d
-; NONEON-NOSVE-NEXT: fmov x8, d0
-; NONEON-NOSVE-NEXT: strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT: mov w8, #2 // =0x2
+; NONEON-NOSVE-NEXT: str q0, [sp, #112]
+; NONEON-NOSVE-NEXT: ldp x10, x9, [sp, #112]
+; NONEON-NOSVE-NEXT: cmp x9, #0
+; NONEON-NOSVE-NEXT: csel x8, x8, xzr, eq
+; NONEON-NOSVE-NEXT: cmp x10, #0
+; NONEON-NOSVE-NEXT: csetm x9, eq
+; NONEON-NOSVE-NEXT: sub w8, w8, w9
+; NONEON-NOSVE-NEXT: strb w8, [sp, #140]
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB0_2
; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load
-; NONEON-NOSVE-NEXT: fmov x9, d1
-; NONEON-NOSVE-NEXT: ld1rd { z0.d }, p0/z, [x9]
+; NONEON-NOSVE-NEXT: str q1, [sp, #96]
+; NONEON-NOSVE-NEXT: ldr x9, [sp, #96]
+; NONEON-NOSVE-NEXT: ldr x9, [x9]
+; NONEON-NOSVE-NEXT: str x9, [sp, #80]
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #80]
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_3
; NONEON-NOSVE-NEXT: b .LBB0_4
; NONEON-NOSVE-NEXT: .LBB0_2:
@@ -112,17 +114,19 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; NONEON-NOSVE-NEXT: ldr q0, [x9, :lo12:.LCPI0_0]
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_4
; NONEON-NOSVE-NEXT: .LBB0_3: // %cond.load1
-; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
-; NONEON-NOSVE-NEXT: index z2.d, #0, #1
-; NONEON-NOSVE-NEXT: mov z1.d, z1.d[1]
-; NONEON-NOSVE-NEXT: mov z3.d, x8
-; NONEON-NOSVE-NEXT: fmov x8, d1
-; NONEON-NOSVE-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; NONEON-NOSVE-NEXT: str q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr x8, [sp, #72]
; NONEON-NOSVE-NEXT: ldr x8, [x8]
-; NONEON-NOSVE-NEXT: mov z0.d, p0/m, x8
+; NONEON-NOSVE-NEXT: str q0, [sp]
+; NONEON-NOSVE-NEXT: ldr x9, [sp]
+; NONEON-NOSVE-NEXT: str x8, [sp, #48]
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #48]
+; NONEON-NOSVE-NEXT: str q0, [sp, #16]
+; NONEON-NOSVE-NEXT: ldr x8, [sp, #16]
+; NONEON-NOSVE-NEXT: stp x9, x8, [sp, #32]
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT: .LBB0_4: // %else2
-; NONEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
-; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: add sp, sp, #144
; NONEON-NOSVE-NEXT: ret
%vals = load <2 x i64>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
@@ -203,37 +207,38 @@ define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
;
; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: sub sp, sp, #16
-; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
-; NONEON-NOSVE-NEXT: ldr q0, [x0]
-; NONEON-NOSVE-NEXT: index z1.d, #1, #1
-; NONEON-NOSVE-NEXT: cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-NOSVE-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-NOSVE-NEXT: and z1.d, z2.d, z1.d
-; NONEON-NOSVE-NEXT: uaddv d1, p0, z1.d
-; NONEON-NOSVE-NEXT: fmov x8, d1
-; NONEON-NOSVE-NEXT: ldr q1, [x1]
-; NONEON-NOSVE-NEXT: strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT: sub sp, sp, #96
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: mov w8, #2 // =0x2
+; NONEON-NOSVE-NEXT: str q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldp x10, x9, [sp, #64]
+; NONEON-NOSVE-NEXT: cmp x9, #0
+; NONEON-NOSVE-NEXT: csel x8, x8, xzr, eq
+; NONEON-NOSVE-NEXT: cmp x10, #0
+; NONEON-NOSVE-NEXT: csetm x9, eq
+; NONEON-NOSVE-NEXT: sub w8, w8, w9
+; NONEON-NOSVE-NEXT: strb w8, [sp, #92]
; NONEON-NOSVE-NEXT: and w8, w8, #0xff
; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB1_3
; NONEON-NOSVE-NEXT: // %bb.1: // %else
; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_4
; NONEON-NOSVE-NEXT: .LBB1_2: // %else2
-; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
; NONEON-NOSVE-NEXT: .LBB1_3: // %cond.store
-; NONEON-NOSVE-NEXT: fmov x9, d0
-; NONEON-NOSVE-NEXT: fmov x10, d1
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: ldr x9, [sp, #32]
+; NONEON-NOSVE-NEXT: ldr x10, [sp, #48]
; NONEON-NOSVE-NEXT: str x9, [x10]
; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_2
; NONEON-NOSVE-NEXT: .LBB1_4: // %cond.store1
-; NONEON-NOSVE-NEXT: mov z0.d, z0.d[1]
-; NONEON-NOSVE-NEXT: mov z1.d, z1.d[1]
-; NONEON-NOSVE-NEXT: fmov x8, d0
-; NONEON-NOSVE-NEXT: fmov x9, d1
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp]
+; NONEON-NOSVE-NEXT: ldr x8, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT: str x8, [x9]
-; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: add sp, sp, #96
; NONEON-NOSVE-NEXT: ret
%vals = load <2 x i64>, ptr %a
%ptrs = load <2 x ptr>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
index dbdf5f2502999..431c5a78202e8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index 8c23f5f9922da..a33e8537edf4e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
index bc6fdd1ecd5a7..b91f813c5141b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index 8ebf713a671f4..5235423c00d9a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index bc0fc7c79391d..c34cae12516ed 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index df019ce2e0ad6..85ba964000234 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
index b66e6d9013573..c7b2575266d65 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index a77ac7832e17c..b8779991dbb45 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
index a9f4d92b1e6b6..d022999b856b1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index 30682751037fe..c0aa162b19b77 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
index bc046059f0bd5..77aaeeadcfc2f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
index 323f5f56a2c08..ea6123edc8b4c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
index 23adb1a4bc092..9c7a3d5046d0e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
>From fd116beff8fab54c5be36a6888e1e1f55d03fb31 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 20 Jun 2024 09:53:42 +0100
Subject: [PATCH 2/2] Address comments
---
.../Target/AArch64/AArch64ISelLowering.cpp | 56 ++---
llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 +-
.../Target/AArch64/AArch64TargetMachine.cpp | 5 +-
...sve-streaming-mode-fixed-length-int-div.ll | 1 -
...sve-streaming-mode-fixed-length-int-mul.ll | 2 +-
...mode-fixed-length-masked-gather-scatter.ll | 223 ++++++------------
6 files changed, 105 insertions(+), 184 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a4fa25ffdd6ff..2a82e032c6415 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1430,8 +1430,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
@@ -1537,17 +1535,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MSTORE, VT, Custom);
}
- // NEON doesn't support masked gathers/scatters, but SVE does.
- if (Subtarget->isSVEAvailable()) {
- for (auto VT :
- {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
- MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
- MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
- }
- }
-
// Firstly, exclude all scalable vector extending loads/truncating stores,
// include both integer and floating scalable vector.
for (MVT VT : MVT::scalable_vector_valuetypes()) {
@@ -1586,8 +1573,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MVT::nxv4f32, MVT::nxv2f64}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
@@ -1621,8 +1606,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
- if (Subtarget->isSVEAvailable())
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -1660,8 +1643,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -1685,18 +1666,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
- if (Subtarget->isSVEAvailable()) {
- // NEON doesn't support across-vector reductions, but SVE does.
- for (auto VT :
- {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- }
-
- // Histcnt is SVE2 only
- if (Subtarget->hasSVE2() && Subtarget->isSVEAvailable())
- setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::Other,
- Custom);
-
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1772,6 +1741,31 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, VT, Custom);
}
+ // Handle operations that are only available in non-streaming SVE mode.
+ if (Subtarget->isSVEAvailable()) {
+ for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64,
+ MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
+ MVT::nxv4f32, MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
+ MVT::nxv8bf16, MVT::v4f16, MVT::v8f16, MVT::v2f32,
+ MVT::v4f32, MVT::v1f64, MVT::v2f64, MVT::v8i8,
+ MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+ MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+ setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
+ }
+
+ for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
+ MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
+ MVT::v2f32, MVT::v4f32, MVT::v2f64})
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+
+ // Histcnt is SVE2 only
+ if (Subtarget->hasSVE2())
+ setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::Other,
+ Custom);
+ }
+
+
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
// Only required for llvm.aarch64.mops.memset.tag
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5e1a370778914..0c9352bda7599 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -188,7 +188,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
/// Returns true if the target has access to either the full range of SVE instructions,
/// or the streaming-compatible subset of SVE instructions.
bool isSVEorStreamingSVEAvailable() const {
- return hasSVE() || hasSMEFA64() || (hasSME() && isStreaming());
+ return hasSVE() || (hasSME() && isStreaming());
}
unsigned getMinVectorRegisterBitWidth() const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index f94fa037a42c4..8c924e7c937cd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -420,9 +420,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
bool IsStreaming = ForceStreaming ||
F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
F.hasFnAttribute("aarch64_pstate_sm_body");
- bool IsStreamingCompatible =
- F.hasFnAttribute("aarch64_pstate_sm_compatible") ||
- ForceStreamingCompatible;
+ bool IsStreamingCompatible = ForceStreamingCompatible ||
+ F.hasFnAttribute("aarch64_pstate_sm_compatible");
unsigned MinSVEVectorSize = 0;
unsigned MaxSVEVectorSize = 0;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index ecc44927526e8..516772b8ca664 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -2,7 +2,6 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=NONEON-NOSVE
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
index 05869281eb59f..319fa5c845827 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefix=SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index 8ca598902b94c..c8cea6ebabd48 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -1,88 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
-; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefix=SME
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-SVE-NOGATHER
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
-; SVE-LABEL: masked_gather_v2i64:
-; SVE: // %bb.0:
-; SVE-NEXT: ldr q0, [x0]
-; SVE-NEXT: ptrue p0.d, vl2
-; SVE-NEXT: cmeq v0.2d, v0.2d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; SVE-NEXT: ldr q0, [x1]
-; SVE-NEXT: ld1d { z0.d }, p0/z, [z0.d]
-; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SVE-NEXT: ret
-;
-; SME-LABEL: masked_gather_v2i64:
-; SME: // %bb.0:
-; SME-NEXT: ldr q0, [x0]
-; SME-NEXT: adrp x8, .LCPI0_0
-; SME-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
-; SME-NEXT: cmeq v0.2d, v0.2d, #0
-; SME-NEXT: and v0.16b, v0.16b, v1.16b
-; SME-NEXT: ldr q1, [x1]
-; SME-NEXT: addp d0, v0.2d
-; SME-NEXT: fmov x8, d0
-; SME-NEXT: // implicit-def: $q0
-; SME-NEXT: tbnz w8, #0, .LBB0_3
-; SME-NEXT: // %bb.1: // %else
-; SME-NEXT: tbnz w8, #1, .LBB0_4
-; SME-NEXT: .LBB0_2: // %else2
-; SME-NEXT: ret
-; SME-NEXT: .LBB0_3: // %cond.load
-; SME-NEXT: fmov x9, d1
-; SME-NEXT: ldr d0, [x9]
-; SME-NEXT: tbz w8, #1, .LBB0_2
-; SME-NEXT: .LBB0_4: // %cond.load1
-; SME-NEXT: mov x8, v1.d[1]
-; SME-NEXT: ld1 { v0.d }[1], [x8]
-; SME-NEXT: ret
-;
-; NONEON-SVE-NOGATHER-LABEL: masked_gather_v2i64:
-; NONEON-SVE-NOGATHER: // %bb.0:
-; NONEON-SVE-NOGATHER-NEXT: sub sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT: .cfi_def_cfa_offset 16
-; NONEON-SVE-NOGATHER-NEXT: ptrue p0.d, vl2
-; NONEON-SVE-NOGATHER-NEXT: ldr q0, [x0]
-; NONEON-SVE-NOGATHER-NEXT: cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-SVE-NOGATHER-NEXT: index z0.d, #1, #1
-; NONEON-SVE-NOGATHER-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-SVE-NOGATHER-NEXT: and z0.d, z1.d, z0.d
-; NONEON-SVE-NOGATHER-NEXT: ldr q1, [x1]
-; NONEON-SVE-NOGATHER-NEXT: uaddv d0, p0, z0.d
-; NONEON-SVE-NOGATHER-NEXT: ptrue p0.d
-; NONEON-SVE-NOGATHER-NEXT: fmov x8, d0
-; NONEON-SVE-NOGATHER-NEXT: strb w8, [sp, #12]
-; NONEON-SVE-NOGATHER-NEXT: and w8, w8, #0xff
-; NONEON-SVE-NOGATHER-NEXT: tbz w8, #0, .LBB0_2
-; NONEON-SVE-NOGATHER-NEXT: // %bb.1: // %cond.load
-; NONEON-SVE-NOGATHER-NEXT: fmov x9, d1
-; NONEON-SVE-NOGATHER-NEXT: ld1rd { z0.d }, p0/z, [x9]
-; NONEON-SVE-NOGATHER-NEXT: tbnz w8, #1, .LBB0_3
-; NONEON-SVE-NOGATHER-NEXT: b .LBB0_4
-; NONEON-SVE-NOGATHER-NEXT: .LBB0_2:
-; NONEON-SVE-NOGATHER-NEXT: adrp x9, .LCPI0_0
-; NONEON-SVE-NOGATHER-NEXT: ldr q0, [x9, :lo12:.LCPI0_0]
-; NONEON-SVE-NOGATHER-NEXT: tbz w8, #1, .LBB0_4
-; NONEON-SVE-NOGATHER-NEXT: .LBB0_3: // %cond.load1
-; NONEON-SVE-NOGATHER-NEXT: mov w8, #1 // =0x1
-; NONEON-SVE-NOGATHER-NEXT: index z2.d, #0, #1
-; NONEON-SVE-NOGATHER-NEXT: mov z1.d, z1.d[1]
-; NONEON-SVE-NOGATHER-NEXT: mov z3.d, x8
-; NONEON-SVE-NOGATHER-NEXT: fmov x8, d1
-; NONEON-SVE-NOGATHER-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
-; NONEON-SVE-NOGATHER-NEXT: ldr x8, [x8]
-; NONEON-SVE-NOGATHER-NEXT: mov z0.d, p0/m, x8
-; NONEON-SVE-NOGATHER-NEXT: .LBB0_4: // %else2
-; NONEON-SVE-NOGATHER-NEXT: // kill: def $q0 killed $q0 killed $z0
-; NONEON-SVE-NOGATHER-NEXT: add sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT: ret
+; CHECK-LABEL: masked_gather_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: index z0.d, #1, #1
+; CHECK-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: and z0.d, z1.d, z0.d
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: uaddv d0, p0, z0.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: strb w8, [sp, #12]
+; CHECK-NEXT: and w8, w8, #0xff
+; CHECK-NEXT: tbz w8, #0, .LBB0_2
+; CHECK-NEXT: // %bb.1: // %cond.load
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x9]
+; CHECK-NEXT: tbnz w8, #1, .LBB0_3
+; CHECK-NEXT: b .LBB0_4
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: adrp x9, .LCPI0_0
+; CHECK-NEXT: ldr q0, [x9, :lo12:.LCPI0_0]
+; CHECK-NEXT: tbz w8, #1, .LBB0_4
+; CHECK-NEXT: .LBB0_3: // %cond.load1
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z2.d, #0, #1
+; CHECK-NEXT: mov z1.d, z1.d[1]
+; CHECK-NEXT: mov z3.d, x8
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: ldr x8, [x8]
+; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: .LBB0_4: // %else2
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_gather_v2i64:
; NONEON-NOSVE: // %bb.0:
@@ -136,74 +99,40 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
}
define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
-; SVE-LABEL: masked_scatter_v2i64:
-; SVE: // %bb.0:
-; SVE-NEXT: ldr q0, [x0]
-; SVE-NEXT: ptrue p0.d, vl2
-; SVE-NEXT: cmeq v1.2d, v0.2d, #0
-; SVE-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; SVE-NEXT: ldr q1, [x1]
-; SVE-NEXT: st1d { z0.d }, p0, [z1.d]
-; SVE-NEXT: ret
-;
-; SME-LABEL: masked_scatter_v2i64:
-; SME: // %bb.0:
-; SME-NEXT: ldr q0, [x0]
-; SME-NEXT: adrp x8, .LCPI1_0
-; SME-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
-; SME-NEXT: cmeq v1.2d, v0.2d, #0
-; SME-NEXT: and v1.16b, v1.16b, v2.16b
-; SME-NEXT: addp d2, v1.2d
-; SME-NEXT: ldr q1, [x1]
-; SME-NEXT: fmov x8, d2
-; SME-NEXT: tbnz w8, #0, .LBB1_3
-; SME-NEXT: // %bb.1: // %else
-; SME-NEXT: tbnz w8, #1, .LBB1_4
-; SME-NEXT: .LBB1_2: // %else2
-; SME-NEXT: ret
-; SME-NEXT: .LBB1_3: // %cond.store
-; SME-NEXT: fmov x9, d1
-; SME-NEXT: str d0, [x9]
-; SME-NEXT: tbz w8, #1, .LBB1_2
-; SME-NEXT: .LBB1_4: // %cond.store1
-; SME-NEXT: mov x8, v1.d[1]
-; SME-NEXT: st1 { v0.d }[1], [x8]
-; SME-NEXT: ret
-;
-; NONEON-SVE-NOGATHER-LABEL: masked_scatter_v2i64:
-; NONEON-SVE-NOGATHER: // %bb.0:
-; NONEON-SVE-NOGATHER-NEXT: sub sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT: .cfi_def_cfa_offset 16
-; NONEON-SVE-NOGATHER-NEXT: ptrue p0.d, vl2
-; NONEON-SVE-NOGATHER-NEXT: ldr q0, [x0]
-; NONEON-SVE-NOGATHER-NEXT: index z1.d, #1, #1
-; NONEON-SVE-NOGATHER-NEXT: cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-SVE-NOGATHER-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-SVE-NOGATHER-NEXT: and z1.d, z2.d, z1.d
-; NONEON-SVE-NOGATHER-NEXT: uaddv d1, p0, z1.d
-; NONEON-SVE-NOGATHER-NEXT: fmov x8, d1
-; NONEON-SVE-NOGATHER-NEXT: ldr q1, [x1]
-; NONEON-SVE-NOGATHER-NEXT: strb w8, [sp, #12]
-; NONEON-SVE-NOGATHER-NEXT: and w8, w8, #0xff
-; NONEON-SVE-NOGATHER-NEXT: tbnz w8, #0, .LBB1_3
-; NONEON-SVE-NOGATHER-NEXT: // %bb.1: // %else
-; NONEON-SVE-NOGATHER-NEXT: tbnz w8, #1, .LBB1_4
-; NONEON-SVE-NOGATHER-NEXT: .LBB1_2: // %else2
-; NONEON-SVE-NOGATHER-NEXT: add sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT: ret
-; NONEON-SVE-NOGATHER-NEXT: .LBB1_3: // %cond.store
-; NONEON-SVE-NOGATHER-NEXT: fmov x9, d0
-; NONEON-SVE-NOGATHER-NEXT: fmov x10, d1
-; NONEON-SVE-NOGATHER-NEXT: str x9, [x10]
-; NONEON-SVE-NOGATHER-NEXT: tbz w8, #1, .LBB1_2
-; NONEON-SVE-NOGATHER-NEXT: .LBB1_4: // %cond.store1
-; NONEON-SVE-NOGATHER-NEXT: mov z0.d, z0.d[1]
-; NONEON-SVE-NOGATHER-NEXT: mov z1.d, z1.d[1]
-; NONEON-SVE-NOGATHER-NEXT: fmov x8, d0
-; NONEON-SVE-NOGATHER-NEXT: fmov x9, d1
-; NONEON-SVE-NOGATHER-NEXT: str x8, [x9]
-; NONEON-SVE-NOGATHER-NEXT: add sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT: ret
+; CHECK-LABEL: masked_scatter_v2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: index z1.d, #1, #1
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: and z1.d, z2.d, z1.d
+; CHECK-NEXT: uaddv d1, p0, z1.d
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: strb w8, [sp, #12]
+; CHECK-NEXT: and w8, w8, #0xff
+; CHECK-NEXT: tbnz w8, #0, .LBB1_3
+; CHECK-NEXT: // %bb.1: // %else
+; CHECK-NEXT: tbnz w8, #1, .LBB1_4
+; CHECK-NEXT: .LBB1_2: // %else2
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_3: // %cond.store
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: str x9, [x10]
+; CHECK-NEXT: tbz w8, #1, .LBB1_2
+; CHECK-NEXT: .LBB1_4: // %cond.store1
+; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z1.d, z1.d[1]
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: str x8, [x9]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
; NONEON-NOSVE: // %bb.0:
More information about the llvm-commits
mailing list