[llvm] [AArch64] Consider runtime mode when deciding to use SVE for fixed-length vectors. (PR #96081)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 02:53:23 PDT 2024


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/96081

>From d1ec6a1a843aa86144fa99635edb99151536f238 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 17 Jun 2024 17:28:43 +0100
Subject: [PATCH 1/2] [AArch64] Consider runtime mode when deciding to use SVE
 for fixed-length vectors.

This also fixes the case where an SVE div is incorrectly assumed to be
available in non-streaming mode with SME.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  26 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |  12 +-
 .../Target/AArch64/AArch64TargetMachine.cpp   |   8 +-
 .../AArch64/intrinsic-cttz-elts-sve.ll        | 316 ++++++++++++------
 .../AArch64/sme-intrinsics-mova-extract.ll    |   2 +-
 .../AArch64/sme-intrinsics-mova-insert.ll     |   2 +-
 ...streaming-mode-fixed-length-and-combine.ll |   2 +-
 ...treaming-mode-fixed-length-bit-counting.ll |   2 +-
 ...sve-streaming-mode-fixed-length-bitcast.ll |   2 +-
 ...e-streaming-mode-fixed-length-bitselect.ll |   2 +-
 ...treaming-mode-fixed-length-build-vector.ll |   2 +-
 .../sve-streaming-mode-fixed-length-concat.ll |   2 +-
 ...e-streaming-mode-fixed-length-ext-loads.ll |   2 +-
 ...ing-mode-fixed-length-extract-subvector.ll |   2 +-
 ...ng-mode-fixed-length-extract-vector-elt.ll |   2 +-
 ...e-streaming-mode-fixed-length-fcopysign.ll |   2 +-
 ...ve-streaming-mode-fixed-length-fp-arith.ll |   2 +-
 ...streaming-mode-fixed-length-fp-compares.ll |   2 +-
 ...-streaming-mode-fixed-length-fp-convert.ll |   2 +-
 ...aming-mode-fixed-length-fp-extend-trunc.ll |   2 +-
 ...e-streaming-mode-fixed-length-fp-minmax.ll |   2 +-
 ...streaming-mode-fixed-length-fp-rounding.ll |   2 +-
 ...e-streaming-mode-fixed-length-fp-select.ll |   2 +-
 ...-streaming-mode-fixed-length-fp-vselect.ll |   2 +-
 ...e-streaming-mode-fixed-length-int-arith.ll |   2 +-
 ...treaming-mode-fixed-length-int-compares.ll |   2 +-
 ...sve-streaming-mode-fixed-length-int-div.ll |   5 +-
 ...streaming-mode-fixed-length-int-extends.ll |   2 +-
 ...eaming-mode-fixed-length-int-immediates.ll |   2 +-
 ...sve-streaming-mode-fixed-length-int-log.ll |   2 +-
 ...-streaming-mode-fixed-length-int-minmax.ll |   2 +-
 ...sve-streaming-mode-fixed-length-int-mul.ll |   3 +-
 ...ve-streaming-mode-fixed-length-int-mulh.ll |   2 +-
 ...-streaming-mode-fixed-length-int-reduce.ll |   2 +-
 ...sve-streaming-mode-fixed-length-int-rem.ll |   2 +-
 ...-streaming-mode-fixed-length-int-select.ll |   2 +-
 ...-streaming-mode-fixed-length-int-shifts.ll |   2 +-
 ...streaming-mode-fixed-length-int-vselect.ll |   2 +-
 ...-streaming-mode-fixed-length-ld2-alloca.ll |   2 +-
 ...reaming-mode-fixed-length-limit-duplane.ll |   2 +-
 .../sve-streaming-mode-fixed-length-loads.ll  |   2 +-
 ...-streaming-mode-fixed-length-log-reduce.ll |   2 +-
 ...mode-fixed-length-masked-gather-scatter.ll |  91 ++---
 ...eaming-mode-fixed-length-optimize-ptrue.ll |   2 +-
 ...streaming-mode-fixed-length-permute-rev.ll |   2 +-
 ...g-mode-fixed-length-permute-zip-uzp-trn.ll |   2 +-
 .../sve-streaming-mode-fixed-length-ptest.ll  |   2 +-
 .../sve-streaming-mode-fixed-length-rev.ll    |   2 +-
 ...e-streaming-mode-fixed-length-sdiv-pow2.ll |   2 +-
 ...sve-streaming-mode-fixed-length-shuffle.ll |   2 +-
 .../sve-streaming-mode-fixed-length-stores.ll |   2 +-
 ...e-streaming-mode-fixed-length-subvector.ll |   2 +-
 ...treaming-mode-fixed-length-trunc-stores.ll |   2 +-
 .../sve-streaming-mode-fixed-length-trunc.ll  |   2 +-
 ...eaming-mode-fixed-length-vector-shuffle.ll |   2 +-
 .../sve-streaming-mode-test-register-mov.ll   |   2 +-
 56 files changed, 350 insertions(+), 209 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c790209cc221f..a4fa25ffdd6ff 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1418,7 +1418,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     }
   }
 
-  if (Subtarget->hasSVEorSME()) {
+  if (Subtarget->isSVEorStreamingSVEAvailable()) {
     for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
       setOperationAction(ISD::BITREVERSE, VT, Custom);
       setOperationAction(ISD::BSWAP, VT, Custom);
@@ -1528,14 +1528,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       }
     }
 
-    // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
-    for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
-                    MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
-                    MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+    // NEON doesn't support masked loads/stores, but SME and SVE do.
+    for (auto VT :
+         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+          MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+          MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::MSTORE, VT, Custom);
-      setOperationAction(ISD::MGATHER, VT, Custom);
-      setOperationAction(ISD::MSCATTER, VT, Custom);
+    }
+
+    // NEON doesn't support masked gathers/scatters, but SVE does.
+    if (Subtarget->isSVEAvailable()) {
+      for (auto VT :
+           {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
+            MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
+            MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
+        setOperationAction(ISD::MGATHER, VT, Custom);
+        setOperationAction(ISD::MSCATTER, VT, Custom);
+      }
     }
 
     // Firstly, exclude all scalable vector extending loads/truncating stores,
@@ -6986,7 +6996,7 @@ bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
 
   // NEON-sized vectors can be emulated using SVE instructions.
   if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
-    return Subtarget->hasSVEorSME();
+    return Subtarget->isSVEorStreamingSVEAvailable();
 
   // Ensure NEON MVTs only belong to a single register class.
   if (VT.getFixedSizeInBits() <= 128)
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 7ef7a89b5749f..5e1a370778914 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -185,6 +185,12 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
            (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
   }
 
+  /// Returns true if the target has access to either the full range of SVE instructions,
+  /// or the streaming-compatible subset of SVE instructions.
+  bool isSVEorStreamingSVEAvailable() const {
+    return hasSVE() || hasSMEFA64() || (hasSME() && isStreaming());
+  }
+
   unsigned getMinVectorRegisterBitWidth() const {
     // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
     // we don't yet support streaming-compatible codegen support that we trust
@@ -374,11 +380,11 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   }
 
   bool useSVEForFixedLengthVectors() const {
-    if (!isNeonAvailable())
-      return hasSVEorSME();
+    if (!isSVEorStreamingSVEAvailable())
+      return false;
 
     // Prefer NEON unless larger SVE registers are available.
-    return hasSVEorSME() && getMinSVEVectorSizeInBits() >= 256;
+    return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
   }
 
   bool useSVEForFixedLengthVectors(EVT VT) const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 7de9071476e7f..f94fa037a42c4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -187,6 +187,11 @@ static cl::opt<unsigned> SVEVectorBitsMinOpt(
              "with zero meaning no minimum size is assumed."),
     cl::init(0), cl::Hidden);
 
+static cl::opt<bool> ForceStreaming(
+    "force-streaming",
+    cl::desc("Force the use of streaming code for all functions"),
+    cl::init(false), cl::Hidden);
+
 static cl::opt<bool> ForceStreamingCompatible(
     "force-streaming-compatible",
     cl::desc("Force the use of streaming-compatible code for all functions"),
@@ -412,7 +417,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
   StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
   bool HasMinSize = F.hasMinSize();
 
-  bool IsStreaming = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
+  bool IsStreaming = ForceStreaming ||
+                     F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
                      F.hasFnAttribute("aarch64_pstate_sm_body");
   bool IsStreamingCompatible =
       F.hasFnAttribute("aarch64_pstate_sm_compatible") ||
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 9c72afd84fa7c..cdf2a962f9322 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
 
 ; WITH VSCALE RANGE
 
@@ -362,145 +362,261 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
 ; FIXED-WIDTH VECTOR TYPES
 
 define i32 @ctz_v16i1(<16 x i1> %a) {
-; CHECK-LABEL: ctz_v16i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.16b, v0.16b, #7
-; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v16i1:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT:    ptrue p0.b, vl16
+; NONSTREAMING-NEXT:    ptrue p1.b
+; NONSTREAMING-NEXT:    cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v16i1:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT:    ptrue p0.b, vl16
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p1.b
+; STREAMING-NEXT:    asr z0.b, z0.b, #7
+; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
   ret i32 %res
 }
 
 define i32 @ctz_v16i1_poison(<16 x i1> %a) {
-; CHECK-LABEL: ctz_v16i1_poison:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.16b, v0.16b, #7
-; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v16i1_poison:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT:    ptrue p0.b, vl16
+; NONSTREAMING-NEXT:    ptrue p1.b
+; NONSTREAMING-NEXT:    cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v16i1_poison:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT:    ptrue p0.b, vl16
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p1.b
+; STREAMING-NEXT:    asr z0.b, z0.b, #7
+; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
   ret i32 %res
 }
 
 define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
-; CHECK-LABEL: add_i64_ctz_v16i1_poison:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.16b, v0.16b, #7
-; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
-; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    incp x0, p0.b
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: add_i64_ctz_v16i1_poison:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.16b, v0.16b, #7
+; NONSTREAMING-NEXT:    ptrue p0.b, vl16
+; NONSTREAMING-NEXT:    ptrue p1.b
+; NONSTREAMING-NEXT:    cmlt v0.16b, v0.16b, #0
+; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    incp x0, p0.b
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: add_i64_ctz_v16i1_poison:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
+; STREAMING-NEXT:    ptrue p0.b, vl16
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p1.b
+; STREAMING-NEXT:    asr z0.b, z0.b, #7
+; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    incp x0, p0.b
+; STREAMING-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> %a, i1 1)
   %add = add i64 %res, %b
   ret i64 %add
 }
 
 define i32 @ctz_v8i1(<8 x i1> %a) {
-; CHECK-LABEL: ctz_v8i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.8b, v0.8b, #7
-; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
-; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v8i1:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.8b, v0.8b, #7
+; NONSTREAMING-NEXT:    ptrue p0.b, vl8
+; NONSTREAMING-NEXT:    ptrue p1.b
+; NONSTREAMING-NEXT:    cmlt v0.8b, v0.8b, #0
+; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v8i1:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT:    ptrue p0.b, vl8
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p1.b
+; STREAMING-NEXT:    asr z0.b, z0.b, #7
+; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 0)
   ret i32 %res
 }
 
 define i32 @ctz_v8i1_poison(<8 x i1> %a) {
-; CHECK-LABEL: ctz_v8i1_poison:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.8b, v0.8b, #7
-; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
-; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v8i1_poison:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.8b, v0.8b, #7
+; NONSTREAMING-NEXT:    ptrue p0.b, vl8
+; NONSTREAMING-NEXT:    ptrue p1.b
+; NONSTREAMING-NEXT:    cmlt v0.8b, v0.8b, #0
+; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v8i1_poison:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT:    ptrue p0.b, vl8
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p1.b
+; STREAMING-NEXT:    asr z0.b, z0.b, #7
+; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 1)
   ret i32 %res
 }
 
 define i32 @ctz_v4i1(<4 x i1> %a) {
-; CHECK-LABEL: ctz_v4i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.4h, v0.4h, #15
-; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
-; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v4i1:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.4h, v0.4h, #15
+; NONSTREAMING-NEXT:    ptrue p0.h, vl4
+; NONSTREAMING-NEXT:    ptrue p1.h
+; NONSTREAMING-NEXT:    cmlt v0.4h, v0.4h, #0
+; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v4i1:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT:    ptrue p0.h, vl4
+; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    ptrue p1.h
+; STREAMING-NEXT:    asr z0.h, z0.h, #15
+; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.h
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 0)
   ret i32 %res
 }
 
 define i32 @ctz_v4i1_poison(<4 x i1> %a) {
-; CHECK-LABEL: ctz_v4i1_poison:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.4h, v0.4h, #15
-; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
-; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v4i1_poison:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.4h, v0.4h, #15
+; NONSTREAMING-NEXT:    ptrue p0.h, vl4
+; NONSTREAMING-NEXT:    ptrue p1.h
+; NONSTREAMING-NEXT:    cmlt v0.4h, v0.4h, #0
+; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v4i1_poison:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT:    ptrue p0.h, vl4
+; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    ptrue p1.h
+; STREAMING-NEXT:    asr z0.h, z0.h, #15
+; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.h
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 1)
   ret i32 %res
 }
 
 define i32 @ctz_v2i1(<2 x i1> %a) {
-; CHECK-LABEL: ctz_v2i1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.2s, v0.2s, #31
-; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v2i1:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.2s, v0.2s, #31
+; NONSTREAMING-NEXT:    ptrue p0.s, vl2
+; NONSTREAMING-NEXT:    ptrue p1.s
+; NONSTREAMING-NEXT:    cmlt v0.2s, v0.2s, #0
+; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v2i1:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT:    ptrue p0.s, vl2
+; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    ptrue p1.s
+; STREAMING-NEXT:    asr z0.s, z0.s, #31
+; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.s
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 0)
   ret i32 %res
 }
 
 define i32 @ctz_v2i1_poison(<2 x i1> %a) {
-; CHECK-LABEL: ctz_v2i1_poison:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    shl v0.2s, v0.2s, #31
-; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
-; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
-; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-NEXT:    ret
+; NONSTREAMING-LABEL: ctz_v2i1_poison:
+; NONSTREAMING:       // %bb.0:
+; NONSTREAMING-NEXT:    shl v0.2s, v0.2s, #31
+; NONSTREAMING-NEXT:    ptrue p0.s, vl2
+; NONSTREAMING-NEXT:    ptrue p1.s
+; NONSTREAMING-NEXT:    cmlt v0.2s, v0.2s, #0
+; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NONSTREAMING-NEXT:    ret
+;
+; STREAMING-LABEL: ctz_v2i1_poison:
+; STREAMING:       // %bb.0:
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
+; STREAMING-NEXT:    ptrue p0.s, vl2
+; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    ptrue p1.s
+; STREAMING-NEXT:    asr z0.s, z0.s, #31
+; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cntp x0, p0, p0.s
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)
   ret i32 %res
 }
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
index 25f3540766618..48fbd14bd8540 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 16 x i8> @extract_row_b(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 %tileslice) {
 ; CHECK-LABEL: extract_row_b:
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
index 15c33e343c48f..8711a0388e34c 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming -verify-machineinstrs < %s | FileCheck %s
 
 define void @insert_row_b(i32 %tileslice, <vscale x 16 x i1> %pg,
 ; CHECK-LABEL: insert_row_b:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
index 4cdb175f55c9c..617b560713c3a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index f662140327135..2a83f13251d76 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
index 41065b3602003..b9264ad5f77c3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index d65e87d5b9756..d3c446c9904b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index 318a9cf7d738b..b8a2e0e0f4bd4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index a845c3cbdc2b6..4b6285b2732fe 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 2cdd4374a56c5..50a05cb4b1e27 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index b7b34cfa1517c..2665696308463 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
index 0a1831a94d8fe..cf308e6c4395f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index a8d01ec7ce0b4..dad53b31db0b0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index e84acfc8504a9..f2c882c370eab 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index 776b6918923ae..200ffb60a7928 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
index 2c08977320e84..5f6b60a767f9d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index 9878910763a75..c96189b960268 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index 775cac272cde9..84aea185917fa 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 344aac5b19838..03bc39a6ef3ee 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index daa9b51cc827b..bcc446d9d1a41 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 69661049bcb6f..035c76b569298 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index a2875ffef2e88..4360f3a12014a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index 0b4316686fff6..ba20de65a253a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 43c67382c9d82..ecc44927526e8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -1,9 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
-; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefixes=NEON-NOSVE
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=NONEON-NOSVE
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index d3ac1445e1086..25a6ea490c163 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
index 1f5bb5f5486af..1335bb769821f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 3137a7bc7ad27..687dd9445f387 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index 4775a965b70d7..1bca7dd09d9b7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
index ba479fc3bbe2d..05869281eb59f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefix=SVE2
-; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=SVE2
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index 6198926c0b438..b0fdce9a93bd3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 ; This test only tests the legal types for a given vector width, as mulh nodes
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 7bdb4599707b0..92a67cba55f7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index dd6d2dcacd616..b4641172f8b06 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 5cee1360f6f3c..4ac156c42fda0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 2778e93416a74..d0f99211e80fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index af15d5f67ad15..41eb731fd66df 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 66d544d0acbf5..270f05a806b82 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index 3b83f982b6bfc..3627390b5edfa 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index c97a3c2e721a3..504db6df18ee5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
index 9e1edb817c459..d4565c4b69c77 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index 75c4536c97306..8ca598902b94c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
-; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
 ; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefix=SME
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-SVE-NOGATHER
 ; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -87,24 +86,27 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ;
 ; NONEON-NOSVE-LABEL: masked_gather_v2i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #16
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT:    sub sp, sp, #144
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 144
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-NOSVE-NEXT:    index z0.d, #1, #1
-; NONEON-NOSVE-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-NOSVE-NEXT:    and z0.d, z1.d, z0.d
 ; NONEON-NOSVE-NEXT:    ldr q1, [x1]
-; NONEON-NOSVE-NEXT:    uaddv d0, p0, z0.d
-; NONEON-NOSVE-NEXT:    ptrue p0.d
-; NONEON-NOSVE-NEXT:    fmov x8, d0
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
+; NONEON-NOSVE-NEXT:    str q0, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #112]
+; NONEON-NOSVE-NEXT:    cmp x9, #0
+; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
+; NONEON-NOSVE-NEXT:    cmp x10, #0
+; NONEON-NOSVE-NEXT:    csetm x9, eq
+; NONEON-NOSVE-NEXT:    sub w8, w8, w9
+; NONEON-NOSVE-NEXT:    strb w8, [sp, #140]
 ; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
 ; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB0_2
 ; NONEON-NOSVE-NEXT:  // %bb.1: // %cond.load
-; NONEON-NOSVE-NEXT:    fmov x9, d1
-; NONEON-NOSVE-NEXT:    ld1rd { z0.d }, p0/z, [x9]
+; NONEON-NOSVE-NEXT:    str q1, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldr x9, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldr x9, [x9]
+; NONEON-NOSVE-NEXT:    str x9, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldr q0, [sp, #80]
 ; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB0_3
 ; NONEON-NOSVE-NEXT:    b .LBB0_4
 ; NONEON-NOSVE-NEXT:  .LBB0_2:
@@ -112,17 +114,19 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; NONEON-NOSVE-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
 ; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB0_4
 ; NONEON-NOSVE-NEXT:  .LBB0_3: // %cond.load1
-; NONEON-NOSVE-NEXT:    mov w8, #1 // =0x1
-; NONEON-NOSVE-NEXT:    index z2.d, #0, #1
-; NONEON-NOSVE-NEXT:    mov z1.d, z1.d[1]
-; NONEON-NOSVE-NEXT:    mov z3.d, x8
-; NONEON-NOSVE-NEXT:    fmov x8, d1
-; NONEON-NOSVE-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
+; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldr x8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr x8, [x8]
-; NONEON-NOSVE-NEXT:    mov z0.d, p0/m, x8
+; NONEON-NOSVE-NEXT:    str q0, [sp]
+; NONEON-NOSVE-NEXT:    ldr x9, [sp]
+; NONEON-NOSVE-NEXT:    str x8, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
+; NONEON-NOSVE-NEXT:    str q0, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:  .LBB0_4: // %else2
-; NONEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; NONEON-NOSVE-NEXT:    add sp, sp, #16
+; NONEON-NOSVE-NEXT:    add sp, sp, #144
 ; NONEON-NOSVE-NEXT:    ret
   %vals = load <2 x i64>, ptr %a
   %ptrs = load <2 x ptr>, ptr %b
@@ -203,37 +207,38 @@ define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ;
 ; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #16
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    ptrue p0.d, vl2
-; NONEON-NOSVE-NEXT:    ldr q0, [x0]
-; NONEON-NOSVE-NEXT:    index z1.d, #1, #1
-; NONEON-NOSVE-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-NOSVE-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-NOSVE-NEXT:    and z1.d, z2.d, z1.d
-; NONEON-NOSVE-NEXT:    uaddv d1, p0, z1.d
-; NONEON-NOSVE-NEXT:    fmov x8, d1
-; NONEON-NOSVE-NEXT:    ldr q1, [x1]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    sub sp, sp, #96
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT:    ldr q1, [x0]
+; NONEON-NOSVE-NEXT:    ldr q0, [x1]
+; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
+; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #64]
+; NONEON-NOSVE-NEXT:    cmp x9, #0
+; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
+; NONEON-NOSVE-NEXT:    cmp x10, #0
+; NONEON-NOSVE-NEXT:    csetm x9, eq
+; NONEON-NOSVE-NEXT:    sub w8, w8, w9
+; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
 ; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB1_3
 ; NONEON-NOSVE-NEXT:  // %bb.1: // %else
 ; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB1_4
 ; NONEON-NOSVE-NEXT:  .LBB1_2: // %else2
-; NONEON-NOSVE-NEXT:    add sp, sp, #16
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
 ; NONEON-NOSVE-NEXT:  .LBB1_3: // %cond.store
-; NONEON-NOSVE-NEXT:    fmov x9, d0
-; NONEON-NOSVE-NEXT:    fmov x10, d1
+; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldr x9, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldr x10, [sp, #48]
 ; NONEON-NOSVE-NEXT:    str x9, [x10]
 ; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB1_2
 ; NONEON-NOSVE-NEXT:  .LBB1_4: // %cond.store1
-; NONEON-NOSVE-NEXT:    mov z0.d, z0.d[1]
-; NONEON-NOSVE-NEXT:    mov z1.d, z1.d[1]
-; NONEON-NOSVE-NEXT:    fmov x8, d0
-; NONEON-NOSVE-NEXT:    fmov x9, d1
+; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
+; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr x9, [sp, #24]
 ; NONEON-NOSVE-NEXT:    str x8, [x9]
-; NONEON-NOSVE-NEXT:    add sp, sp, #16
+; NONEON-NOSVE-NEXT:    add sp, sp, #96
 ; NONEON-NOSVE-NEXT:    ret
   %vals = load <2 x i64>, ptr %a
   %ptrs = load <2 x ptr>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
index dbdf5f2502999..431c5a78202e8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index 8c23f5f9922da..a33e8537edf4e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
index bc6fdd1ecd5a7..b91f813c5141b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index 8ebf713a671f4..5235423c00d9a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index bc0fc7c79391d..c34cae12516ed 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index df019ce2e0ad6..85ba964000234 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
index b66e6d9013573..c7b2575266d65 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index a77ac7832e17c..b8779991dbb45 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
index a9f4d92b1e6b6..d022999b856b1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index 30682751037fe..c0aa162b19b77 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
index bc046059f0bd5..77aaeeadcfc2f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
index 323f5f56a2c08..ea6123edc8b4c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
index 23adb1a4bc092..9c7a3d5046d0e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 

>From fd116beff8fab54c5be36a6888e1e1f55d03fb31 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Thu, 20 Jun 2024 09:53:42 +0100
Subject: [PATCH 2/2] Address comments

---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  56 ++---
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |   2 +-
 .../Target/AArch64/AArch64TargetMachine.cpp   |   5 +-
 ...sve-streaming-mode-fixed-length-int-div.ll |   1 -
 ...sve-streaming-mode-fixed-length-int-mul.ll |   2 +-
 ...mode-fixed-length-masked-gather-scatter.ll | 223 ++++++------------
 6 files changed, 105 insertions(+), 184 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a4fa25ffdd6ff..2a82e032c6415 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1430,8 +1430,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SINT_TO_FP, VT, Custom);
       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
-      setOperationAction(ISD::MGATHER, VT, Custom);
-      setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::MUL, VT, Custom);
       setOperationAction(ISD::MULHS, VT, Custom);
@@ -1537,17 +1535,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::MSTORE, VT, Custom);
     }
 
-    // NEON doesn't support masked gathers/scatters, but SVE does.
-    if (Subtarget->isSVEAvailable()) {
-      for (auto VT :
-           {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
-            MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
-            MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
-        setOperationAction(ISD::MGATHER, VT, Custom);
-        setOperationAction(ISD::MSCATTER, VT, Custom);
-      }
-    }
-
     // Firstly, exclude all scalable vector extending loads/truncating stores,
     // include both integer and floating scalable vector.
     for (MVT VT : MVT::scalable_vector_valuetypes()) {
@@ -1586,8 +1573,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                     MVT::nxv4f32, MVT::nxv2f64}) {
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
-      setOperationAction(ISD::MGATHER, VT, Custom);
-      setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
       setOperationAction(ISD::SELECT, VT, Custom);
@@ -1621,8 +1606,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
       setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
-      if (Subtarget->isSVEAvailable())
-        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
       setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
       setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
       setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
@@ -1660,8 +1643,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
     for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
       setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
-      setOperationAction(ISD::MGATHER, VT, Custom);
-      setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
@@ -1685,18 +1666,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::MUL, MVT::v1i64, Custom);
     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
 
-    if (Subtarget->isSVEAvailable()) {
-      // NEON doesn't support across-vector reductions, but SVE does.
-      for (auto VT :
-           {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
-        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
-    }
-
-    // Histcnt is SVE2 only
-    if (Subtarget->hasSVE2() && Subtarget->isSVEAvailable())
-      setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::Other,
-                         Custom);
-
     // NOTE: Currently this has to happen after computeRegisterProperties rather
     // than the preferred option of combining it with the addRegisterClass call.
     if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1772,6 +1741,31 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::INTRINSIC_WO_CHAIN, VT, Custom);
   }
 
+  // Handle operations that are only available in non-streaming SVE mode.
+  if (Subtarget->isSVEAvailable()) {
+    for (auto VT : {MVT::nxv16i8,  MVT::nxv8i16, MVT::nxv4i32,  MVT::nxv2i64,
+                    MVT::nxv2f16,  MVT::nxv4f16, MVT::nxv8f16,  MVT::nxv2f32,
+                    MVT::nxv4f32,  MVT::nxv2f64, MVT::nxv2bf16, MVT::nxv4bf16,
+                    MVT::nxv8bf16, MVT::v4f16,   MVT::v8f16,    MVT::v2f32,
+                    MVT::v4f32,    MVT::v1f64,   MVT::v2f64,    MVT::v8i8,
+                    MVT::v16i8,    MVT::v4i16,   MVT::v8i16,    MVT::v2i32,
+                    MVT::v4i32,    MVT::v1i64,   MVT::v2i64}) {
+      setOperationAction(ISD::MGATHER, VT, Custom);
+      setOperationAction(ISD::MSCATTER, VT, Custom);
+    }
+
+    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
+                    MVT::nxv4f32, MVT::nxv2f64, MVT::v4f16, MVT::v8f16,
+                    MVT::v2f32, MVT::v4f32, MVT::v2f64})
+      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+
+    // Histcnt is SVE2 only
+    if (Subtarget->hasSVE2())
+      setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::Other,
+                         Custom);
+  }
+
+
   if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
     // Only required for llvm.aarch64.mops.memset.tag
     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5e1a370778914..0c9352bda7599 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -188,7 +188,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// Returns true if the target has access to either the full range of SVE instructions,
   /// or the streaming-compatible subset of SVE instructions.
   bool isSVEorStreamingSVEAvailable() const {
-    return hasSVE() || hasSMEFA64() || (hasSME() && isStreaming());
+    return hasSVE() || (hasSME() && isStreaming());
   }
 
   unsigned getMinVectorRegisterBitWidth() const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index f94fa037a42c4..8c924e7c937cd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -420,9 +420,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
   bool IsStreaming = ForceStreaming ||
                      F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
                      F.hasFnAttribute("aarch64_pstate_sm_body");
-  bool IsStreamingCompatible =
-      F.hasFnAttribute("aarch64_pstate_sm_compatible") ||
-      ForceStreamingCompatible;
+  bool IsStreamingCompatible = ForceStreamingCompatible ||
+                               F.hasFnAttribute("aarch64_pstate_sm_compatible");
 
   unsigned MinSVEVectorSize = 0;
   unsigned MaxSVEVectorSize = 0;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index ecc44927526e8..516772b8ca664 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -2,7 +2,6 @@
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
 ; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=NONEON-NOSVE
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
index 05869281eb59f..319fa5c845827 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefix=SVE2
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefix=SVE2
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index 8ca598902b94c..c8cea6ebabd48 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -1,88 +1,51 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
-; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefix=SME
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-SVE-NOGATHER
-; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
 
 define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
-; SVE-LABEL: masked_gather_v2i64:
-; SVE:       // %bb.0:
-; SVE-NEXT:    ldr q0, [x0]
-; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    cmeq v0.2d, v0.2d, #0
-; SVE-NEXT:    cmpne p0.d, p0/z, z0.d, #0
-; SVE-NEXT:    ldr q0, [x1]
-; SVE-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SVE-NEXT:    ret
-;
-; SME-LABEL: masked_gather_v2i64:
-; SME:       // %bb.0:
-; SME-NEXT:    ldr q0, [x0]
-; SME-NEXT:    adrp x8, .LCPI0_0
-; SME-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
-; SME-NEXT:    cmeq v0.2d, v0.2d, #0
-; SME-NEXT:    and v0.16b, v0.16b, v1.16b
-; SME-NEXT:    ldr q1, [x1]
-; SME-NEXT:    addp d0, v0.2d
-; SME-NEXT:    fmov x8, d0
-; SME-NEXT:    // implicit-def: $q0
-; SME-NEXT:    tbnz w8, #0, .LBB0_3
-; SME-NEXT:  // %bb.1: // %else
-; SME-NEXT:    tbnz w8, #1, .LBB0_4
-; SME-NEXT:  .LBB0_2: // %else2
-; SME-NEXT:    ret
-; SME-NEXT:  .LBB0_3: // %cond.load
-; SME-NEXT:    fmov x9, d1
-; SME-NEXT:    ldr d0, [x9]
-; SME-NEXT:    tbz w8, #1, .LBB0_2
-; SME-NEXT:  .LBB0_4: // %cond.load1
-; SME-NEXT:    mov x8, v1.d[1]
-; SME-NEXT:    ld1 { v0.d }[1], [x8]
-; SME-NEXT:    ret
-;
-; NONEON-SVE-NOGATHER-LABEL: masked_gather_v2i64:
-; NONEON-SVE-NOGATHER:       // %bb.0:
-; NONEON-SVE-NOGATHER-NEXT:    sub sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-SVE-NOGATHER-NEXT:    ptrue p0.d, vl2
-; NONEON-SVE-NOGATHER-NEXT:    ldr q0, [x0]
-; NONEON-SVE-NOGATHER-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-SVE-NOGATHER-NEXT:    index z0.d, #1, #1
-; NONEON-SVE-NOGATHER-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-SVE-NOGATHER-NEXT:    and z0.d, z1.d, z0.d
-; NONEON-SVE-NOGATHER-NEXT:    ldr q1, [x1]
-; NONEON-SVE-NOGATHER-NEXT:    uaddv d0, p0, z0.d
-; NONEON-SVE-NOGATHER-NEXT:    ptrue p0.d
-; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d0
-; NONEON-SVE-NOGATHER-NEXT:    strb w8, [sp, #12]
-; NONEON-SVE-NOGATHER-NEXT:    and w8, w8, #0xff
-; NONEON-SVE-NOGATHER-NEXT:    tbz w8, #0, .LBB0_2
-; NONEON-SVE-NOGATHER-NEXT:  // %bb.1: // %cond.load
-; NONEON-SVE-NOGATHER-NEXT:    fmov x9, d1
-; NONEON-SVE-NOGATHER-NEXT:    ld1rd { z0.d }, p0/z, [x9]
-; NONEON-SVE-NOGATHER-NEXT:    tbnz w8, #1, .LBB0_3
-; NONEON-SVE-NOGATHER-NEXT:    b .LBB0_4
-; NONEON-SVE-NOGATHER-NEXT:  .LBB0_2:
-; NONEON-SVE-NOGATHER-NEXT:    adrp x9, .LCPI0_0
-; NONEON-SVE-NOGATHER-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
-; NONEON-SVE-NOGATHER-NEXT:    tbz w8, #1, .LBB0_4
-; NONEON-SVE-NOGATHER-NEXT:  .LBB0_3: // %cond.load1
-; NONEON-SVE-NOGATHER-NEXT:    mov w8, #1 // =0x1
-; NONEON-SVE-NOGATHER-NEXT:    index z2.d, #0, #1
-; NONEON-SVE-NOGATHER-NEXT:    mov z1.d, z1.d[1]
-; NONEON-SVE-NOGATHER-NEXT:    mov z3.d, x8
-; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d1
-; NONEON-SVE-NOGATHER-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
-; NONEON-SVE-NOGATHER-NEXT:    ldr x8, [x8]
-; NONEON-SVE-NOGATHER-NEXT:    mov z0.d, p0/m, x8
-; NONEON-SVE-NOGATHER-NEXT:  .LBB0_4: // %else2
-; NONEON-SVE-NOGATHER-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; NONEON-SVE-NOGATHER-NEXT:    add sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT:    ret
+; CHECK-LABEL: masked_gather_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    index z0.d, #1, #1
+; CHECK-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z0.d, z1.d, z0.d
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    uaddv d0, p0, z0.d
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    strb w8, [sp, #12]
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    tbz w8, #0, .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %cond.load
+; CHECK-NEXT:    fmov x9, d1
+; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x9]
+; CHECK-NEXT:    tbnz w8, #1, .LBB0_3
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    adrp x9, .LCPI0_0
+; CHECK-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
+; CHECK-NEXT:    tbz w8, #1, .LBB0_4
+; CHECK-NEXT:  .LBB0_3: // %cond.load1
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    index z2.d, #0, #1
+; CHECK-NEXT:    mov z1.d, z1.d[1]
+; CHECK-NEXT:    mov z3.d, x8
+; CHECK-NEXT:    fmov x8, d1
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-NEXT:    mov z0.d, p0/m, x8
+; CHECK-NEXT:  .LBB0_4: // %else2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_gather_v2i64:
 ; NONEON-NOSVE:       // %bb.0:
@@ -136,74 +99,40 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 }
 
 define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
-; SVE-LABEL: masked_scatter_v2i64:
-; SVE:       // %bb.0:
-; SVE-NEXT:    ldr q0, [x0]
-; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    cmeq v1.2d, v0.2d, #0
-; SVE-NEXT:    cmpne p0.d, p0/z, z1.d, #0
-; SVE-NEXT:    ldr q1, [x1]
-; SVE-NEXT:    st1d { z0.d }, p0, [z1.d]
-; SVE-NEXT:    ret
-;
-; SME-LABEL: masked_scatter_v2i64:
-; SME:       // %bb.0:
-; SME-NEXT:    ldr q0, [x0]
-; SME-NEXT:    adrp x8, .LCPI1_0
-; SME-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
-; SME-NEXT:    cmeq v1.2d, v0.2d, #0
-; SME-NEXT:    and v1.16b, v1.16b, v2.16b
-; SME-NEXT:    addp d2, v1.2d
-; SME-NEXT:    ldr q1, [x1]
-; SME-NEXT:    fmov x8, d2
-; SME-NEXT:    tbnz w8, #0, .LBB1_3
-; SME-NEXT:  // %bb.1: // %else
-; SME-NEXT:    tbnz w8, #1, .LBB1_4
-; SME-NEXT:  .LBB1_2: // %else2
-; SME-NEXT:    ret
-; SME-NEXT:  .LBB1_3: // %cond.store
-; SME-NEXT:    fmov x9, d1
-; SME-NEXT:    str d0, [x9]
-; SME-NEXT:    tbz w8, #1, .LBB1_2
-; SME-NEXT:  .LBB1_4: // %cond.store1
-; SME-NEXT:    mov x8, v1.d[1]
-; SME-NEXT:    st1 { v0.d }[1], [x8]
-; SME-NEXT:    ret
-;
-; NONEON-SVE-NOGATHER-LABEL: masked_scatter_v2i64:
-; NONEON-SVE-NOGATHER:       // %bb.0:
-; NONEON-SVE-NOGATHER-NEXT:    sub sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-SVE-NOGATHER-NEXT:    ptrue p0.d, vl2
-; NONEON-SVE-NOGATHER-NEXT:    ldr q0, [x0]
-; NONEON-SVE-NOGATHER-NEXT:    index z1.d, #1, #1
-; NONEON-SVE-NOGATHER-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
-; NONEON-SVE-NOGATHER-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; NONEON-SVE-NOGATHER-NEXT:    and z1.d, z2.d, z1.d
-; NONEON-SVE-NOGATHER-NEXT:    uaddv d1, p0, z1.d
-; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d1
-; NONEON-SVE-NOGATHER-NEXT:    ldr q1, [x1]
-; NONEON-SVE-NOGATHER-NEXT:    strb w8, [sp, #12]
-; NONEON-SVE-NOGATHER-NEXT:    and w8, w8, #0xff
-; NONEON-SVE-NOGATHER-NEXT:    tbnz w8, #0, .LBB1_3
-; NONEON-SVE-NOGATHER-NEXT:  // %bb.1: // %else
-; NONEON-SVE-NOGATHER-NEXT:    tbnz w8, #1, .LBB1_4
-; NONEON-SVE-NOGATHER-NEXT:  .LBB1_2: // %else2
-; NONEON-SVE-NOGATHER-NEXT:    add sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT:    ret
-; NONEON-SVE-NOGATHER-NEXT:  .LBB1_3: // %cond.store
-; NONEON-SVE-NOGATHER-NEXT:    fmov x9, d0
-; NONEON-SVE-NOGATHER-NEXT:    fmov x10, d1
-; NONEON-SVE-NOGATHER-NEXT:    str x9, [x10]
-; NONEON-SVE-NOGATHER-NEXT:    tbz w8, #1, .LBB1_2
-; NONEON-SVE-NOGATHER-NEXT:  .LBB1_4: // %cond.store1
-; NONEON-SVE-NOGATHER-NEXT:    mov z0.d, z0.d[1]
-; NONEON-SVE-NOGATHER-NEXT:    mov z1.d, z1.d[1]
-; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d0
-; NONEON-SVE-NOGATHER-NEXT:    fmov x9, d1
-; NONEON-SVE-NOGATHER-NEXT:    str x8, [x9]
-; NONEON-SVE-NOGATHER-NEXT:    add sp, sp, #16
-; NONEON-SVE-NOGATHER-NEXT:    ret
+; CHECK-LABEL: masked_scatter_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    index z1.d, #1, #1
+; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
+; CHECK-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    and z1.d, z2.d, z1.d
+; CHECK-NEXT:    uaddv d1, p0, z1.d
+; CHECK-NEXT:    fmov x8, d1
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    strb w8, [sp, #12]
+; CHECK-NEXT:    and w8, w8, #0xff
+; CHECK-NEXT:    tbnz w8, #0, .LBB1_3
+; CHECK-NEXT:  // %bb.1: // %else
+; CHECK-NEXT:    tbnz w8, #1, .LBB1_4
+; CHECK-NEXT:  .LBB1_2: // %else2
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_3: // %cond.store
+; CHECK-NEXT:    fmov x9, d0
+; CHECK-NEXT:    fmov x10, d1
+; CHECK-NEXT:    str x9, [x10]
+; CHECK-NEXT:    tbz w8, #1, .LBB1_2
+; CHECK-NEXT:  .LBB1_4: // %cond.store1
+; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    mov z1.d, z1.d[1]
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    fmov x9, d1
+; CHECK-NEXT:    str x8, [x9]
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
 ; NONEON-NOSVE:       // %bb.0:



More information about the llvm-commits mailing list