[llvm] 93831c7 - [AArch64] Let patterns for NEON instructions check runtime mode. (#95560)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 19 06:24:32 PDT 2024


Author: Sander de Smalen
Date: 2024-06-19T14:24:28+01:00
New Revision: 93831c73ea51dcf4dc1832a4ea5616b819d36f31

URL: https://github.com/llvm/llvm-project/commit/93831c73ea51dcf4dc1832a4ea5616b819d36f31
DIFF: https://github.com/llvm/llvm-project/commit/93831c73ea51dcf4dc1832a4ea5616b819d36f31.diff

LOG: [AArch64] Let patterns for NEON instructions check runtime mode. (#95560)

This helps identify any failures where the compiler might otherwise
silently emit instructions that are not valid for the given runtime mode.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll

Removed: 
    llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s
    llvm/test/MC/AArch64/SME/streaming-mode-neon.s


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f6f66e9e0c70..0f0606c49a570 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -22368,7 +22368,8 @@ static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
   ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);
 
   SmallVector<SDValue, 16> MaskConstants;
-  if (VecVT == MVT::v16i8) {
+  if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
+      VecVT == MVT::v16i8) {
     // v16i8 is a special case, as we have 16 entries but only 8 positional bits
     // per entry. We split it into two halves, apply the mask, zip the halves to
     // create 8x 16-bit values, and the perform the vector reduce.

diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 17d011086634c..e1ecc5a57dd26 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -7602,13 +7602,12 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
 }
 
 let mayRaiseFPException = 1, Uses = [FPCR] in
-multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
-                           Predicate pred = HasNEON> {
-  let Predicates = [pred] in {
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+  let Predicates = [HasNEONandIsStreamingSafe] in {
   def v1i64       : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
   def v1i32       : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
   }
-  let Predicates = [pred, HasFullFP16] in {
+  let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
   def v1f16       : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
   }
 }
@@ -7616,11 +7615,13 @@ multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
 let mayRaiseFPException = 1, Uses = [FPCR] in
 multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
                               SDPatternOperator OpNode> {
+  let Predicates = [HasNEONandIsStreamingSafe] in {
   def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
                                 [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
   def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
                                 [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
-  let Predicates = [HasNEON, HasFullFP16] in {
+  }
+  let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
   def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
                                 [(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn)))]>;
   }
@@ -7880,7 +7881,7 @@ class SIMDMovAlias<string asm, string size, Instruction inst,
 multiclass SMov {
   // SMOV with vector index of 0 are legal in Scalable Matrix Extension (SME)
   // streaming mode.
-  let Predicates = [HasNEONorSME] in {
+  let Predicates = [HasNEONandIsStreamingSafe] in {
     def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
       let Inst{20-16} = 0b00001;
     }
@@ -7927,7 +7928,7 @@ multiclass SMov {
 multiclass UMov {
   // UMOV with vector index of 0 are legal in Scalable Matrix Extension (SME)
   // streaming mode.
-  let Predicates = [HasNEONorSME] in {
+  let Predicates = [HasNEONandIsStreamingSafe] in {
     def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
       let Inst{20-16} = 0b00001;
     }

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 91e5bc3caa102..6afee9bd388a6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -107,7 +107,7 @@ def HasRCPC_IMMO      : Predicate<"Subtarget->hasRCPC_IMMO()">,
 
 def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
-def HasNEON          : Predicate<"Subtarget->hasNEON()">,
+def HasNEON          : Predicate<"Subtarget->isNeonAvailable()">,
                                  AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
 def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
@@ -235,11 +235,10 @@ def HasSMEF16F16orSMEF8F16
                 "sme-f16f16 or sme-f8f16">;
 
 // A subset of NEON instructions are legal in Streaming SVE execution mode,
-// they should be enabled if either has been specified.
-def HasNEONorSME
-    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
-                AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
-                "neon or sme">;
+// so don't need the additional check for 'isNeonAvailable'.
+def HasNEONandIsStreamingSafe
+    : Predicate<"Subtarget->hasNEON()">,
+      AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
 def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                  AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
 def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -323,8 +322,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
 
 def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
 
-def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
-
 def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                   SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                        SDTCisInt<1>]>>;
@@ -1350,7 +1347,7 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot
                              VectorIndexS:$idx)>;
 }
 
-let Predicates = [HasNEONorSME, HasBF16] in {
+let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in {
 def BFCVT : BF16ToSinglePrecision<"bfcvt">;
 // Round FP32 to BF16.
 def : Pat<(bf16 (any_fpround (f32 FPR32:$Rn))), (BFCVT $Rn)>;
@@ -5789,9 +5786,9 @@ defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
 defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
 defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
 defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
-defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
-defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
-defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
+defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
+defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
+defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
 defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
 defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -5820,7 +5817,7 @@ let Predicates = [HasRDM] in {
 
 defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
                                           int_aarch64_neon_fmulx,
-                                          [HasNEONorSME]>;
+                                          [HasNEONandIsStreamingSafe]>;
 
 let Predicates = [HasNEON] in {
 def : InstAlias<"cmls $dst, $src1, $src2",
@@ -5894,9 +5891,9 @@ defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
 def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
 defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
 defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorSME>;
-defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorSME>;
-defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorSME>;
+defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe">;
+defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
+defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
 defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                  UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
 defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
@@ -5915,7 +5912,7 @@ def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
           (CMLTv1i64rz V64:$Rn)>;
 
 // Round FP64 to BF16.
-let Predicates = [HasNEONorSME, HasBF16] in
+let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
 def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
           (BFCVT (FCVTXNv1i64 $Rn))>;
 
@@ -6016,7 +6013,7 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
 // Some float -> int -> float conversion patterns for which we want to keep the
 // int values in FP registers using the corresponding NEON instructions to
 // avoid more costly int <-> fp register transfers.
-let Predicates = [HasNEON] in {
+let Predicates = [HasNEONandIsStreamingSafe] in {
 def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
 def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
@@ -6026,7 +6023,7 @@ def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
 def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 
-let Predicates = [HasFullFP16] in {
+let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
 def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
 def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
@@ -6118,7 +6115,7 @@ def : Pat <(f64 (uint_to_fp (i32
                           (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
 // 64-bits -> double are handled in target specific dag combine:
 // performIntToFpCombine.
-} // let Predicates = [HasNEON]
+} // let Predicates = [HasNEONandIsStreamingSafe]
 
 //===----------------------------------------------------------------------===//
 // Advanced SIMD three different-sized vector instructions.
@@ -8379,7 +8376,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
 
 // Same as above, but the first element is populated using
 // scalar_to_vector + insert_subvector instead of insert_vector_elt.
-let Predicates = [IsNeonAvailable] in {
+let Predicates = [HasNEON] in {
   class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
                           SDPatternOperator ExtLoad, Instruction LD1>
     : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index bd5de628d8529..a3c41f2e052cd 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3351,7 +3351,7 @@ let Predicates = [HasSVEorSME] in {
             (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
 
   // Extract element from vector with immediate index that's within the bottom 128-bits.
-  let Predicates = [IsNeonAvailable], AddedComplexity = 1 in {
+  let Predicates = [HasNEON], AddedComplexity = 1 in {
   def : Pat<(i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index)),
             (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
   def : Pat<(i32 (vector_extract nxv8i16:$vec, VectorIndexH:$index)),
@@ -3360,9 +3360,9 @@ let Predicates = [HasSVEorSME] in {
             (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
   def : Pat<(i64 (vector_extract nxv2i64:$vec, VectorIndexD:$index)),
             (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index)>;
-  } // End IsNeonAvailable
+  } // End HasNEON
 
-  let Predicates = [IsNeonAvailable] in {
+  let Predicates = [HasNEON] in {
   def : Pat<(sext_inreg (vector_extract nxv16i8:$vec, VectorIndexB:$index), i8),
             (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index)>;
   def : Pat<(sext_inreg (anyext (i32 (vector_extract nxv16i8:$vec, VectorIndexB:$index))), i8),
@@ -3375,7 +3375,7 @@ let Predicates = [HasSVEorSME] in {
 
   def : Pat<(sext (i32 (vector_extract nxv4i32:$vec, VectorIndexS:$index))),
             (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index)>;
-  } // End IsNeonAvailable
+  } // End HasNEON
 
   // Extract first element from vector.
   let AddedComplexity = 2 in {

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
index a689a539b0082..5f4b9dd1592cf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
@@ -315,92 +315,40 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
 ; NONEON-NOSVE-LABEL: masked_load_v16i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    sub sp, sp, #1024
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 1040
+; NONEON-NOSVE-NEXT:    sub sp, sp, #992
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 1008
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
 ; NONEON-NOSVE-NEXT:    str q0, [sp, #976]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #984]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1000]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #976]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #992]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #991]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1007]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #990]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1006]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #989]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1005]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #988]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1004]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #987]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1003]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #986]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1002]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #985]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1001]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #983]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #999]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #982]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #998]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #981]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #997]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #980]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #996]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #979]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #995]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #978]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #979]
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #977]
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #980]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #981]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #982]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #976]
+; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
 ; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #994]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #977]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #993]
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #992]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    zip1 v0.16b, v0.16b, v1.16b
-; NONEON-NOSVE-NEXT:    str q0, [sp, #1008]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #1010]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #1008]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #1012]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #1014]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #1016]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #1018]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #1020]
-; NONEON-NOSVE-NEXT:    add w8, w9, w8
-; NONEON-NOSVE-NEXT:    add w9, w10, w11
-; NONEON-NOSVE-NEXT:    add w10, w12, w13
-; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w9, w10, w14
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #1022]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
+; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #983]
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
+; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
+; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
+; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
+; NONEON-NOSVE-NEXT:    orr w8, w8, w12
+; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w10, w8
+; NONEON-NOSVE-NEXT:    orr w10, w11, w12
+; NONEON-NOSVE-NEXT:    orr w8, w8, w10
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xffffff80
 ; NONEON-NOSVE-NEXT:    add w8, w8, w9
 ; NONEON-NOSVE-NEXT:    add x9, sp, #720
-; NONEON-NOSVE-NEXT:    add w8, w8, w10
 ; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB2_2
 ; NONEON-NOSVE-NEXT:  // %bb.1: // %cond.load
 ; NONEON-NOSVE-NEXT:    ldrb w10, [x0]
@@ -481,7 +429,7 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:  .LBB2_19: // %else44
-; NONEON-NOSVE-NEXT:    add sp, sp, #1024
+; NONEON-NOSVE-NEXT:    add sp, sp, #992
 ; NONEON-NOSVE-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    ret
 ; NONEON-NOSVE-NEXT:  .LBB2_20: // %cond.load4
@@ -806,166 +754,62 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
 ; NONEON-NOSVE-LABEL: masked_load_v32i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    sub sp, sp, #2064
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 2080
+; NONEON-NOSVE-NEXT:    sub sp, sp, #2000
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 2016
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #2216]
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #2152]
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2272]
-; NONEON-NOSVE-NEXT:    ldr w11, [sp, #2176]
-; NONEON-NOSVE-NEXT:    ldr w12, [sp, #2160]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2024]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x1
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2264]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2016]
-; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2256]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2031]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2248]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2030]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x20
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2240]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2029]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x10
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2232]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2028]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x8
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2224]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2027]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x4
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2208]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2026]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x2
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2200]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2025]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2192]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2023]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #2184]
-; NONEON-NOSVE-NEXT:    and w9, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #2022]
+; NONEON-NOSVE-NEXT:    ldr w8, [sp, #2096]
+; NONEON-NOSVE-NEXT:    ldr w9, [sp, #2104]
+; NONEON-NOSVE-NEXT:    sbfx w15, w7, #0, #1
+; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2112]
+; NONEON-NOSVE-NEXT:    ldr w11, [sp, #2088]
+; NONEON-NOSVE-NEXT:    ldr w12, [sp, #2120]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #2168]
-; NONEON-NOSVE-NEXT:    and w10, w10, #0x20
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
 ; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT:    strb w10, [sp, #2021]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2020]
-; NONEON-NOSVE-NEXT:    and w8, w11, #0x8
-; NONEON-NOSVE-NEXT:    sbfx w10, w12, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2019]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x4
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #2088]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2018]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x2
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2136]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2017]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x1
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #2144]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2008]
-; NONEON-NOSVE-NEXT:    and w8, w1, #0x1
-; NONEON-NOSVE-NEXT:    ldr w11, [sp, #2104]
-; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2000]
-; NONEON-NOSVE-NEXT:    ldr w12, [sp, #2080]
-; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #2016]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2128]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2015]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #2120]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2014]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #2112]
-; NONEON-NOSVE-NEXT:    and w9, w9, #0x20
+; NONEON-NOSVE-NEXT:    ldr w13, [sp, #2136]
 ; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #2013]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #2096]
-; NONEON-NOSVE-NEXT:    and w10, w10, #0x10
-; NONEON-NOSVE-NEXT:    zip1 v0.16b, v0.16b, v1.16b
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT:    strb w10, [sp, #2012]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2011]
-; NONEON-NOSVE-NEXT:    and w8, w11, #0x4
-; NONEON-NOSVE-NEXT:    sbfx w10, w12, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2010]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x2
-; NONEON-NOSVE-NEXT:    sbfx w9, w7, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2009]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w10, w6, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2007]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w9, w5, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2006]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x20
-; NONEON-NOSVE-NEXT:    sbfx w10, w4, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2005]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x10
-; NONEON-NOSVE-NEXT:    sbfx w9, w3, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2004]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x8
+; NONEON-NOSVE-NEXT:    ldr w14, [sp, #2144]
+; NONEON-NOSVE-NEXT:    ldr w16, [sp, #2016]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
+; NONEON-NOSVE-NEXT:    and w9, w9, #0x4
+; NONEON-NOSVE-NEXT:    bfxil w8, w11, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w11, w12, #0, #1
+; NONEON-NOSVE-NEXT:    ldr w12, [sp, #2128]
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
+; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
+; NONEON-NOSVE-NEXT:    and w10, w11, #0x10
+; NONEON-NOSVE-NEXT:    sbfx w11, w12, #0, #1
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
+; NONEON-NOSVE-NEXT:    sbfx w10, w13, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w12, w4, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    and w9, w11, #0x20
+; NONEON-NOSVE-NEXT:    sbfx w11, w3, #0, #1
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w13, w5, #0, #1
+; NONEON-NOSVE-NEXT:    and w12, w12, #0x8
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
 ; NONEON-NOSVE-NEXT:    sbfx w10, w2, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2003]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2002]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2001]
-; NONEON-NOSVE-NEXT:    str q0, [sp, #2048]
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #2000]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2050]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2048]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #2052]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #2054]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #2056]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #2058]
-; NONEON-NOSVE-NEXT:    add w8, w9, w8
-; NONEON-NOSVE-NEXT:    add w9, w10, w11
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #2060]
-; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w9, w12, w13
-; NONEON-NOSVE-NEXT:    zip1 v0.16b, v0.16b, v1.16b
-; NONEON-NOSVE-NEXT:    add w9, w9, w10
-; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    str q0, [sp, #2032]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #2034]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #2032]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #2036]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #2038]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #2040]
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #2042]
-; NONEON-NOSVE-NEXT:    add w10, w12, w11
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #2044]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #2062]
-; NONEON-NOSVE-NEXT:    add w13, w13, w14
-; NONEON-NOSVE-NEXT:    add w14, w15, w16
-; NONEON-NOSVE-NEXT:    add w10, w10, w13
-; NONEON-NOSVE-NEXT:    add w11, w14, w11
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #2046]
-; NONEON-NOSVE-NEXT:    add w9, w10, w11
-; NONEON-NOSVE-NEXT:    add w10, w8, w12
-; NONEON-NOSVE-NEXT:    add w8, w9, w13
+; NONEON-NOSVE-NEXT:    and w11, w11, #0x4
+; NONEON-NOSVE-NEXT:    orr w11, w11, w12
+; NONEON-NOSVE-NEXT:    and w12, w13, #0x10
+; NONEON-NOSVE-NEXT:    sbfx w13, w6, #0, #1
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
+; NONEON-NOSVE-NEXT:    orr w11, w11, w12
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    bfxil w10, w1, #0, #1
+; NONEON-NOSVE-NEXT:    and w12, w13, #0x20
+; NONEON-NOSVE-NEXT:    and w13, w15, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w15, w16, #0, #1
+; NONEON-NOSVE-NEXT:    orr w9, w10, w11
+; NONEON-NOSVE-NEXT:    orr w10, w12, w13
+; NONEON-NOSVE-NEXT:    and w11, w14, #0xff80
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
+; NONEON-NOSVE-NEXT:    and w10, w15, #0xff80
+; NONEON-NOSVE-NEXT:    add w11, w8, w11
+; NONEON-NOSVE-NEXT:    add w8, w9, w10
 ; NONEON-NOSVE-NEXT:    adrp x9, .LCPI3_0
-; NONEON-NOSVE-NEXT:    bfi w8, w10, #16, #16
+; NONEON-NOSVE-NEXT:    bfi w8, w11, #16, #16
 ; NONEON-NOSVE-NEXT:    ldr q1, [x9, :lo12:.LCPI3_0]
 ; NONEON-NOSVE-NEXT:    add x9, sp, #1744
 ; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB3_2
@@ -1083,7 +927,7 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldr q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:  .LBB3_35: // %else92
-; NONEON-NOSVE-NEXT:    add sp, sp, #2064
+; NONEON-NOSVE-NEXT:    add sp, sp, #2000
 ; NONEON-NOSVE-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    ret
 ; NONEON-NOSVE-NEXT:  .LBB3_36: // %cond.load4
@@ -1996,94 +1840,42 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
 ; NONEON-NOSVE-LABEL: masked_load_v16f16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    sub sp, sp, #1024
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 1040
+; NONEON-NOSVE-NEXT:    sub sp, sp, #992
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 1008
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
 ; NONEON-NOSVE-NEXT:    str q0, [sp, #976]
 ; NONEON-NOSVE-NEXT:    adrp x9, .LCPI7_0
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #984]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1000]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #976]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #992]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #991]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1007]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #990]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1006]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #989]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1005]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #988]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1004]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #987]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1003]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #986]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1002]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #985]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1001]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #983]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #999]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #982]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #998]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #981]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #997]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #980]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #996]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #979]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #995]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #978]
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #979]
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #977]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #980]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #981]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #982]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #976]
+; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
 ; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #994]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #977]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #993]
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #992]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    zip1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
+; NONEON-NOSVE-NEXT:    sbfx w15, w15, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w8, w10
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #983]
+; NONEON-NOSVE-NEXT:    and w11, w11, #0x2
+; NONEON-NOSVE-NEXT:    and w13, w13, #0x10
+; NONEON-NOSVE-NEXT:    bfxil w11, w12, #0, #1
+; NONEON-NOSVE-NEXT:    and w12, w14, #0x20
+; NONEON-NOSVE-NEXT:    orr w8, w8, w13
+; NONEON-NOSVE-NEXT:    and w13, w15, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w11, w8
+; NONEON-NOSVE-NEXT:    orr w11, w12, w13
 ; NONEON-NOSVE-NEXT:    ldr q1, [x9, :lo12:.LCPI7_0]
+; NONEON-NOSVE-NEXT:    orr w8, w8, w11
+; NONEON-NOSVE-NEXT:    and w10, w10, #0xffffff80
 ; NONEON-NOSVE-NEXT:    add x9, sp, #720
-; NONEON-NOSVE-NEXT:    str q0, [sp, #1008]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #1010]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #1008]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #1012]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #1014]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #1016]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #1018]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #1020]
-; NONEON-NOSVE-NEXT:    add w8, w10, w8
-; NONEON-NOSVE-NEXT:    add w10, w11, w12
-; NONEON-NOSVE-NEXT:    add w11, w13, w14
-; NONEON-NOSVE-NEXT:    add w8, w8, w10
-; NONEON-NOSVE-NEXT:    add w10, w11, w15
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #1022]
 ; NONEON-NOSVE-NEXT:    add w8, w8, w10
-; NONEON-NOSVE-NEXT:    add w8, w8, w11
 ; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB7_2
 ; NONEON-NOSVE-NEXT:  // %bb.1: // %cond.load
 ; NONEON-NOSVE-NEXT:    fmov s0, wzr
@@ -2159,7 +1951,7 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:    str h1, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldr q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:  .LBB7_19: // %else44
-; NONEON-NOSVE-NEXT:    add sp, sp, #1024
+; NONEON-NOSVE-NEXT:    add sp, sp, #992
 ; NONEON-NOSVE-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; NONEON-NOSVE-NEXT:    ret
 ; NONEON-NOSVE-NEXT:  .LBB7_20: // %cond.load4

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
index 13b83d2ae3f07..0c3411e5f5514 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
@@ -172,89 +172,37 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
 ;
 ; NONEON-NOSVE-LABEL: masked_store_v16i8:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    str q0, [sp, #-48]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #7]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #6]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #4]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #3]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #6]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
+; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
 ; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    zip1 v0.16b, v0.16b, v1.16b
-; NONEON-NOSVE-NEXT:    str q0, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #34]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w8, w9, w8
-; NONEON-NOSVE-NEXT:    add w9, w10, w11
-; NONEON-NOSVE-NEXT:    add w10, w12, w13
-; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w9, w10, w14
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #46]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
+; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
+; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
+; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
+; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
+; NONEON-NOSVE-NEXT:    orr w8, w8, w12
+; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w10, w8
+; NONEON-NOSVE-NEXT:    orr w10, w11, w12
+; NONEON-NOSVE-NEXT:    orr w8, w8, w10
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xffffff80
 ; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w8, w8, w10
 ; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB2_17
 ; NONEON-NOSVE-NEXT:  // %bb.1: // %else
 ; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB2_18
@@ -287,7 +235,7 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:  .LBB2_15: // %else28
 ; NONEON-NOSVE-NEXT:    tbnz w8, #15, .LBB2_32
 ; NONEON-NOSVE-NEXT:  .LBB2_16: // %else30
-; NONEON-NOSVE-NEXT:    add sp, sp, #48
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
 ; NONEON-NOSVE-NEXT:  .LBB2_17: // %cond.store
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0]
@@ -336,7 +284,7 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:    tbz w8, #15, .LBB2_16
 ; NONEON-NOSVE-NEXT:  .LBB2_32: // %cond.store29
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #15]
-; NONEON-NOSVE-NEXT:    add sp, sp, #48
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
   ret void
@@ -421,328 +369,219 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) {
 ;
 ; NONEON-NOSVE-LABEL: masked_store_v32i8:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #80
-; NONEON-NOSVE-NEXT:    str x29, [sp, #64] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
-; NONEON-NOSVE-NEXT:    .cfi_offset w29, -16
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #216]
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #152]
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #272]
-; NONEON-NOSVE-NEXT:    ldr w11, [sp, #176]
-; NONEON-NOSVE-NEXT:    ldr w12, [sp, #160]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x1
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #264]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #256]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #248]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x20
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #240]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x10
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #232]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x8
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #224]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x4
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #208]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x2
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #200]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #192]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #184]
-; NONEON-NOSVE-NEXT:    and w9, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #22]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #168]
-; NONEON-NOSVE-NEXT:    and w10, w10, #0x20
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT:    strb w10, [sp, #21]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    and w8, w11, #0x8
-; NONEON-NOSVE-NEXT:    sbfx w10, w12, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x4
+; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #88]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x2
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #136]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x1
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #144]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    and w8, w1, #0x1
-; NONEON-NOSVE-NEXT:    ldr w11, [sp, #104]
-; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp]
-; NONEON-NOSVE-NEXT:    ldr w12, [sp, #80]
-; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #128]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #15]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w9, w10, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w10, [sp, #120]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #14]
-; NONEON-NOSVE-NEXT:    ldr w8, [sp, #112]
-; NONEON-NOSVE-NEXT:    and w9, w9, #0x20
-; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
-; NONEON-NOSVE-NEXT:    strb w9, [sp, #13]
+; NONEON-NOSVE-NEXT:    sbfx w15, w7, #0, #1
+; NONEON-NOSVE-NEXT:    ldr w10, [sp, #96]
+; NONEON-NOSVE-NEXT:    ldr w12, [sp, #104]
+; NONEON-NOSVE-NEXT:    ldr w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    ldr w9, [sp, #96]
-; NONEON-NOSVE-NEXT:    and w10, w10, #0x10
-; NONEON-NOSVE-NEXT:    zip1 v2.16b, v0.16b, v1.16b
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
 ; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
-; NONEON-NOSVE-NEXT:    strb w10, [sp, #12]
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #11]
-; NONEON-NOSVE-NEXT:    and w8, w11, #0x4
-; NONEON-NOSVE-NEXT:    sbfx w10, w12, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #10]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x2
-; NONEON-NOSVE-NEXT:    sbfx w9, w7, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #9]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x80
-; NONEON-NOSVE-NEXT:    sbfx w10, w6, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #7]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x40
-; NONEON-NOSVE-NEXT:    sbfx w9, w5, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #6]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x20
-; NONEON-NOSVE-NEXT:    sbfx w10, w4, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #5]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x10
-; NONEON-NOSVE-NEXT:    sbfx w9, w3, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #4]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x8
+; NONEON-NOSVE-NEXT:    ldr w13, [sp, #120]
+; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
+; NONEON-NOSVE-NEXT:    ldr w14, [sp, #128]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
+; NONEON-NOSVE-NEXT:    and w9, w9, #0x4
+; NONEON-NOSVE-NEXT:    ldr w16, [sp]
+; NONEON-NOSVE-NEXT:    bfxil w8, w11, #0, #1
+; NONEON-NOSVE-NEXT:    ldr w11, [sp, #112]
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x8
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
+; NONEON-NOSVE-NEXT:    and w10, w12, #0x10
+; NONEON-NOSVE-NEXT:    sbfx w12, w4, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w11, w11, #0, #1
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
+; NONEON-NOSVE-NEXT:    sbfx w10, w13, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    sbfx w13, w5, #0, #1
+; NONEON-NOSVE-NEXT:    and w12, w12, #0x8
+; NONEON-NOSVE-NEXT:    and w9, w11, #0x20
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w11, w3, #0, #1
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
 ; NONEON-NOSVE-NEXT:    sbfx w10, w2, #0, #1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #3]
-; NONEON-NOSVE-NEXT:    and w8, w9, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #2]
-; NONEON-NOSVE-NEXT:    and w8, w10, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #1]
-; NONEON-NOSVE-NEXT:    ldr q0, [sp]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    zip1 v0.16b, v0.16b, v1.16b
-; NONEON-NOSVE-NEXT:    stp q0, q2, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #48]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #52]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #54]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #56]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #58]
-; NONEON-NOSVE-NEXT:    add w8, w9, w8
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #40]
-; NONEON-NOSVE-NEXT:    add w9, w10, w11
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #60]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #34]
-; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w9, w12, w13
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #42]
-; NONEON-NOSVE-NEXT:    add w9, w9, w10
-; NONEON-NOSVE-NEXT:    add w10, w12, w11
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w12, w13, w14
-; NONEON-NOSVE-NEXT:    add w14, w15, w16
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #62]
-; NONEON-NOSVE-NEXT:    add w10, w10, w12
-; NONEON-NOSVE-NEXT:    add w11, w14, w11
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #46]
-; NONEON-NOSVE-NEXT:    add w9, w10, w11
-; NONEON-NOSVE-NEXT:    add w10, w8, w13
-; NONEON-NOSVE-NEXT:    add w8, w9, w12
-; NONEON-NOSVE-NEXT:    bfi w8, w10, #16, #16
-; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB3_34
+; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
+; NONEON-NOSVE-NEXT:    and w11, w11, #0x4
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
+; NONEON-NOSVE-NEXT:    orr w11, w11, w12
+; NONEON-NOSVE-NEXT:    and w12, w13, #0x10
+; NONEON-NOSVE-NEXT:    sbfx w13, w6, #0, #1
+; NONEON-NOSVE-NEXT:    bfxil w10, w1, #0, #1
+; NONEON-NOSVE-NEXT:    orr w11, w11, w12
+; NONEON-NOSVE-NEXT:    and w12, w13, #0x20
+; NONEON-NOSVE-NEXT:    and w13, w15, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w15, w16, #0, #1
+; NONEON-NOSVE-NEXT:    orr w9, w10, w11
+; NONEON-NOSVE-NEXT:    orr w10, w12, w13
+; NONEON-NOSVE-NEXT:    and w11, w14, #0xff80
+; NONEON-NOSVE-NEXT:    orr w9, w9, w10
+; NONEON-NOSVE-NEXT:    and w10, w15, #0xff80
+; NONEON-NOSVE-NEXT:    add w11, w8, w11
+; NONEON-NOSVE-NEXT:    add w8, w9, w10
+; NONEON-NOSVE-NEXT:    bfi w8, w11, #16, #16
+; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB3_33
 ; NONEON-NOSVE-NEXT:  // %bb.1: // %else
-; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB3_35
+; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB3_34
 ; NONEON-NOSVE-NEXT:  .LBB3_2: // %else2
-; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB3_36
+; NONEON-NOSVE-NEXT:    tbnz w8, #2, .LBB3_35
 ; NONEON-NOSVE-NEXT:  .LBB3_3: // %else4
-; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB3_37
+; NONEON-NOSVE-NEXT:    tbnz w8, #3, .LBB3_36
 ; NONEON-NOSVE-NEXT:  .LBB3_4: // %else6
-; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB3_38
+; NONEON-NOSVE-NEXT:    tbnz w8, #4, .LBB3_37
 ; NONEON-NOSVE-NEXT:  .LBB3_5: // %else8
-; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB3_39
+; NONEON-NOSVE-NEXT:    tbnz w8, #5, .LBB3_38
 ; NONEON-NOSVE-NEXT:  .LBB3_6: // %else10
-; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB3_40
+; NONEON-NOSVE-NEXT:    tbnz w8, #6, .LBB3_39
 ; NONEON-NOSVE-NEXT:  .LBB3_7: // %else12
-; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB3_41
+; NONEON-NOSVE-NEXT:    tbnz w8, #7, .LBB3_40
 ; NONEON-NOSVE-NEXT:  .LBB3_8: // %else14
-; NONEON-NOSVE-NEXT:    tbnz w8, #8, .LBB3_42
+; NONEON-NOSVE-NEXT:    tbnz w8, #8, .LBB3_41
 ; NONEON-NOSVE-NEXT:  .LBB3_9: // %else16
-; NONEON-NOSVE-NEXT:    tbnz w8, #9, .LBB3_43
+; NONEON-NOSVE-NEXT:    tbnz w8, #9, .LBB3_42
 ; NONEON-NOSVE-NEXT:  .LBB3_10: // %else18
-; NONEON-NOSVE-NEXT:    tbnz w8, #10, .LBB3_44
+; NONEON-NOSVE-NEXT:    tbnz w8, #10, .LBB3_43
 ; NONEON-NOSVE-NEXT:  .LBB3_11: // %else20
-; NONEON-NOSVE-NEXT:    tbnz w8, #11, .LBB3_45
+; NONEON-NOSVE-NEXT:    tbnz w8, #11, .LBB3_44
 ; NONEON-NOSVE-NEXT:  .LBB3_12: // %else22
-; NONEON-NOSVE-NEXT:    tbnz w8, #12, .LBB3_46
+; NONEON-NOSVE-NEXT:    tbnz w8, #12, .LBB3_45
 ; NONEON-NOSVE-NEXT:  .LBB3_13: // %else24
-; NONEON-NOSVE-NEXT:    tbnz w8, #13, .LBB3_47
+; NONEON-NOSVE-NEXT:    tbnz w8, #13, .LBB3_46
 ; NONEON-NOSVE-NEXT:  .LBB3_14: // %else26
-; NONEON-NOSVE-NEXT:    tbnz w8, #14, .LBB3_48
+; NONEON-NOSVE-NEXT:    tbnz w8, #14, .LBB3_47
 ; NONEON-NOSVE-NEXT:  .LBB3_15: // %else28
-; NONEON-NOSVE-NEXT:    tbnz w8, #15, .LBB3_49
+; NONEON-NOSVE-NEXT:    tbnz w8, #15, .LBB3_48
 ; NONEON-NOSVE-NEXT:  .LBB3_16: // %else30
-; NONEON-NOSVE-NEXT:    tbnz w8, #16, .LBB3_50
+; NONEON-NOSVE-NEXT:    tbnz w8, #16, .LBB3_49
 ; NONEON-NOSVE-NEXT:  .LBB3_17: // %else32
-; NONEON-NOSVE-NEXT:    tbnz w8, #17, .LBB3_51
+; NONEON-NOSVE-NEXT:    tbnz w8, #17, .LBB3_50
 ; NONEON-NOSVE-NEXT:  .LBB3_18: // %else34
-; NONEON-NOSVE-NEXT:    tbnz w8, #18, .LBB3_52
+; NONEON-NOSVE-NEXT:    tbnz w8, #18, .LBB3_51
 ; NONEON-NOSVE-NEXT:  .LBB3_19: // %else36
-; NONEON-NOSVE-NEXT:    tbnz w8, #19, .LBB3_53
+; NONEON-NOSVE-NEXT:    tbnz w8, #19, .LBB3_52
 ; NONEON-NOSVE-NEXT:  .LBB3_20: // %else38
-; NONEON-NOSVE-NEXT:    tbnz w8, #20, .LBB3_54
+; NONEON-NOSVE-NEXT:    tbnz w8, #20, .LBB3_53
 ; NONEON-NOSVE-NEXT:  .LBB3_21: // %else40
-; NONEON-NOSVE-NEXT:    tbnz w8, #21, .LBB3_55
+; NONEON-NOSVE-NEXT:    tbnz w8, #21, .LBB3_54
 ; NONEON-NOSVE-NEXT:  .LBB3_22: // %else42
-; NONEON-NOSVE-NEXT:    tbnz w8, #22, .LBB3_56
+; NONEON-NOSVE-NEXT:    tbnz w8, #22, .LBB3_55
 ; NONEON-NOSVE-NEXT:  .LBB3_23: // %else44
-; NONEON-NOSVE-NEXT:    tbnz w8, #23, .LBB3_57
+; NONEON-NOSVE-NEXT:    tbnz w8, #23, .LBB3_56
 ; NONEON-NOSVE-NEXT:  .LBB3_24: // %else46
-; NONEON-NOSVE-NEXT:    tbnz w8, #24, .LBB3_58
+; NONEON-NOSVE-NEXT:    tbnz w8, #24, .LBB3_57
 ; NONEON-NOSVE-NEXT:  .LBB3_25: // %else48
-; NONEON-NOSVE-NEXT:    tbnz w8, #25, .LBB3_59
+; NONEON-NOSVE-NEXT:    tbnz w8, #25, .LBB3_58
 ; NONEON-NOSVE-NEXT:  .LBB3_26: // %else50
-; NONEON-NOSVE-NEXT:    tbnz w8, #26, .LBB3_60
+; NONEON-NOSVE-NEXT:    tbnz w8, #26, .LBB3_59
 ; NONEON-NOSVE-NEXT:  .LBB3_27: // %else52
-; NONEON-NOSVE-NEXT:    tbnz w8, #27, .LBB3_61
+; NONEON-NOSVE-NEXT:    tbnz w8, #27, .LBB3_60
 ; NONEON-NOSVE-NEXT:  .LBB3_28: // %else54
-; NONEON-NOSVE-NEXT:    tbnz w8, #28, .LBB3_62
+; NONEON-NOSVE-NEXT:    tbnz w8, #28, .LBB3_61
 ; NONEON-NOSVE-NEXT:  .LBB3_29: // %else56
-; NONEON-NOSVE-NEXT:    tbnz w8, #29, .LBB3_63
+; NONEON-NOSVE-NEXT:    tbnz w8, #29, .LBB3_62
 ; NONEON-NOSVE-NEXT:  .LBB3_30: // %else58
-; NONEON-NOSVE-NEXT:    tbnz w8, #30, .LBB3_64
+; NONEON-NOSVE-NEXT:    tbnz w8, #30, .LBB3_63
 ; NONEON-NOSVE-NEXT:  .LBB3_31: // %else60
-; NONEON-NOSVE-NEXT:    tbz w8, #31, .LBB3_33
-; NONEON-NOSVE-NEXT:  .LBB3_32: // %cond.store61
-; NONEON-NOSVE-NEXT:    strb wzr, [x0, #31]
-; NONEON-NOSVE-NEXT:  .LBB3_33: // %else62
-; NONEON-NOSVE-NEXT:    ldr x29, [sp, #64] // 8-byte Folded Reload
-; NONEON-NOSVE-NEXT:    add sp, sp, #80
+; NONEON-NOSVE-NEXT:    tbnz w8, #31, .LBB3_64
+; NONEON-NOSVE-NEXT:  .LBB3_32: // %else62
 ; NONEON-NOSVE-NEXT:    ret
-; NONEON-NOSVE-NEXT:  .LBB3_34: // %cond.store
+; NONEON-NOSVE-NEXT:  .LBB3_33: // %cond.store
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0]
 ; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB3_2
-; NONEON-NOSVE-NEXT:  .LBB3_35: // %cond.store1
+; NONEON-NOSVE-NEXT:  .LBB3_34: // %cond.store1
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #1]
 ; NONEON-NOSVE-NEXT:    tbz w8, #2, .LBB3_3
-; NONEON-NOSVE-NEXT:  .LBB3_36: // %cond.store3
+; NONEON-NOSVE-NEXT:  .LBB3_35: // %cond.store3
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #2]
 ; NONEON-NOSVE-NEXT:    tbz w8, #3, .LBB3_4
-; NONEON-NOSVE-NEXT:  .LBB3_37: // %cond.store5
+; NONEON-NOSVE-NEXT:  .LBB3_36: // %cond.store5
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #3]
 ; NONEON-NOSVE-NEXT:    tbz w8, #4, .LBB3_5
-; NONEON-NOSVE-NEXT:  .LBB3_38: // %cond.store7
+; NONEON-NOSVE-NEXT:  .LBB3_37: // %cond.store7
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #4]
 ; NONEON-NOSVE-NEXT:    tbz w8, #5, .LBB3_6
-; NONEON-NOSVE-NEXT:  .LBB3_39: // %cond.store9
+; NONEON-NOSVE-NEXT:  .LBB3_38: // %cond.store9
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #5]
 ; NONEON-NOSVE-NEXT:    tbz w8, #6, .LBB3_7
-; NONEON-NOSVE-NEXT:  .LBB3_40: // %cond.store11
+; NONEON-NOSVE-NEXT:  .LBB3_39: // %cond.store11
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #6]
 ; NONEON-NOSVE-NEXT:    tbz w8, #7, .LBB3_8
-; NONEON-NOSVE-NEXT:  .LBB3_41: // %cond.store13
+; NONEON-NOSVE-NEXT:  .LBB3_40: // %cond.store13
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #7]
 ; NONEON-NOSVE-NEXT:    tbz w8, #8, .LBB3_9
-; NONEON-NOSVE-NEXT:  .LBB3_42: // %cond.store15
+; NONEON-NOSVE-NEXT:  .LBB3_41: // %cond.store15
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #8]
 ; NONEON-NOSVE-NEXT:    tbz w8, #9, .LBB3_10
-; NONEON-NOSVE-NEXT:  .LBB3_43: // %cond.store17
+; NONEON-NOSVE-NEXT:  .LBB3_42: // %cond.store17
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #9]
 ; NONEON-NOSVE-NEXT:    tbz w8, #10, .LBB3_11
-; NONEON-NOSVE-NEXT:  .LBB3_44: // %cond.store19
+; NONEON-NOSVE-NEXT:  .LBB3_43: // %cond.store19
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #10]
 ; NONEON-NOSVE-NEXT:    tbz w8, #11, .LBB3_12
-; NONEON-NOSVE-NEXT:  .LBB3_45: // %cond.store21
+; NONEON-NOSVE-NEXT:  .LBB3_44: // %cond.store21
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #11]
 ; NONEON-NOSVE-NEXT:    tbz w8, #12, .LBB3_13
-; NONEON-NOSVE-NEXT:  .LBB3_46: // %cond.store23
+; NONEON-NOSVE-NEXT:  .LBB3_45: // %cond.store23
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #12]
 ; NONEON-NOSVE-NEXT:    tbz w8, #13, .LBB3_14
-; NONEON-NOSVE-NEXT:  .LBB3_47: // %cond.store25
+; NONEON-NOSVE-NEXT:  .LBB3_46: // %cond.store25
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #13]
 ; NONEON-NOSVE-NEXT:    tbz w8, #14, .LBB3_15
-; NONEON-NOSVE-NEXT:  .LBB3_48: // %cond.store27
+; NONEON-NOSVE-NEXT:  .LBB3_47: // %cond.store27
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #14]
 ; NONEON-NOSVE-NEXT:    tbz w8, #15, .LBB3_16
-; NONEON-NOSVE-NEXT:  .LBB3_49: // %cond.store29
+; NONEON-NOSVE-NEXT:  .LBB3_48: // %cond.store29
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #15]
 ; NONEON-NOSVE-NEXT:    tbz w8, #16, .LBB3_17
-; NONEON-NOSVE-NEXT:  .LBB3_50: // %cond.store31
+; NONEON-NOSVE-NEXT:  .LBB3_49: // %cond.store31
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #16]
 ; NONEON-NOSVE-NEXT:    tbz w8, #17, .LBB3_18
-; NONEON-NOSVE-NEXT:  .LBB3_51: // %cond.store33
+; NONEON-NOSVE-NEXT:  .LBB3_50: // %cond.store33
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #17]
 ; NONEON-NOSVE-NEXT:    tbz w8, #18, .LBB3_19
-; NONEON-NOSVE-NEXT:  .LBB3_52: // %cond.store35
+; NONEON-NOSVE-NEXT:  .LBB3_51: // %cond.store35
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #18]
 ; NONEON-NOSVE-NEXT:    tbz w8, #19, .LBB3_20
-; NONEON-NOSVE-NEXT:  .LBB3_53: // %cond.store37
+; NONEON-NOSVE-NEXT:  .LBB3_52: // %cond.store37
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #19]
 ; NONEON-NOSVE-NEXT:    tbz w8, #20, .LBB3_21
-; NONEON-NOSVE-NEXT:  .LBB3_54: // %cond.store39
+; NONEON-NOSVE-NEXT:  .LBB3_53: // %cond.store39
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #20]
 ; NONEON-NOSVE-NEXT:    tbz w8, #21, .LBB3_22
-; NONEON-NOSVE-NEXT:  .LBB3_55: // %cond.store41
+; NONEON-NOSVE-NEXT:  .LBB3_54: // %cond.store41
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #21]
 ; NONEON-NOSVE-NEXT:    tbz w8, #22, .LBB3_23
-; NONEON-NOSVE-NEXT:  .LBB3_56: // %cond.store43
+; NONEON-NOSVE-NEXT:  .LBB3_55: // %cond.store43
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #22]
 ; NONEON-NOSVE-NEXT:    tbz w8, #23, .LBB3_24
-; NONEON-NOSVE-NEXT:  .LBB3_57: // %cond.store45
+; NONEON-NOSVE-NEXT:  .LBB3_56: // %cond.store45
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #23]
 ; NONEON-NOSVE-NEXT:    tbz w8, #24, .LBB3_25
-; NONEON-NOSVE-NEXT:  .LBB3_58: // %cond.store47
+; NONEON-NOSVE-NEXT:  .LBB3_57: // %cond.store47
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #24]
 ; NONEON-NOSVE-NEXT:    tbz w8, #25, .LBB3_26
-; NONEON-NOSVE-NEXT:  .LBB3_59: // %cond.store49
+; NONEON-NOSVE-NEXT:  .LBB3_58: // %cond.store49
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #25]
 ; NONEON-NOSVE-NEXT:    tbz w8, #26, .LBB3_27
-; NONEON-NOSVE-NEXT:  .LBB3_60: // %cond.store51
+; NONEON-NOSVE-NEXT:  .LBB3_59: // %cond.store51
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #26]
 ; NONEON-NOSVE-NEXT:    tbz w8, #27, .LBB3_28
-; NONEON-NOSVE-NEXT:  .LBB3_61: // %cond.store53
+; NONEON-NOSVE-NEXT:  .LBB3_60: // %cond.store53
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #27]
 ; NONEON-NOSVE-NEXT:    tbz w8, #28, .LBB3_29
-; NONEON-NOSVE-NEXT:  .LBB3_62: // %cond.store55
+; NONEON-NOSVE-NEXT:  .LBB3_61: // %cond.store55
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #28]
 ; NONEON-NOSVE-NEXT:    tbz w8, #29, .LBB3_30
-; NONEON-NOSVE-NEXT:  .LBB3_63: // %cond.store57
+; NONEON-NOSVE-NEXT:  .LBB3_62: // %cond.store57
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #29]
 ; NONEON-NOSVE-NEXT:    tbz w8, #30, .LBB3_31
-; NONEON-NOSVE-NEXT:  .LBB3_64: // %cond.store59
+; NONEON-NOSVE-NEXT:  .LBB3_63: // %cond.store59
 ; NONEON-NOSVE-NEXT:    strb wzr, [x0, #30]
-; NONEON-NOSVE-NEXT:    tbnz w8, #31, .LBB3_32
-; NONEON-NOSVE-NEXT:    b .LBB3_33
+; NONEON-NOSVE-NEXT:    tbz w8, #31, .LBB3_32
+; NONEON-NOSVE-NEXT:  .LBB3_64: // %cond.store61
+; NONEON-NOSVE-NEXT:    strb wzr, [x0, #31]
+; NONEON-NOSVE-NEXT:    ret
   call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask)
   ret void
 }
@@ -981,89 +820,37 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
 ;
 ; NONEON-NOSVE-LABEL: masked_store_v16f16:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    str q0, [sp, #-48]!
-; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp]
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x1
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #15]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #10]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #7]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x80
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #6]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x40
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x20
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #4]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x10
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #3]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x8
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
+; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
+; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
+; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #6]
 ; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w10, w10, #0, #1
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
+; NONEON-NOSVE-NEXT:    sbfx w12, w12, #0, #1
+; NONEON-NOSVE-NEXT:    sbfx w13, w13, #0, #1
 ; NONEON-NOSVE-NEXT:    and w8, w8, #0x4
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
-; NONEON-NOSVE-NEXT:    sbfx w8, w8, #0, #1
-; NONEON-NOSVE-NEXT:    and w8, w8, #0x2
-; NONEON-NOSVE-NEXT:    strb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; NONEON-NOSVE-NEXT:    zip1 v0.16b, v0.16b, v1.16b
-; NONEON-NOSVE-NEXT:    str q0, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #34]
-; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #38]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #42]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #44]
-; NONEON-NOSVE-NEXT:    add w8, w9, w8
-; NONEON-NOSVE-NEXT:    add w9, w10, w11
-; NONEON-NOSVE-NEXT:    add w10, w12, w13
-; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w9, w10, w14
-; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #46]
+; NONEON-NOSVE-NEXT:    and w9, w9, #0x8
+; NONEON-NOSVE-NEXT:    sbfx w14, w14, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w8, w9
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
+; NONEON-NOSVE-NEXT:    and w10, w10, #0x2
+; NONEON-NOSVE-NEXT:    and w12, w12, #0x10
+; NONEON-NOSVE-NEXT:    bfxil w10, w11, #0, #1
+; NONEON-NOSVE-NEXT:    and w11, w13, #0x20
+; NONEON-NOSVE-NEXT:    orr w8, w8, w12
+; NONEON-NOSVE-NEXT:    and w12, w14, #0x40
+; NONEON-NOSVE-NEXT:    sbfx w9, w9, #0, #1
+; NONEON-NOSVE-NEXT:    orr w8, w10, w8
+; NONEON-NOSVE-NEXT:    orr w10, w11, w12
+; NONEON-NOSVE-NEXT:    orr w8, w8, w10
+; NONEON-NOSVE-NEXT:    and w9, w9, #0xffffff80
 ; NONEON-NOSVE-NEXT:    add w8, w8, w9
-; NONEON-NOSVE-NEXT:    add w8, w8, w10
 ; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB7_17
 ; NONEON-NOSVE-NEXT:  // %bb.1: // %else
 ; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB7_18
@@ -1096,7 +883,7 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:  .LBB7_15: // %else28
 ; NONEON-NOSVE-NEXT:    tbnz w8, #15, .LBB7_32
 ; NONEON-NOSVE-NEXT:  .LBB7_16: // %else30
-; NONEON-NOSVE-NEXT:    add sp, sp, #48
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
 ; NONEON-NOSVE-NEXT:  .LBB7_17: // %cond.store
 ; NONEON-NOSVE-NEXT:    fmov s0, wzr
@@ -1161,7 +948,7 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:  .LBB7_32: // %cond.store29
 ; NONEON-NOSVE-NEXT:    fmov s0, wzr
 ; NONEON-NOSVE-NEXT:    str h0, [x0, #30]
-; NONEON-NOSVE-NEXT:    add sp, sp, #48
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
   ret void

diff  --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s
deleted file mode 100644
index 41868a8c790f1..0000000000000
--- a/llvm/test/MC/AArch64/SME/streaming-mode-neon-bf16.s
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \
-// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \
-// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=-neon,+sme < %s \
-// RUN:        | llvm-objdump --mattr=-neon,+sme -d - | FileCheck %s --check-prefix=CHECK-INST
-// Disassemble encoding and check the re-encoding (-show-encoding) matches.
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \
-// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=-neon,+sme -disassemble -show-encoding \
-// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-
-bfcvt h5, s3
-// CHECK-INST: bfcvt h5, s3
-// CHECK-ENCODING: [0x65,0x40,0x63,0x1e]
-// CHECK-ERROR: instruction requires: bf16 neon or sme

diff  --git a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s b/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
deleted file mode 100644
index 138a1fe0bb8e9..0000000000000
--- a/llvm/test/MC/AArch64/SME/streaming-mode-neon.s
+++ /dev/null
@@ -1,132 +0,0 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \
-// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=-neon < %s 2>&1 \
-// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=-neon,+sme < %s \
-// RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
-// Disassemble encoding and check the re-encoding (-show-encoding) matches.
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=-neon,+sme < %s \
-// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=-neon,+sme -disassemble -show-encoding \
-// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
-
-// Scalar FP instructions
-
-fmulx s0, s1, s2
-// CHECK-INST: fmulx s0, s1, s2
-// CHECK-ENCODING: [0x20,0xdc,0x22,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-fmulx d0, d1, d2
-// CHECK-INST: fmulx d0, d1, d2
-// CHECK-ENCODING: [0x20,0xdc,0x62,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frecps s0, s1, s2
-// CHECK-INST: frecps s0, s1, s2
-// CHECK-ENCODING: [0x20,0xfc,0x22,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frecps d0, d1, d2
-// CHECK-INST: frecps d0, d1, d2
-// CHECK-ENCODING: [0x20,0xfc,0x62,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frsqrts s0, s1, s2
-// CHECK-INST: frsqrts s0, s1, s2
-// CHECK-ENCODING: [0x20,0xfc,0xa2,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frsqrts d0, d1, d2
-// CHECK-INST: frsqrts d0, d1, d2
-// CHECK-ENCODING: [0x20,0xfc,0xe2,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frecpe s0, s1
-// CHECK-INST: frecpe s0, s1
-// CHECK-ENCODING: [0x20,0xd8,0xa1,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frecpe d0, d1
-// CHECK-INST: frecpe d0, d1
-// CHECK-ENCODING: [0x20,0xd8,0xe1,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frecpx s0, s1
-// CHECK-INST: frecpx s0, s1
-// CHECK-ENCODING: [0x20,0xf8,0xa1,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frecpx d0, d1
-// CHECK-INST: frecpx d0, d1
-// CHECK-ENCODING: [0x20,0xf8,0xe1,0x5e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frsqrte s0, s1
-// CHECK-INST: frsqrte s0, s1
-// CHECK-ENCODING: [0x20,0xd8,0xa1,0x7e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-frsqrte d0, d1
-// CHECK-INST: frsqrte d0, d1
-// CHECK-ENCODING: [0x20,0xd8,0xe1,0x7e]
-// CHECK-ERROR: instruction requires: neon or sme
-
-// Vector to GPR integer move instructions
-
-smov w0, v0.b[0]
-// CHECK-INST: smov w0, v0.b[0]
-// CHECK-ENCODING: [0x00,0x2c,0x01,0x0e]
-// CHECK-ERROR: instruction requires: neon
-
-smov x0, v0.b[0]
-// CHECK-INST: smov x0, v0.b[0]
-// CHECK-ENCODING: [0x00,0x2c,0x01,0x4e]
-// CHECK-ERROR: instruction requires: neon
-
-smov w0, v0.h[0]
-// CHECK-INST: smov w0, v0.h[0]
-// CHECK-ENCODING: [0x00,0x2c,0x02,0x0e]
-// CHECK-ERROR: instruction requires: neon
-
-smov x0, v0.h[0]
-// CHECK-INST: smov x0, v0.h[0]
-// CHECK-ENCODING: [0x00,0x2c,0x02,0x4e]
-// CHECK-ERROR: instruction requires: neon
-
-smov x0, v0.s[0]
-// CHECK-INST: smov x0, v0.s[0]
-// CHECK-ENCODING: [0x00,0x2c,0x04,0x4e]
-// CHECK-ERROR: instruction requires: neon
-
-umov w0, v0.b[0]
-// CHECK-INST: umov w0, v0.b[0]
-// CHECK-ENCODING: [0x00,0x3c,0x01,0x0e]
-// CHECK-ERROR: instruction requires: neon
-
-umov w0, v0.h[0]
-// CHECK-INST: umov w0, v0.h[0]
-// CHECK-ENCODING: [0x00,0x3c,0x02,0x0e]
-// CHECK-ERROR: instruction requires: neon
-
-umov w0, v0.s[0]
-// CHECK-INST: mov w0, v0.s[0]
-// CHECK-ENCODING: [0x00,0x3c,0x04,0x0e]
-// CHECK-ERROR: instruction requires: neon
-
-umov x0, v0.d[0]
-// CHECK-INST: mov x0, v0.d[0]
-// CHECK-ENCODING: [0x00,0x3c,0x08,0x4e]
-// CHECK-ERROR: instruction requires: neon
-
-// Aliases
-
-mov w0, v0.s[0]
-// CHECK-INST: mov w0, v0.s[0]
-// CHECK-ENCODING: [0x00,0x3c,0x04,0x0e]
-// CHECK-ERROR: instruction requires: neon
-
-mov x0, v0.d[0]
-// CHECK-INST: mov x0, v0.d[0]
-// CHECK-ENCODING: [0x00,0x3c,0x08,0x4e]
-// CHECK-ERROR: instruction requires: neon


        


More information about the llvm-commits mailing list