[llvm] [AArch64] Expand vector ops when NEON and SVE are unavailable. (PR #90833)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 2 01:29:21 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-aarch64
Author: Sander de Smalen (sdesmalen-arm)
<details>
<summary>Changes</summary>
Unlike `+noneon`, we must assume that vector types are available, i.e. it is
valid to pass/return vector arguments to and from functions. However, the
compiler must make sure to scalarize any vector operations.
---
Patch is 3.02 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90833.diff
60 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+4-2)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+65-16)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+2)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll (+196-30)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll (+1893-274)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll (+23-7)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll (+29-3)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll (+93-26)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+253-85)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll (+42-8)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll (+42-12)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll (+749-91)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll (+2506-671)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll (+2545-2243)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll (+22-7)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+598-131)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll (+491-78)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll (+1232-808)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll (+15-11)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll (+861-577)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll (+1785-245)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll (+257-48)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll (+1660-594)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll (+361-150)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll (+313-54)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll (+1999-124)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll (+992-56)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+1042-1002)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+3103-613)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll (+3129-296)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll (+1431-72)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll (+2288-116)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll (+45-2)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll (+1496-168)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll (+1474-168)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+1174-1480)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll (+515-66)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll (+1500-132)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll (+1442-453)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll (+756-61)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll (+125-25)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll (+127-18)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll (+25-8)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll (+666-222)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll (+2504-810)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll (+585-221)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll (+832-105)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll (+429-43)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll (+1091-170)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll (+343-56)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll (+881-55)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll (+697-71)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll (+79)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll ()
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll ()
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll ()
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll ()
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll ()
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll ()
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll ()
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bfc3e08c1632de..3175d75eea0860 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3731,8 +3731,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::SUB: {
EVT VT = Node->getValueType(0);
- assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
- TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+ assert((VT.isFixedLengthVector() || // fixed length ADD can be expanded to
+ // scalar ADD
+ (TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::XOR, VT))) &&
"Don't know how to expand this subtraction!");
Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2af679e0755b54..12a949a7733b28 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -357,7 +357,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
- if (Subtarget->hasNEON()) {
+ if (Subtarget->isNeonAvailable()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
@@ -378,6 +378,28 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
addQRTypeForNEON(MVT::v8bf16);
+ } else if (Subtarget->hasNEON() ||
+ Subtarget->useSVEForFixedLengthVectors()) {
+ addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
+ addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
+
+ addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v4f16, &AArch64::FPR64RegClass);
+ addRegisterClass(MVT::v4bf16, &AArch64::FPR64RegClass);
+
+ addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v8f16, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v8bf16, &AArch64::FPR128RegClass);
}
if (Subtarget->hasSVEorSME()) {
@@ -1125,7 +1147,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- if (Subtarget->hasNEON()) {
+ if (Subtarget->isNeonAvailable()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
for (auto Op :
@@ -1328,6 +1350,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// FADDP custom lowering
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::FADD, VT, Custom);
+ } else {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
+ setOperationAction(Op, VT, Expand);
+
+ if (VT.is128BitVector() || VT.is64BitVector()) {
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+ setOperationAction(ISD::BITCAST, VT,
+ Subtarget->isLittleEndian() ? Legal : Expand);
+ }
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
+ setTruncStoreAction(VT, InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ }
+ }
}
if (Subtarget->hasSME()) {
@@ -9377,7 +9417,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
- if (!Subtarget->hasNEON())
+ if (!Subtarget->isNeonAvailable() &&
+ !Subtarget->useSVEForFixedLengthVectors())
return SDValue();
EVT VT = Op.getValueType();
@@ -14110,6 +14151,13 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}
+bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
+ EVT VT, unsigned DefinedValues) const {
+ if (!Subtarget->isNeonAvailable())
+ return false;
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
@@ -15979,7 +16027,8 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType(
UseScalable = false;
- if (!VecTy->isScalableTy() && !Subtarget->hasNEON())
+ if (!VecTy->isScalableTy() && !Subtarget->isNeonAvailable() &&
+ !Subtarget->useSVEForFixedLengthVectors())
return false;
if (VecTy->isScalableTy() && !Subtarget->hasSVEorSME())
@@ -16003,18 +16052,20 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType(
}
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- if (!Subtarget->isNeonAvailable() ||
- (Subtarget->useSVEForFixedLengthVectors() &&
- (VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
- (VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
- isPowerOf2_32(MinElts) && VecSize > 128)))) {
- UseScalable = true;
- return true;
+ if (Subtarget->useSVEForFixedLengthVectors()) {
+ unsigned MinSVEVectorSize =
+ std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+ if (VecSize % MinSVEVectorSize == 0 ||
+ (VecSize < MinSVEVectorSize && isPowerOf2_32(MinElts) &&
+ VecSize > 128)) {
+ UseScalable = true;
+ return true;
+ }
}
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
- return VecSize == 64 || VecSize % 128 == 0;
+ return Subtarget->isNeonAvailable() && (VecSize == 64 || VecSize % 128 == 0);
}
static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
@@ -16105,8 +16156,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
bool UseScalable;
- if (!Subtarget->hasNEON() ||
- !isLegalInterleavedAccessType(VTy, DL, UseScalable))
+ if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
@@ -16283,8 +16333,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() ||
- !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
+ if (!isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index fbdc4de5617fe9..5a402b8df099f0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1020,6 +1020,8 @@ class AArch64TargetLowering : public TargetLowering {
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
+ bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
+
unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
index fd9259048df543..4c3188fd7b2381 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
@@ -18,8 +18,15 @@ define <4 x i8> @vls_sve_and_4xi8(<4 x i8> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_4xi8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi d1, #0xff000000ff0000
-; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #6]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #12]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #8]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #2]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #10]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%c = and <4 x i8> %b, <i8 0, i8 255, i8 0, i8 255>
ret <4 x i8> %c
@@ -37,8 +44,21 @@ define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_8xi8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff00
-; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #14]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #12]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #15]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #10]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #13]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #8]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #11]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #9]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%c = and <8 x i8> %b, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <8 x i8> %c
@@ -56,8 +76,33 @@ define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_16xi8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi v1.2d, #0xff00ff00ff00ff00
-; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #30]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #28]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #31]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #26]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #29]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #24]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #27]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #22]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #25]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #20]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #23]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #18]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #21]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #16]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #19]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #17]
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%c = and <16 x i8> %b, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <16 x i8> %c
@@ -78,9 +123,57 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_32xi8:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi v2.2d, #0xff00ff00ff00ff00
-; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b
-; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]!
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #15]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #46]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #44]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #47]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #13]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #42]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #45]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #11]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #40]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #43]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #38]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #41]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #7]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #36]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #39]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #5]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #34]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #37]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #3]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #35]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #62]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #33]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #31]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #60]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #63]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #29]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #58]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #61]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #56]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #59]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #25]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #54]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #57]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #52]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #55]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #50]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #53]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT: strb wzr, [sp, #48]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #51]
+; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #49]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%b = and <32 x i8> %ap, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255,
i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
@@ -102,9 +195,11 @@ define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_2xi16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
-; NONEON-NOSVE-NEXT: mov v0.s[0], wzr
-; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%c = and <2 x i16> %b, <i16 0, i16 65535>
ret <2 x i16> %c
@@ -122,8 +217,15 @@ define <4 x i16> @vls_sve_and_4xi16(<4 x i16> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_4xi16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi d1, #0xffff0000ffff0000
-; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #12]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #8]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #14]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #10]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%c = and <4 x i16> %b, <i16 0, i16 65535, i16 0, i16 65535>
ret <4 x i16> %c
@@ -141,8 +243,21 @@ define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_8xi16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi v1.2d, #0xffff0000ffff0000
-; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #28]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #24]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #30]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #20]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #26]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #16]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #22]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #18]
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #16]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
%c = and <8 x i16> %b, <i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535>
ret <8 x i16> %c
@@ -163,9 +278,33 @@ define <16 x i16> @vls_sve_and_16xi16(<16 x i16> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_16xi16:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi v2.2d, #0xffff0000ffff0000
-; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b
-; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-64]!
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #14]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #44]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #40]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #46]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #36]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #42]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #6]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #32]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #38]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #60]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #34]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #56]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #62]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #52]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #58]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT: strh wzr, [sp, #48]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #54]
+; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT: strh w8, [sp, #50]
+; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #64
; NONEON-NOSVE-NEXT: ret
%c = and <16 x i16> %b, <i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535>
ret <16 x i16> %c
@@ -183,9 +322,11 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_2xi32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
-; NONEON-NOSVE-NEXT: mov v0.s[0], wzr
-; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: str d0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #4]
+; NONEON-NOSVE-NEXT: stp wzr, w8, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
%c = and <2 x i32> %b, <i32 0, i32 4294967295>
ret <2 x i32> %c
@@ -203,8 +344,13 @@ define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %b) nounwind {
;
; NONEON-NOSVE-LABEL: vls_sve_and_4xi32:
; NONEON-NOSVE: // %bb.0:
-; NONEON-NOSVE-NEXT: movi v1.2d, #0xffffffff00000000
-; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/90833
More information about the llvm-commits
mailing list