[clang] [llvm] [Clang] Add diagnostic reasoning for unsatisfied is_destructible trait (PR #166967)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 9 10:37:04 PST 2025
https://github.com/AnushaK6 updated https://github.com/llvm/llvm-project/pull/166967
>From 14182fb64adde6e86a96f7a6ea0c22749124b827 Mon Sep 17 00:00:00 2001
From: AnushaK6 <anusha.k1300 at gmail.com>
Date: Fri, 7 Nov 2025 22:33:04 +0530
Subject: [PATCH 1/4] Add diagnostic reasoning for unsatisfied is_destructible
trait
---
.../clang/Basic/DiagnosticSemaKinds.td | 5 +-
clang/lib/Sema/SemaTypeTraits.cpp | 62 +++++++++++++++
.../type-traits-unsatisfied-diags-std.cpp | 76 +++++++++++++++++++
.../SemaCXX/type-traits-unsatisfied-diags.cpp | 49 ++++++++++++
4 files changed, 191 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4e369be0bbb92..ee357936e3a87 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1776,7 +1776,8 @@ def note_unsatisfied_trait
"%StandardLayout{standard-layout}|"
"%Aggregate{aggregate}|"
"%Final{final}|"
- "%Abstract{abstract}"
+ "%Abstract{abstract}|"
+ "%Destructible{destructible}"
"}1">;
def note_unsatisfied_trait_reason
@@ -1808,6 +1809,7 @@ def note_unsatisfied_trait_reason
"%NonStandardLayoutMember{has a non-standard-layout member %1 of type %2}|"
"%IndirectBaseWithFields{has an indirect base %1 with data members}|"
"%DeletedDtr{has a %select{deleted|user-provided}1 destructor}|"
+ "%InaccessibleDtr{has a %select{private|protected}1 destructor}|"
"%UserProvidedCtr{has a user provided %select{copy|move}1 "
"constructor}|"
"%UserDeclaredCtr{has a user-declared constructor}|"
@@ -1823,6 +1825,7 @@ def note_unsatisfied_trait_reason
"%FunctionType{is a function type}|"
"%CVVoidType{is a cv void type}|"
"%IncompleteArrayType{is an incomplete array type}|"
+ "%IncompleteType{is an incomplete type}|"
"%PrivateProtectedDirectDataMember{has a %select{private|protected}1 direct data member}|"
"%PrivateProtectedDirectBase{has a %select{private|protected}1 direct base}|"
"%NotClassOrUnion{is not a class or union type}|"
diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp
index 38877967af05e..e9b8032733efc 100644
--- a/clang/lib/Sema/SemaTypeTraits.cpp
+++ b/clang/lib/Sema/SemaTypeTraits.cpp
@@ -2028,6 +2028,7 @@ static std::optional<TypeTrait> StdNameToTypeTrait(StringRef Name) {
.Case("is_constructible", TypeTrait::TT_IsConstructible)
.Case("is_final", TypeTrait::UTT_IsFinal)
.Case("is_abstract", TypeTrait::UTT_IsAbstract)
+ .Case("is_destructible", TypeTrait::UTT_IsDestructible)
.Default(std::nullopt);
}
@@ -2399,6 +2400,64 @@ static void DiagnoseNonConstructibleReason(
SemaRef.Diag(D->getLocation(), diag::note_defined_here) << D;
}
+static void DiagnoseNonDestructibleReason(
+ Sema &SemaRef, SourceLocation Loc,
+ QualType T) {
+
+ QualType CoreT = T.getCanonicalType();
+ if (const ArrayType *AT = SemaRef.Context.getAsArrayType(CoreT))
+ CoreT = AT->getElementType();
+
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait) << CoreT << diag::TraitName::Destructible;
+
+
+ if (CoreT->isFunctionType()){
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) << diag::TraitNotSatisfiedReason::FunctionType;
+ return;
+ }
+
+ if(CoreT->isVoidType()){
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) << diag::TraitNotSatisfiedReason::CVVoidType;
+ return;
+ }
+
+ if (CoreT->isIncompleteType()) {
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) << diag::TraitNotSatisfiedReason::IncompleteType;
+ return;
+ }
+
+ const CXXRecordDecl *RD = CoreT->getAsCXXRecordDecl();
+ if (!RD || RD->isInvalidDecl())
+ return;
+
+ const CXXRecordDecl *Def = RD->getDefinition();
+ if (!Def) {
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
+ << diag::TraitNotSatisfiedReason::IncompleteType;
+ return;
+ }
+
+ CXXDestructorDecl *Dtor = Def->getDestructor();
+ if (!Dtor)
+ return;
+
+ if (Dtor->isDeleted()) {
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
+ << diag::TraitNotSatisfiedReason::DeletedDtr << 0
+ << Dtor->getSourceRange();
+ return;
+ }
+
+ AccessSpecifier AS = Dtor->getAccess();
+ if (AS == AS_private || AS == AS_protected) {
+ unsigned Select = (AS == AS_private) ? 0 : 1;
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
+ << diag::TraitNotSatisfiedReason::InaccessibleDtr << Select
+ << Dtor->getSourceRange();
+ return;
+ }
+}
+
static void DiagnoseNonTriviallyCopyableReason(Sema &SemaRef,
SourceLocation Loc, QualType T) {
SemaRef.Diag(Loc, diag::note_unsatisfied_trait)
@@ -2889,6 +2948,9 @@ void Sema::DiagnoseTypeTraitDetails(const Expr *E) {
case TT_IsConstructible:
DiagnoseNonConstructibleReason(*this, E->getBeginLoc(), Args);
break;
+ case UTT_IsDestructible:
+ DiagnoseNonDestructibleReason(*this, E->getBeginLoc(), Args[0]);
+ break;
case UTT_IsAggregate:
DiagnoseNonAggregateReason(*this, E->getBeginLoc(), Args[0]);
break;
diff --git a/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp b/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp
index 3e03a79275232..3e02fe8f10f56 100644
--- a/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp
+++ b/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp
@@ -73,6 +73,15 @@ struct is_abstract {
template <typename T>
constexpr bool is_abstract_v = __is_abstract(T);
+template <typename T>
+struct is_destructible {
+ static constexpr bool value = __is_destructible(T);
+};
+
+template <typename T>
+constexpr bool is_destructible_v = __is_destructible(T);
+
+
#endif
#ifdef STD2
@@ -167,6 +176,17 @@ using is_abstract = __details_is_abstract<T>;
template <typename T>
constexpr bool is_abstract_v = __is_abstract(T);
+template <typename T>
+struct __details_is_destructible {
+ static constexpr bool value = __is_destructible(T);
+};
+
+template <typename T>
+using is_destructible = __details_is_destructible<T>;
+
+template <typename T>
+constexpr bool is_destructible_v = __is_destructible(T);
+
#endif
@@ -252,6 +272,15 @@ using is_abstract = __details_is_abstract<T>;
template <typename T>
constexpr bool is_abstract_v = is_abstract<T>::value;
+template <typename T>
+struct __details_is_destructible : bool_constant<__is_destructible(T)> {};
+
+template <typename T>
+using is_destructible = __details_is_destructible<T>;
+
+template <typename T>
+constexpr bool is_destructible_v = is_destructible<T>::value;
+
#endif
}
@@ -374,6 +403,18 @@ static_assert(std::is_abstract_v<int&>);
// expected-note@-1 {{because it is a reference type}} \
// expected-note@-1 {{because it is not a struct or class type}}
+static_assert(std::is_destructible<int>::value);
+
+static_assert(std::is_destructible<void>::value);
+// expected-error-re@-1 {{static assertion failed due to requirement 'std::{{.*}}is_destructible<void>::value'}} \
+// expected-note@-1 {{'void' is not destructible}} \
+// expected-note@-1 {{because it is a cv void type}}
+
+static_assert(std::is_destructible_v<void>);
+// expected-error@-1 {{static assertion failed due to requirement 'std::is_destructible_v<void>'}} \
+// expected-note@-1 {{'void' is not destructible}} \
+// expected-note@-1 {{because it is a cv void type}}
+
namespace test_namespace {
using namespace std;
@@ -473,6 +514,17 @@ namespace test_namespace {
// expected-note@-1 {{'int &' is not abstract}} \
// expected-note@-1 {{because it is a reference type}} \
// expected-note@-1 {{because it is not a struct or class type}}
+
+ static_assert(is_destructible<void>::value);
+ // expected-error-re@-1 {{static assertion failed due to requirement '{{.*}}is_destructible<void>::value'}} \
+ // expected-note@-1 {{'void' is not destructible}} \
+ // expected-note@-1 {{because it is a cv void type}}
+
+ static_assert(is_destructible_v<void>);
+ // expected-error@-1 {{static assertion failed due to requirement 'is_destructible_v<void>'}} \
+ // expected-note@-1 {{'void' is not destructible}} \
+ // expected-note@-1 {{because it is a cv void type}}
+
}
@@ -518,6 +570,15 @@ concept C5 = std::is_aggregate_v<T>; // #concept10
template <C5 T> void g5(); // #cand10
+template <typename T>
+requires std::is_destructible<T>::value void f6(); // #cand11
+
+template <typename T>
+concept C6 = std::is_destructible_v<T>; // #concept11
+
+template <C6 T> void g6(); // #cand12
+
+
void test() {
f<int&>();
// expected-error@-1 {{no matching function for call to 'f'}} \
@@ -589,6 +650,21 @@ void test() {
// expected-note@#concept10 {{because 'std::is_aggregate_v<void>' evaluated to false}} \
// expected-note@#concept10 {{'void' is not aggregate}} \
// expected-note@#concept10 {{because it is a cv void type}}
+
+ f6<void>();
+ // expected-error@-1 {{no matching function for call to 'f6'}} \
+ // expected-note@#cand11 {{candidate template ignored: constraints not satisfied [with T = void]}} \
+ // expected-note-re@#cand11 {{because '{{.*}}is_destructible<void>::value' evaluated to false}} \
+ // expected-note@#cand11 {{'void' is not destructible}} \
+ // expected-note@#cand11 {{because it is a cv void type}}
+
+ g6<void>();
+ // expected-error@-1 {{no matching function for call to 'g6'}} \
+ // expected-note@#cand12 {{candidate template ignored: constraints not satisfied [with T = void]}} \
+ // expected-note@#cand12 {{because 'void' does not satisfy 'C6'}} \
+ // expected-note@#concept11 {{because 'std::is_destructible_v<void>' evaluated to false}} \
+ // expected-note@#concept11 {{'void' is not destructible}} \
+ // expected-note@#concept11 {{because it is a cv void type}}
}
}
diff --git a/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp b/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp
index 22740418f09f5..858a5cc24868f 100644
--- a/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp
+++ b/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp
@@ -1052,3 +1052,52 @@ static_assert(__is_abstract(U));
// expected-note@-1 {{because it is not a struct or class type}}
}
+namespace destructible {
+
+struct Incomplete; // expected-note {{forward declaration of 'destructible::Incomplete'}}
+static_assert(__is_destructible(Incomplete));
+// expected-error@-1 {{incomplete type 'Incomplete' used in type trait expression}}
+
+static_assert(__is_destructible(void));
+// expected-error@-1 {{static assertion failed due to requirement '__is_destructible(void)'}} \
+// expected-note@-1 {{'void' is not destructible}} \
+// expected-note@-1 {{because it is a cv void type}}
+
+using F = void();
+static_assert(__is_destructible(F));
+// expected-error@-1 {{static assertion failed due to requirement '__is_destructible(void ())'}} \
+// expected-note@-1 {{'void ()' is not destructible}} \
+// expected-note@-1 {{because it is a function type}}
+
+using Ref = int&;
+static_assert(__is_destructible(Ref)); // no diagnostics (true)
+
+struct DeletedDtor { // #d-DeletedDtor
+ ~DeletedDtor() = delete;
+};
+static_assert(__is_destructible(DeletedDtor));
+// expected-error@-1 {{static assertion failed due to requirement '__is_destructible(destructible::DeletedDtor)'}} \
+// expected-note@-1 {{'destructible::DeletedDtor' is not destructible}} \
+// expected-note@-1 {{because it has a deleted destructor}}
+
+struct PrivateDtor { // #d-PrivateDtor
+private:
+ ~PrivateDtor(); // #d-PrivateDtor-dtor
+};
+static_assert(__is_destructible(PrivateDtor));
+// expected-error@-1 {{static assertion failed due to requirement '__is_destructible(destructible::PrivateDtor)'}} \
+// expected-note@-1 {{'destructible::PrivateDtor' is not destructible}} \
+// expected-note@-1 {{because it has a private destructor}}
+
+struct BaseInaccessible { // #d-BaseInacc
+private:
+ ~BaseInaccessible(); // #d-BaseInacc-dtor
+};
+
+struct DerivedFromInaccessible : BaseInaccessible {}; // #d-DerivedInacc
+static_assert(__is_destructible(DerivedFromInaccessible));
+// expected-error@-1 {{static assertion failed due to requirement '__is_destructible(destructible::DerivedFromInaccessible)'}} \
+// expected-note@-1 {{'destructible::DerivedFromInaccessible' is not destructible}} \
+// expected-note@-1 {{because it has a deleted destructor}}
+
+}
>From 5b5bdbfeafceedec79b173bf8e1d317818d7127d Mon Sep 17 00:00:00 2001
From: AnushaK6 <anusha.k1300 at gmail.com>
Date: Fri, 7 Nov 2025 22:34:51 +0530
Subject: [PATCH 2/4] [NFC] Format code using clang-format
---
clang/lib/Sema/SemaTypeTraits.cpp | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp
index e9b8032733efc..fef47dfc2cc51 100644
--- a/clang/lib/Sema/SemaTypeTraits.cpp
+++ b/clang/lib/Sema/SemaTypeTraits.cpp
@@ -2400,29 +2400,31 @@ static void DiagnoseNonConstructibleReason(
SemaRef.Diag(D->getLocation(), diag::note_defined_here) << D;
}
-static void DiagnoseNonDestructibleReason(
- Sema &SemaRef, SourceLocation Loc,
- QualType T) {
+static void DiagnoseNonDestructibleReason(Sema &SemaRef, SourceLocation Loc,
+ QualType T) {
QualType CoreT = T.getCanonicalType();
if (const ArrayType *AT = SemaRef.Context.getAsArrayType(CoreT))
CoreT = AT->getElementType();
- SemaRef.Diag(Loc, diag::note_unsatisfied_trait) << CoreT << diag::TraitName::Destructible;
-
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait)
+ << CoreT << diag::TraitName::Destructible;
- if (CoreT->isFunctionType()){
- SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) << diag::TraitNotSatisfiedReason::FunctionType;
+ if (CoreT->isFunctionType()) {
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
+ << diag::TraitNotSatisfiedReason::FunctionType;
return;
}
-
- if(CoreT->isVoidType()){
- SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) << diag::TraitNotSatisfiedReason::CVVoidType;
+
+ if (CoreT->isVoidType()) {
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
+ << diag::TraitNotSatisfiedReason::CVVoidType;
return;
}
if (CoreT->isIncompleteType()) {
- SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason) << diag::TraitNotSatisfiedReason::IncompleteType;
+ SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
+ << diag::TraitNotSatisfiedReason::IncompleteType;
return;
}
@@ -2433,7 +2435,7 @@ static void DiagnoseNonDestructibleReason(
const CXXRecordDecl *Def = RD->getDefinition();
if (!Def) {
SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
- << diag::TraitNotSatisfiedReason::IncompleteType;
+ << diag::TraitNotSatisfiedReason::IncompleteType;
return;
}
>From 8ba77ae65bbfd5b809c17de7c718c451c535ca54 Mon Sep 17 00:00:00 2001
From: AnushaK6 <anusha.k1300 at gmail.com>
Date: Sun, 9 Nov 2025 22:58:51 +0530
Subject: [PATCH 3/4] [GlobalISel] Port computeNumSignBits for G_MUL
---
.../CodeGen/GlobalISel/GISelValueTracking.cpp | 37 +++++++++
.../AArch64/GlobalISel/knownbits-mul.mir | 79 +++++++++++++++++++
llvm/test/CodeGen/AArch64/combine-sdiv.ll | 4 +-
llvm/test/CodeGen/AArch64/rem-by-const.ll | 3 +-
4 files changed, 118 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
index c1fb8b6d78ff8..3a4d8c27ac88b 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp
@@ -37,6 +37,8 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/KnownFPClass.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/IR/Constants.h"
#define DEBUG_TYPE "gisel-known-bits"
@@ -2084,6 +2086,41 @@ unsigned GISelValueTracking::computeNumSignBits(Register R,
FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits) - 1;
break;
}
+ case TargetOpcode::G_MUL: {
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+
+ KnownBits Known1 = getKnownBits(Src1, DemandedElts, Depth + 1);
+ KnownBits Known2 = getKnownBits(Src2, DemandedElts, Depth + 1);
+
+ if (Known1.isZero() || Known2.isZero())
+ return TyBits;
+
+ auto C1 = getIConstantVRegValWithLookThrough(Src1, MRI);
+ auto C2 = getIConstantVRegValWithLookThrough(Src2, MRI);
+
+ if (C1 && C2) {
+ APInt Val1 = C1->Value;
+ APInt Val2 = C2->Value;
+ APInt Product = Val1 * Val2;
+ return Product.getNumSignBits();
+ }
+ unsigned Src1NumSignBits =
+ computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if(Src1NumSignBits==1){
+ return 1;
+ }
+ unsigned Src2NumSignBits =
+ computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ if(Src2NumSignBits==1){
+ return 1;
+ }
+
+ unsigned OutValidBits =
+ (TyBits - Src1NumSignBits + 1) + (TyBits - Src2NumSignBits + 1);
+ FirstAnswer = OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
+ break;
+ }
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
bool IsFP = Opcode == TargetOpcode::G_FCMP;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir
new file mode 100644
index 0000000000000..ea5281948a211
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-mul.mir
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -passes="print<gisel-value-tracking>" -filetype=null %s 2>&1 | FileCheck %s
+
+---
+name: ConstPositives
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ConstPositives
+ ; CHECK-NEXT: %0:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %1:_ KnownBits:00000101 SignBits:5
+ ; CHECK-NEXT: %2:_ KnownBits:00001111 SignBits:4
+ %0:_(s8) = G_CONSTANT i8 3
+ %1:_(s8) = G_CONSTANT i8 5
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: ConstZero
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ConstZero
+ ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8
+ ; CHECK-NEXT: %1:_ KnownBits:00000001 SignBits:7
+ ; CHECK-NEXT: %2:_ KnownBits:00000000 SignBits:8
+ %0:_(s8) = G_CONSTANT i8 0
+ %1:_(s8) = G_CONSTANT i8 1
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: ConstNegatives
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ConstNegatives
+ ; CHECK-NEXT: %0:_ KnownBits:11111110 SignBits:7
+ ; CHECK-NEXT: %1:_ KnownBits:11111100 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:00001000 SignBits:4
+ %0:_(s8) = G_CONSTANT i8 -2
+ %1:_(s8) = G_CONSTANT i8 -4
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: MixedSigns
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @MixedSigns
+ ; CHECK-NEXT: %0:_ KnownBits:11111100 SignBits:6
+ ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:11110100 SignBits:4
+ %0:_(s8) = G_CONSTANT i8 -4
+ %1:_(s8) = G_CONSTANT i8 3
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: UnknownVar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @UnknownVar
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000010 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:???????0 SignBits:1
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 2
+ %2:_(s8) = G_MUL %0, %1
+...
+---
+name: VectorMul
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorMul
+ ; CHECK-NEXT: %0:_ KnownBits:0000000000000001 SignBits:15
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000010 SignBits:14
+ ; CHECK-NEXT: %2:_ KnownBits:00000000000000?? SignBits:14
+ ; CHECK-NEXT: %3:_ KnownBits:00000000000000?? SignBits:14
+ ; CHECK-NEXT: %4:_ KnownBits:000000000000???? SignBits:12
+ %0:_(s16) = G_CONSTANT i16 1
+ %1:_(s16) = G_CONSTANT i16 2
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %0, %0, %1
+ %4:_(<4 x s16>) = G_MUL %2, %3
+...
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index cca190f08df2b..37ea64f700fd8 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1510,7 +1510,6 @@ define i5 @combine_i5_sdiv_const7(i5 %x) {
; CHECK-GI-NEXT: sbfx w9, w0, #0, #5
; CHECK-GI-NEXT: sbfx w8, w8, #0, #5
; CHECK-GI-NEXT: mul w8, w9, w8
-; CHECK-GI-NEXT: sbfx w8, w8, #0, #10
; CHECK-GI-NEXT: add w8, w0, w8, asr #5
; CHECK-GI-NEXT: sbfx w8, w8, #0, #5
; CHECK-GI-NEXT: asr w8, w8, #2
@@ -1560,7 +1559,6 @@ define i8 @combine_i8_sdiv_const7(i8 %x) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #-109 // =0xffffff93
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sxth w8, w8
; CHECK-GI-NEXT: add w8, w0, w8, asr #8
; CHECK-GI-NEXT: sbfx w8, w8, #2, #6
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
@@ -1585,7 +1583,7 @@ define i8 @combine_i8_sdiv_const100(i8 %x) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #41 // =0x29
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: asr w8, w8, #8
; CHECK-GI-NEXT: asr w8, w8, #4
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
; CHECK-GI-NEXT: add w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index 87b11086e28d5..9cc00fae412a0 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -21,7 +21,6 @@ define i8 @si8_7(i8 %a, i8 %b) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #-109 // =0xffffff93
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sxth w8, w8
; CHECK-GI-NEXT: add w8, w0, w8, asr #8
; CHECK-GI-NEXT: sbfx w8, w8, #2, #6
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
@@ -52,7 +51,7 @@ define i8 @si8_100(i8 %a, i8 %b) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #41 // =0x29
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: asr w8, w8, #8
; CHECK-GI-NEXT: asr w8, w8, #4
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
; CHECK-GI-NEXT: add w8, w8, w9
>From 9242c9cc70db14d293f1523c1f912de19641de06 Mon Sep 17 00:00:00 2001
From: AnushaK6 <anusha.k1300 at gmail.com>
Date: Mon, 10 Nov 2025 00:06:49 +0530
Subject: [PATCH 4/4] Correct test checks
---
llvm/test/CodeGen/AArch64/neon-dotreduce.ll | 3128 ++++++++---------
.../RISCV/GlobalISel/div-by-constant.ll | 48 +-
2 files changed, 1580 insertions(+), 1596 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index fb2a1fa697c26..581d8e116359b 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -2832,247 +2832,246 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b
; CHECK-GI-NEXT: ldp q2, q1, [x1]
; CHECK-GI-NEXT: movi d0, #0000000000000000
; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Folded Spill
-; CHECK-GI-NEXT: mov b5, v2.b[2]
; CHECK-GI-NEXT: mov b6, v2.b[3]
; CHECK-GI-NEXT: mov b7, v2.b[4]
; CHECK-GI-NEXT: mov b16, v2.b[5]
+; CHECK-GI-NEXT: mov b19, v2.b[8]
+; CHECK-GI-NEXT: mov b4, v2.b[1]
+; CHECK-GI-NEXT: mov b5, v2.b[2]
; CHECK-GI-NEXT: mov b17, v2.b[6]
; CHECK-GI-NEXT: mov b18, v2.b[7]
-; CHECK-GI-NEXT: mov b19, v2.b[8]
; CHECK-GI-NEXT: mov b20, v2.b[9]
-; CHECK-GI-NEXT: mov b21, v2.b[15]
-; CHECK-GI-NEXT: mov b3, v2.b[1]
-; CHECK-GI-NEXT: fmov w19, s2
-; CHECK-GI-NEXT: mov b22, v1.b[6]
-; CHECK-GI-NEXT: fmov w6, s5
-; CHECK-GI-NEXT: mov b5, v2.b[10]
-; CHECK-GI-NEXT: fmov w14, s6
-; CHECK-GI-NEXT: mov b6, v2.b[11]
+; CHECK-GI-NEXT: mov b21, v2.b[10]
+; CHECK-GI-NEXT: mov b22, v2.b[11]
+; CHECK-GI-NEXT: fmov w7, s2
+; CHECK-GI-NEXT: fmov w13, s6
+; CHECK-GI-NEXT: mov b6, v2.b[12]
; CHECK-GI-NEXT: fmov w2, s7
-; CHECK-GI-NEXT: stp s17, s18, [sp, #4] // 8-byte Folded Spill
-; CHECK-GI-NEXT: mov b7, v2.b[12]
+; CHECK-GI-NEXT: mov b7, v2.b[13]
; CHECK-GI-NEXT: fmov w11, s16
-; CHECK-GI-NEXT: sxtb w28, w19
-; CHECK-GI-NEXT: mov b16, v2.b[13]
-; CHECK-GI-NEXT: mov b18, v1.b[1]
-; CHECK-GI-NEXT: sxtb w6, w6
-; CHECK-GI-NEXT: mov b17, v2.b[14]
-; CHECK-GI-NEXT: ldp q4, q2, [x0]
-; CHECK-GI-NEXT: fmov w25, s19
+; CHECK-GI-NEXT: mov b16, v2.b[14]
+; CHECK-GI-NEXT: mov b23, v2.b[15]
+; CHECK-GI-NEXT: ldp q3, q2, [x0]
+; CHECK-GI-NEXT: fmov w26, s19
+; CHECK-GI-NEXT: fmov w19, s4
+; CHECK-GI-NEXT: stp s17, s18, [sp, #4] // 8-byte Folded Spill
+; CHECK-GI-NEXT: fmov w29, s5
; CHECK-GI-NEXT: fmov w24, s20
-; CHECK-GI-NEXT: fmov w22, s5
-; CHECK-GI-NEXT: mov b5, v1.b[2]
-; CHECK-GI-NEXT: fmov w0, s6
-; CHECK-GI-NEXT: sxtb w14, w14
-; CHECK-GI-NEXT: mov b20, v1.b[3]
-; CHECK-GI-NEXT: fmov w16, s7
-; CHECK-GI-NEXT: mov b7, v1.b[4]
-; CHECK-GI-NEXT: fmov w15, s16
-; CHECK-GI-NEXT: sxtb w25, w25
+; CHECK-GI-NEXT: sxtb w8, w7
+; CHECK-GI-NEXT: mov b4, v3.b[2]
+; CHECK-GI-NEXT: mov b5, v3.b[1]
+; CHECK-GI-NEXT: sxtb w13, w13
+; CHECK-GI-NEXT: mov b17, v1.b[1]
+; CHECK-GI-NEXT: fmov w22, s21
+; CHECK-GI-NEXT: sxtb w26, w26
+; CHECK-GI-NEXT: mov b18, v1.b[2]
+; CHECK-GI-NEXT: fmov w18, s22
; CHECK-GI-NEXT: sxtb w24, w24
-; CHECK-GI-NEXT: mov b16, v1.b[5]
-; CHECK-GI-NEXT: fmov w13, s21
+; CHECK-GI-NEXT: mov b19, v1.b[3]
+; CHECK-GI-NEXT: fmov w16, s6
+; CHECK-GI-NEXT: sxtb w19, w19
+; CHECK-GI-NEXT: mov b21, v1.b[4]
+; CHECK-GI-NEXT: fmov w15, s7
; CHECK-GI-NEXT: sxtb w22, w22
-; CHECK-GI-NEXT: mov b6, v4.b[2]
-; CHECK-GI-NEXT: fmov w26, s18
-; CHECK-GI-NEXT: sxtb w0, w0
-; CHECK-GI-NEXT: mov b21, v1.b[7]
-; CHECK-GI-NEXT: mov b18, v4.b[4]
-; CHECK-GI-NEXT: fmov w7, s3
-; CHECK-GI-NEXT: mov b3, v4.b[1]
-; CHECK-GI-NEXT: fmov w12, s17
-; CHECK-GI-NEXT: fmov w5, s5
-; CHECK-GI-NEXT: mov b19, v4.b[3]
-; CHECK-GI-NEXT: fmov w4, s20
-; CHECK-GI-NEXT: fmov w3, s7
-; CHECK-GI-NEXT: sxtb w29, w7
-; CHECK-GI-NEXT: mov b17, v4.b[5]
-; CHECK-GI-NEXT: fmov w1, s16
-; CHECK-GI-NEXT: sxtb w5, w5
-; CHECK-GI-NEXT: mov b16, v4.b[6]
-; CHECK-GI-NEXT: fmov w18, s22
-; CHECK-GI-NEXT: mov b7, v4.b[7]
-; CHECK-GI-NEXT: fmov w17, s21
-; CHECK-GI-NEXT: mov b5, v4.b[8]
-; CHECK-GI-NEXT: mov b20, v4.b[9]
-; CHECK-GI-NEXT: fmov w27, s6
-; CHECK-GI-NEXT: mov b6, v4.b[10]
-; CHECK-GI-NEXT: mov b21, v4.b[11]
-; CHECK-GI-NEXT: fmov w21, s18
-; CHECK-GI-NEXT: mov b18, v4.b[12]
-; CHECK-GI-NEXT: mov b22, v4.b[13]
-; CHECK-GI-NEXT: mov b23, v4.b[14]
-; CHECK-GI-NEXT: fmov w10, s4
+; CHECK-GI-NEXT: mov b7, v1.b[5]
+; CHECK-GI-NEXT: mov b6, v3.b[3]
+; CHECK-GI-NEXT: sxtb w11, w11
+; CHECK-GI-NEXT: fmov w12, s23
+; CHECK-GI-NEXT: mov b22, v1.b[6]
+; CHECK-GI-NEXT: mov b23, v1.b[7]
+; CHECK-GI-NEXT: mov b20, v3.b[4]
+; CHECK-GI-NEXT: fmov w28, s4
+; CHECK-GI-NEXT: fmov s4, w26
+; CHECK-GI-NEXT: fmov w14, s16
+; CHECK-GI-NEXT: fmov w27, s17
+; CHECK-GI-NEXT: fmov w5, s18
+; CHECK-GI-NEXT: sxtb w12, w12
+; CHECK-GI-NEXT: fmov w4, s19
+; CHECK-GI-NEXT: mov b19, v3.b[5]
+; CHECK-GI-NEXT: sxtb w28, w28
+; CHECK-GI-NEXT: fmov w3, s21
+; CHECK-GI-NEXT: mov b18, v3.b[6]
; CHECK-GI-NEXT: sxtb w27, w27
-; CHECK-GI-NEXT: mov b24, v4.b[15]
-; CHECK-GI-NEXT: fmov s4, w25
-; CHECK-GI-NEXT: fmov w30, s3
-; CHECK-GI-NEXT: fmov s3, w28
-; CHECK-GI-NEXT: fmov w9, s5
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: fmov w7, s7
-; CHECK-GI-NEXT: mov b7, v2.b[1]
+; CHECK-GI-NEXT: sxtb w5, w5
+; CHECK-GI-NEXT: fmov w1, s7
+; CHECK-GI-NEXT: mov b16, v3.b[7]
+; CHECK-GI-NEXT: fmov w0, s22
+; CHECK-GI-NEXT: mov b17, v3.b[8]
+; CHECK-GI-NEXT: fmov w17, s23
+; CHECK-GI-NEXT: mov b7, v3.b[9]
+; CHECK-GI-NEXT: fmov w30, s5
+; CHECK-GI-NEXT: mov b5, v3.b[10]
+; CHECK-GI-NEXT: mov b21, v3.b[11]
+; CHECK-GI-NEXT: fmov w25, s6
+; CHECK-GI-NEXT: mov b6, v3.b[12]
+; CHECK-GI-NEXT: fmov w23, s20
+; CHECK-GI-NEXT: mov b20, v3.b[13]
+; CHECK-GI-NEXT: mov b22, v3.b[14]
+; CHECK-GI-NEXT: fmov w6, s3
+; CHECK-GI-NEXT: mov b23, v3.b[15]
+; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v4.h[1], w24
-; CHECK-GI-NEXT: fmov w24, s1
-; CHECK-GI-NEXT: fmov w8, s20
-; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: mov v3.h[1], w29
-; CHECK-GI-NEXT: fmov w29, s6
-; CHECK-GI-NEXT: fmov s6, w10
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w19, s16
-; CHECK-GI-NEXT: sxtb w24, w24
+; CHECK-GI-NEXT: fmov w21, s19
+; CHECK-GI-NEXT: mov b19, v2.b[1]
+; CHECK-GI-NEXT: fmov w9, s17
+; CHECK-GI-NEXT: fmov w24, s6
+; CHECK-GI-NEXT: fmov w7, s16
+; CHECK-GI-NEXT: mov b16, v2.b[2]
; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov b16, v2.b[3]
+; CHECK-GI-NEXT: mov v3.h[1], w19
+; CHECK-GI-NEXT: sxtb w19, w29
+; CHECK-GI-NEXT: sxtb w9, w9
+; CHECK-GI-NEXT: fmov w29, s5
+; CHECK-GI-NEXT: mov v4.h[2], w22
+; CHECK-GI-NEXT: sxtb w22, w6
+; CHECK-GI-NEXT: fmov s5, w8
+; CHECK-GI-NEXT: fmov w10, s7
+; CHECK-GI-NEXT: fmov s7, w9
+; CHECK-GI-NEXT: fmov w9, s16
+; CHECK-GI-NEXT: fmov w20, s18
; CHECK-GI-NEXT: sxtb w29, w29
-; CHECK-GI-NEXT: fmov w23, s19
-; CHECK-GI-NEXT: mov b19, v2.b[2]
+; CHECK-GI-NEXT: fmov s6, w22
+; CHECK-GI-NEXT: fmov w22, s2
; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: fmov s5, w24
-; CHECK-GI-NEXT: sxtb w24, w30
-; CHECK-GI-NEXT: mov v3.h[2], w6
-; CHECK-GI-NEXT: sxtb w6, w26
-; CHECK-GI-NEXT: fmov w28, s21
-; CHECK-GI-NEXT: sxtb w23, w23
-; CHECK-GI-NEXT: mov v6.h[1], w24
-; CHECK-GI-NEXT: fmov w24, s7
-; CHECK-GI-NEXT: fmov s7, w9
-; CHECK-GI-NEXT: fmov w9, s19
-; CHECK-GI-NEXT: mov v5.h[1], w6
-; CHECK-GI-NEXT: mov v4.h[2], w22
-; CHECK-GI-NEXT: fmov w20, s17
-; CHECK-GI-NEXT: mov b17, v2.b[4]
-; CHECK-GI-NEXT: sxtb w24, w24
-; CHECK-GI-NEXT: mov v3.h[3], w14
-; CHECK-GI-NEXT: sxtb w14, w2
+; CHECK-GI-NEXT: mov v5.h[1], w27
+; CHECK-GI-NEXT: sxtb w27, w30
; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: mov v7.h[1], w8
-; CHECK-GI-NEXT: fmov w8, s16
-; CHECK-GI-NEXT: fmov s16, w10
-; CHECK-GI-NEXT: mov v6.h[2], w27
+; CHECK-GI-NEXT: mov b18, v2.b[3]
+; CHECK-GI-NEXT: mov v3.h[2], w19
+; CHECK-GI-NEXT: sxtb w22, w22
+; CHECK-GI-NEXT: mov v6.h[1], w27
+; CHECK-GI-NEXT: fmov w27, s19
+; CHECK-GI-NEXT: mov v7.h[1], w10
+; CHECK-GI-NEXT: fmov w26, s21
+; CHECK-GI-NEXT: mov b17, v2.b[4]
+; CHECK-GI-NEXT: fmov s16, w22
; CHECK-GI-NEXT: mov v5.h[2], w5
-; CHECK-GI-NEXT: fmov w25, s18
-; CHECK-GI-NEXT: mov v4.h[3], w0
-; CHECK-GI-NEXT: sxtb w0, w4
-; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov b18, v2.b[5]
-; CHECK-GI-NEXT: fmov w10, s17
-; CHECK-GI-NEXT: mov v16.h[1], w24
+; CHECK-GI-NEXT: sxtb w5, w25
+; CHECK-GI-NEXT: sxtb w27, w27
+; CHECK-GI-NEXT: fmov w10, s18
+; CHECK-GI-NEXT: mov v3.h[3], w13
+; CHECK-GI-NEXT: sxtb w13, w4
+; CHECK-GI-NEXT: mov v6.h[2], w28
+; CHECK-GI-NEXT: fmov w8, s20
+; CHECK-GI-NEXT: mov v16.h[1], w27
; CHECK-GI-NEXT: mov v7.h[2], w29
-; CHECK-GI-NEXT: mov v3.h[4], w14
-; CHECK-GI-NEXT: sxtb w14, w25
-; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov b20, v2.b[5]
; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: mov v6.h[3], w23
-; CHECK-GI-NEXT: mov v5.h[3], w0
-; CHECK-GI-NEXT: fmov w26, s22
-; CHECK-GI-NEXT: mov b19, v2.b[6]
-; CHECK-GI-NEXT: fmov w27, s18
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: sxtb w8, w8
+; CHECK-GI-NEXT: fmov w22, s17
+; CHECK-GI-NEXT: mov v5.h[3], w13
+; CHECK-GI-NEXT: sxtb w13, w2
+; CHECK-GI-NEXT: mov v6.h[3], w5
+; CHECK-GI-NEXT: mov b21, v2.b[6]
; CHECK-GI-NEXT: mov v16.h[2], w9
-; CHECK-GI-NEXT: sxtb w9, w28
-; CHECK-GI-NEXT: fmov w22, s23
+; CHECK-GI-NEXT: sxtb w9, w18
+; CHECK-GI-NEXT: sxtb w18, w23
+; CHECK-GI-NEXT: mov v3.h[4], w13
+; CHECK-GI-NEXT: sxtb w13, w24
+; CHECK-GI-NEXT: fmov w27, s20
+; CHECK-GI-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v4.h[3], w9
+; CHECK-GI-NEXT: sxtb w9, w26
+; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v16.h[3], w10
+; CHECK-GI-NEXT: sxtb w10, w3
+; CHECK-GI-NEXT: mov v6.h[4], w18
+; CHECK-GI-NEXT: ldr w18, [sp, #4] // 4-byte Folded Reload
+; CHECK-GI-NEXT: mov v7.h[3], w9
+; CHECK-GI-NEXT: sxtb w9, w16
+; CHECK-GI-NEXT: sxtb w16, w22
+; CHECK-GI-NEXT: mov v5.h[4], w10
+; CHECK-GI-NEXT: sxtb w10, w15
+; CHECK-GI-NEXT: sxtb w18, w18
+; CHECK-GI-NEXT: mov v4.h[4], w9
+; CHECK-GI-NEXT: sxtb w9, w21
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v16.h[4], w16
+; CHECK-GI-NEXT: mov v7.h[4], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #8] // 4-byte Folded Reload
+; CHECK-GI-NEXT: mov v6.h[5], w9
+; CHECK-GI-NEXT: sxtb w9, w1
+; CHECK-GI-NEXT: mov v3.h[5], w11
+; CHECK-GI-NEXT: sxtb w11, w27
+; CHECK-GI-NEXT: fmov w19, s22
+; CHECK-GI-NEXT: fmov w28, s21
+; CHECK-GI-NEXT: sxtb w13, w13
; CHECK-GI-NEXT: mov b17, v2.b[7]
-; CHECK-GI-NEXT: fmov w6, s24
+; CHECK-GI-NEXT: mov v5.h[5], w9
+; CHECK-GI-NEXT: sxtb w9, w0
+; CHECK-GI-NEXT: mov v4.h[5], w10
+; CHECK-GI-NEXT: sxtb w10, w20
+; CHECK-GI-NEXT: mov v7.h[5], w8
+; CHECK-GI-NEXT: mov v16.h[5], w11
+; CHECK-GI-NEXT: sxtb w8, w14
+; CHECK-GI-NEXT: sxtb w11, w28
+; CHECK-GI-NEXT: mov v6.h[6], w10
+; CHECK-GI-NEXT: sxtb w10, w19
+; CHECK-GI-NEXT: fmov w6, s23
+; CHECK-GI-NEXT: mov v5.h[6], w9
+; CHECK-GI-NEXT: fmov w9, s17
+; CHECK-GI-NEXT: mov v3.h[6], w18
+; CHECK-GI-NEXT: mov v4.h[6], w8
+; CHECK-GI-NEXT: sxtb w8, w7
+; CHECK-GI-NEXT: mov v7.h[6], w10
+; CHECK-GI-NEXT: mov v16.h[6], w11
+; CHECK-GI-NEXT: sxtb w10, w6
; CHECK-GI-NEXT: mov v0.s[1], wzr
-; CHECK-GI-NEXT: mov v7.h[3], w9
-; CHECK-GI-NEXT: sxtb w9, w11
-; CHECK-GI-NEXT: sxtb w11, w21
-; CHECK-GI-NEXT: fmov w24, s19
-; CHECK-GI-NEXT: mov v16.h[3], w8
-; CHECK-GI-NEXT: sxtb w8, w16
-; CHECK-GI-NEXT: sxtb w16, w3
-; CHECK-GI-NEXT: mov v6.h[4], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #4] // 4-byte Folded Reload
-; CHECK-GI-NEXT: mov v3.h[5], w9
-; CHECK-GI-NEXT: sxtb w9, w15
-; CHECK-GI-NEXT: sxtb w15, w27
-; CHECK-GI-NEXT: mov v7.h[4], w14
-; CHECK-GI-NEXT: sxtb w14, w1
-; CHECK-GI-NEXT: sxtb w11, w11
-; CHECK-GI-NEXT: mov v4.h[4], w8
-; CHECK-GI-NEXT: sxtb w8, w20
+; CHECK-GI-NEXT: mov v6.h[7], w8
+; CHECK-GI-NEXT: sxtb w8, w17
+; CHECK-GI-NEXT: sxtb w9, w9
+; CHECK-GI-NEXT: mov v3.h[7], w13
; CHECK-GI-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v5.h[4], w16
-; CHECK-GI-NEXT: mov v16.h[4], w10
-; CHECK-GI-NEXT: sxtb w10, w26
-; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v6.h[5], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload
-; CHECK-GI-NEXT: mov v7.h[5], w10
-; CHECK-GI-NEXT: sxtb w10, w12
-; CHECK-GI-NEXT: sxtb w12, w18
-; CHECK-GI-NEXT: mov v4.h[5], w9
-; CHECK-GI-NEXT: sxtb w9, w19
-; CHECK-GI-NEXT: mov v5.h[5], w14
-; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov v16.h[5], w15
-; CHECK-GI-NEXT: mov v3.h[6], w11
-; CHECK-GI-NEXT: sxtb w11, w22
-; CHECK-GI-NEXT: mov v6.h[6], w9
-; CHECK-GI-NEXT: sxtb w9, w13
-; CHECK-GI-NEXT: sxtb w13, w24
+; CHECK-GI-NEXT: mov v4.h[7], w12
+; CHECK-GI-NEXT: mov v5.h[7], w8
+; CHECK-GI-NEXT: mov v7.h[7], w10
+; CHECK-GI-NEXT: mov v16.h[7], w9
+; CHECK-GI-NEXT: smov w8, v1.b[8]
+; CHECK-GI-NEXT: smov w9, v2.b[8]
; CHECK-GI-NEXT: mov v0.s[2], wzr
-; CHECK-GI-NEXT: mov v7.h[6], w11
-; CHECK-GI-NEXT: fmov w11, s17
-; CHECK-GI-NEXT: mov v4.h[6], w10
-; CHECK-GI-NEXT: sxtb w10, w7
-; CHECK-GI-NEXT: mov v5.h[6], w12
-; CHECK-GI-NEXT: mov v16.h[6], w13
-; CHECK-GI-NEXT: mov v3.h[7], w8
-; CHECK-GI-NEXT: sxtb w8, w6
-; CHECK-GI-NEXT: smov w12, v1.b[8]
-; CHECK-GI-NEXT: mov v6.h[7], w10
-; CHECK-GI-NEXT: sxtb w10, w17
-; CHECK-GI-NEXT: sxtb w11, w11
-; CHECK-GI-NEXT: mov v4.h[7], w9
-; CHECK-GI-NEXT: mov v7.h[7], w8
-; CHECK-GI-NEXT: smov w8, v2.b[8]
-; CHECK-GI-NEXT: mov v5.h[7], w10
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v16.h[7], w11
-; CHECK-GI-NEXT: mov v0.s[3], wzr
; CHECK-GI-NEXT: mul v3.8h, v3.8h, v6.8h
-; CHECK-GI-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT: mul v2.8h, v4.8h, v7.8h
-; CHECK-GI-NEXT: mul w16, w12, w8
; CHECK-GI-NEXT: mul v1.8h, v5.8h, v16.8h
-; CHECK-GI-NEXT: smov w17, v3.h[0]
-; CHECK-GI-NEXT: smov w0, v3.h[4]
-; CHECK-GI-NEXT: sxth w16, w16
-; CHECK-GI-NEXT: smov w2, v2.h[0]
-; CHECK-GI-NEXT: smov w4, v2.h[4]
-; CHECK-GI-NEXT: smov w18, v3.h[1]
-; CHECK-GI-NEXT: smov w1, v3.h[5]
-; CHECK-GI-NEXT: smov w3, v2.h[1]
-; CHECK-GI-NEXT: smov w5, v2.h[5]
-; CHECK-GI-NEXT: smov w6, v1.h[0]
-; CHECK-GI-NEXT: smov w19, v1.h[4]
-; CHECK-GI-NEXT: smov w7, v1.h[1]
-; CHECK-GI-NEXT: smov w20, v1.h[5]
+; CHECK-GI-NEXT: mul w15, w8, w9
+; CHECK-GI-NEXT: mov v0.s[3], wzr
+; CHECK-GI-NEXT: smov w16, v3.h[0]
+; CHECK-GI-NEXT: smov w18, v3.h[4]
+; CHECK-GI-NEXT: smov w17, v3.h[1]
+; CHECK-GI-NEXT: smov w1, v2.h[0]
+; CHECK-GI-NEXT: smov w3, v2.h[4]
+; CHECK-GI-NEXT: smov w0, v3.h[5]
+; CHECK-GI-NEXT: smov w5, v1.h[0]
+; CHECK-GI-NEXT: smov w7, v1.h[4]
+; CHECK-GI-NEXT: smov w2, v2.h[1]
+; CHECK-GI-NEXT: smov w4, v2.h[5]
+; CHECK-GI-NEXT: smov w6, v1.h[1]
+; CHECK-GI-NEXT: smov w19, v1.h[5]
; CHECK-GI-NEXT: smov w10, v3.h[2]
; CHECK-GI-NEXT: smov w8, v3.h[3]
; CHECK-GI-NEXT: smov w11, v3.h[6]
; CHECK-GI-NEXT: smov w9, v3.h[7]
-; CHECK-GI-NEXT: fmov s3, w17
-; CHECK-GI-NEXT: fmov s4, w0
-; CHECK-GI-NEXT: fmov s5, w2
-; CHECK-GI-NEXT: fmov s6, w4
-; CHECK-GI-NEXT: fmov s7, w6
-; CHECK-GI-NEXT: fmov s16, w19
-; CHECK-GI-NEXT: fmov s17, w16
+; CHECK-GI-NEXT: fmov s3, w16
+; CHECK-GI-NEXT: fmov s4, w18
+; CHECK-GI-NEXT: fmov s5, w1
+; CHECK-GI-NEXT: fmov s6, w3
+; CHECK-GI-NEXT: fmov s7, w5
+; CHECK-GI-NEXT: fmov s16, w7
+; CHECK-GI-NEXT: fmov s17, w15
; CHECK-GI-NEXT: smov w12, v2.h[2]
; CHECK-GI-NEXT: smov w13, v2.h[6]
; CHECK-GI-NEXT: smov w14, v1.h[2]
-; CHECK-GI-NEXT: smov w15, v1.h[6]
-; CHECK-GI-NEXT: mov v3.s[1], w18
-; CHECK-GI-NEXT: mov v4.s[1], w1
-; CHECK-GI-NEXT: mov v5.s[1], w3
-; CHECK-GI-NEXT: mov v6.s[1], w5
-; CHECK-GI-NEXT: mov v7.s[1], w7
-; CHECK-GI-NEXT: mov v16.s[1], w20
+; CHECK-GI-NEXT: smov w16, v1.h[6]
+; CHECK-GI-NEXT: mov v3.s[1], w17
+; CHECK-GI-NEXT: mov v4.s[1], w0
+; CHECK-GI-NEXT: mov v5.s[1], w2
+; CHECK-GI-NEXT: mov v6.s[1], w4
+; CHECK-GI-NEXT: mov v7.s[1], w6
+; CHECK-GI-NEXT: mov v16.s[1], w19
; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT: mov v17.s[1], wzr
-; CHECK-GI-NEXT: smov w16, v2.h[3]
+; CHECK-GI-NEXT: smov w15, v2.h[3]
; CHECK-GI-NEXT: smov w17, v2.h[7]
; CHECK-GI-NEXT: smov w18, v1.h[3]
; CHECK-GI-NEXT: smov w0, v1.h[7]
@@ -3081,12 +3080,12 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b
; CHECK-GI-NEXT: mov v5.s[2], w12
; CHECK-GI-NEXT: mov v6.s[2], w13
; CHECK-GI-NEXT: mov v7.s[2], w14
-; CHECK-GI-NEXT: mov v16.s[2], w15
+; CHECK-GI-NEXT: mov v16.s[2], w16
; CHECK-GI-NEXT: mov v17.s[2], wzr
; CHECK-GI-NEXT: mov v3.s[3], w8
; CHECK-GI-NEXT: mov v4.s[3], w9
; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
-; CHECK-GI-NEXT: mov v5.s[3], w16
+; CHECK-GI-NEXT: mov v5.s[3], w15
; CHECK-GI-NEXT: mov v6.s[3], w17
; CHECK-GI-NEXT: mov v7.s[3], w18
; CHECK-GI-NEXT: mov v16.s[3], w0
@@ -3335,526 +3334,524 @@ define i32 @test_sdot_v25i8_double(<25 x i8> %a, <25 x i8> %b, <25 x i8> %c, <25
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: lsl w8, w0, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #16]
-; CHECK-GI-NEXT: lsl w10, w1, #8
+; CHECK-GI-NEXT: lsl w9, w1, #8
+; CHECK-GI-NEXT: lsl w10, w2, #8
; CHECK-GI-NEXT: ldr w11, [sp, #24]
-; CHECK-GI-NEXT: lsl w12, w4, #8
-; CHECK-GI-NEXT: ldr w13, [sp, #56]
+; CHECK-GI-NEXT: lsl w12, w3, #8
+; CHECK-GI-NEXT: lsl w13, w5, #8
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
; CHECK-GI-NEXT: ldr w14, [sp, #64]
; CHECK-GI-NEXT: fmov s2, w8
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: lsl w8, w11, #8
-; CHECK-GI-NEXT: lsl w11, w2, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #16]
+; CHECK-GI-NEXT: ldr w15, [sp, #96]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: fmov s4, w9
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: ldr w16, [sp, #112]
-; CHECK-GI-NEXT: mov v2.h[1], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #32]
-; CHECK-GI-NEXT: sbfx w9, w11, #8, #8
-; CHECK-GI-NEXT: lsl w11, w3, #8
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
; CHECK-GI-NEXT: movi d1, #0000000000000000
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v4.h[1], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #152]
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: movi d0, #0000000000000000
-; CHECK-GI-NEXT: mov v2.h[2], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #40]
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
; CHECK-GI-NEXT: lsl w8, w8, #8
-; CHECK-GI-NEXT: mov v1.s[1], wzr
-; CHECK-GI-NEXT: mov v4.h[2], w10
-; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #160]
+; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: movi d0, #0000000000000000
+; CHECK-GI-NEXT: mov v2.h[1], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #80]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v1.s[1], wzr
+; CHECK-GI-NEXT: fmov s4, w8
+; CHECK-GI-NEXT: ldr w8, [sp, #32]
; CHECK-GI-NEXT: mov v0.s[1], wzr
-; CHECK-GI-NEXT: mov v2.h[3], w11
+; CHECK-GI-NEXT: mov v2.h[2], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #152]
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #48]
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: fmov s3, w8
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: fmov s5, w9
+; CHECK-GI-NEXT: lsl w9, w10, #8
+; CHECK-GI-NEXT: sbfx w10, w12, #8, #8
+; CHECK-GI-NEXT: mov v4.h[1], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #160]
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: mov v2.h[3], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #40]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: lsl w12, w4, #8
; CHECK-GI-NEXT: mov v1.s[2], wzr
-; CHECK-GI-NEXT: mov v4.h[3], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #80]
-; CHECK-GI-NEXT: lsl w8, w11, #8
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #168]
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: ldr w9, [sp, #48]
+; CHECK-GI-NEXT: mov v4.h[2], w8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #88]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
; CHECK-GI-NEXT: mov v0.s[2], wzr
+; CHECK-GI-NEXT: mov v3.h[1], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #168]
+; CHECK-GI-NEXT: lsl w8, w8, #8
; CHECK-GI-NEXT: mov v2.h[4], w12
-; CHECK-GI-NEXT: lsl w12, w5, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #56]
+; CHECK-GI-NEXT: mov v1.s[3], wzr
+; CHECK-GI-NEXT: mov v4.h[3], w10
+; CHECK-GI-NEXT: lsl w10, w9, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #72]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: mov v0.s[3], wzr
+; CHECK-GI-NEXT: mov v5.h[1], w8
+; CHECK-GI-NEXT: ldr w8, [sp, #176]
+; CHECK-GI-NEXT: mov v2.h[5], w13
+; CHECK-GI-NEXT: mov v4.h[4], w10
+; CHECK-GI-NEXT: lsl w13, w6, #8
+; CHECK-GI-NEXT: mov v3.h[2], w11
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #104]
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #112]
; CHECK-GI-NEXT: lsl w9, w9, #8
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: mov v3.h[1], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #88]
+; CHECK-GI-NEXT: mov v5.h[2], w15
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v4.h[4], w8
-; CHECK-GI-NEXT: lsl w8, w10, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #176]
-; CHECK-GI-NEXT: mov v2.h[5], w12
+; CHECK-GI-NEXT: mov v4.h[5], w12
+; CHECK-GI-NEXT: mov v2.h[6], w13
+; CHECK-GI-NEXT: lsl w13, w7, #8
+; CHECK-GI-NEXT: mov v3.h[3], w8
+; CHECK-GI-NEXT: ldr w8, [sp, #216]
+; CHECK-GI-NEXT: ldr w12, [sp, #184]
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: lsl w12, w6, #8
-; CHECK-GI-NEXT: fmov s6, w9
-; CHECK-GI-NEXT: sbfx w15, w8, #8, #8
-; CHECK-GI-NEXT: lsl w9, w10, #8
-; CHECK-GI-NEXT: mov v3.h[2], w11
-; CHECK-GI-NEXT: sbfx w11, w12, #8, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #96]
+; CHECK-GI-NEXT: ldr w15, [sp, #192]
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: lsl w10, w10, #8
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v4.h[5], w13
+; CHECK-GI-NEXT: mov v4.h[6], w14
+; CHECK-GI-NEXT: lsl w14, w8, #8
+; CHECK-GI-NEXT: mov v2.h[7], w13
; CHECK-GI-NEXT: ldr w13, [sp, #224]
-; CHECK-GI-NEXT: mov v6.h[1], w15
-; CHECK-GI-NEXT: mov v2.h[6], w11
-; CHECK-GI-NEXT: lsl w15, w7, #8
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #184]
-; CHECK-GI-NEXT: ldr w12, [sp, #104]
-; CHECK-GI-NEXT: mov v3.h[3], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #216]
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: mov v5.h[3], w11
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #240]
+; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: mov v3.h[4], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #232]
+; CHECK-GI-NEXT: fmov s6, w14
+; CHECK-GI-NEXT: ldr w14, [sp, #280]
; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v2.h[7], w15
-; CHECK-GI-NEXT: lsl w15, w9, #8
-; CHECK-GI-NEXT: mov v4.h[6], w14
-; CHECK-GI-NEXT: mov v6.h[2], w10
-; CHECK-GI-NEXT: lsl w10, w13, #8
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: sbfx w13, w15, #8, #8
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #288]
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: mov v3.h[4], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #192]
-; CHECK-GI-NEXT: fmov s5, w13
-; CHECK-GI-NEXT: ldr w13, [sp, #232]
-; CHECK-GI-NEXT: ldr w9, [sp, #120]
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mov v6.h[3], w12
-; CHECK-GI-NEXT: ldr w8, [sp, #72]
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: mov v1.s[3], wzr
-; CHECK-GI-NEXT: mov v5.h[1], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #280]
-; CHECK-GI-NEXT: sbfx w15, w11, #8, #8
-; CHECK-GI-NEXT: sbfx w12, w13, #8, #8
-; CHECK-GI-NEXT: lsl w13, w14, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #240]
-; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: mov v6.h[1], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #288]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
; CHECK-GI-NEXT: mov v3.h[5], w15
-; CHECK-GI-NEXT: lsl w15, w16, #8
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: mov v5.h[2], w12
+; CHECK-GI-NEXT: mov v5.h[4], w10
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #208]
+; CHECK-GI-NEXT: ldr w8, [sp, #120]
+; CHECK-GI-NEXT: fmov s7, w14
+; CHECK-GI-NEXT: ldr w14, [sp, #200]
+; CHECK-GI-NEXT: ldr w15, [sp, #128]
+; CHECK-GI-NEXT: mov v6.h[2], w12
; CHECK-GI-NEXT: ldr w12, [sp, #296]
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: lsl w8, w8, #8
-; CHECK-GI-NEXT: fmov s7, w10
-; CHECK-GI-NEXT: ldr w10, [sp, #200]
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v6.h[4], w15
-; CHECK-GI-NEXT: ldr w15, [sp, #304]
-; CHECK-GI-NEXT: ldr w11, [sp, #128]
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v5.h[3], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #208]
+; CHECK-GI-NEXT: lsl w8, w8, #8
; CHECK-GI-NEXT: mov v7.h[1], w13
; CHECK-GI-NEXT: ldr w13, [sp, #248]
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: sbfx w16, w8, #8, #8
+; CHECK-GI-NEXT: mov v6.h[3], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #304]
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v6.h[5], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #272]
-; CHECK-GI-NEXT: mov v3.h[6], w10
-; CHECK-GI-NEXT: lsl w10, w14, #8
-; CHECK-GI-NEXT: sbfx w14, w15, #8, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: mov v3.h[6], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #312]
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: mov v5.h[5], w16
; CHECK-GI-NEXT: mov v7.h[2], w12
; CHECK-GI-NEXT: ldr w12, [sp, #256]
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: ldr w15, [sp, #320]
-; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: mov v5.h[4], w13
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: mov v6.h[4], w13
+; CHECK-GI-NEXT: lsl w15, w15, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: ldr w13, [sp, #312]
; CHECK-GI-NEXT: mov v3.h[7], w10
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mov v4.h[7], w8
-; CHECK-GI-NEXT: mov v7.h[3], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #264]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: mov v4.h[7], w9
+; CHECK-GI-NEXT: ldr w10, [sp, #272]
+; CHECK-GI-NEXT: mov v7.h[3], w11
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: sbfx w8, w9, #8, #8
-; CHECK-GI-NEXT: ldr w16, [sp, #136]
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: mov v5.h[5], w12
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #264]
+; CHECK-GI-NEXT: mov v5.h[6], w15
+; CHECK-GI-NEXT: ldr w15, [sp, #336]
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: mov v6.h[5], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #320]
; CHECK-GI-NEXT: mul v16.8h, v2.8h, v3.8h
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: sbfx w12, w14, #8, #8
-; CHECK-GI-NEXT: lsl w14, w15, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v7.h[4], w14
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #328]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: ldr w13, [sp, #136]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: lsl w9, w14, #8
+; CHECK-GI-NEXT: smov w14, v16.h[4]
; CHECK-GI-NEXT: mov v6.h[6], w11
-; CHECK-GI-NEXT: mov v7.h[4], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #328]
-; CHECK-GI-NEXT: ldr w10, [sp, #144]
-; CHECK-GI-NEXT: mov v5.h[6], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #336]
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: smov w9, v16.h[0]
-; CHECK-GI-NEXT: smov w15, v16.h[4]
+; CHECK-GI-NEXT: smov w16, v16.h[5]
+; CHECK-GI-NEXT: ldr w11, [sp, #352]
+; CHECK-GI-NEXT: mov v7.h[5], w12
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: smov w12, v16.h[0]
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: smov w17, v16.h[5]
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: mov v0.s[3], wzr
-; CHECK-GI-NEXT: sbfx w11, w13, #8, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #144]
+; CHECK-GI-NEXT: fmov s3, w14
+; CHECK-GI-NEXT: smov w14, v16.h[6]
+; CHECK-GI-NEXT: mov v6.h[7], w10
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #344]
+; CHECK-GI-NEXT: mov v7.h[6], w9
+; CHECK-GI-NEXT: fmov s2, w12
+; CHECK-GI-NEXT: ldr w12, [sp, #360]
+; CHECK-GI-NEXT: mov v3.s[1], w16
+; CHECK-GI-NEXT: ldr w9, [sp, #368]
+; CHECK-GI-NEXT: mov v5.h[7], w13
+; CHECK-GI-NEXT: lsl w12, w12, #8
; CHECK-GI-NEXT: smov w13, v16.h[1]
-; CHECK-GI-NEXT: mov v7.h[5], w14
-; CHECK-GI-NEXT: mov v5.h[7], w8
-; CHECK-GI-NEXT: ldr w14, [sp, #344]
-; CHECK-GI-NEXT: ldr w8, [sp, #352]
-; CHECK-GI-NEXT: fmov s2, w9
-; CHECK-GI-NEXT: fmov s3, w15
-; CHECK-GI-NEXT: lsl w9, w12, #8
-; CHECK-GI-NEXT: sbfx w12, w16, #8, #8
-; CHECK-GI-NEXT: sxtb w14, w14
-; CHECK-GI-NEXT: lsl w8, w8, #8
-; CHECK-GI-NEXT: mov v7.h[6], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #360]
-; CHECK-GI-NEXT: smov w15, v16.h[3]
+; CHECK-GI-NEXT: sxtb w8, w8
+; CHECK-GI-NEXT: mul v6.8h, v4.8h, v6.8h
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: sxtb w10, w10
+; CHECK-GI-NEXT: mov v7.h[7], w15
+; CHECK-GI-NEXT: lsl w15, w11, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: mov v3.s[2], w14
+; CHECK-GI-NEXT: smov w14, v16.h[7]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #376]
+; CHECK-GI-NEXT: mul w8, w8, w10
+; CHECK-GI-NEXT: smov w10, v6.h[1]
; CHECK-GI-NEXT: mov v2.s[1], w13
; CHECK-GI-NEXT: smov w13, v16.h[2]
-; CHECK-GI-NEXT: mov v6.h[7], w12
-; CHECK-GI-NEXT: smov w12, v16.h[6]
-; CHECK-GI-NEXT: mov v3.s[1], w17
-; CHECK-GI-NEXT: mul v18.8h, v4.8h, v5.8h
+; CHECK-GI-NEXT: fmov s17, w15
+; CHECK-GI-NEXT: smov w15, v6.h[0]
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: sbfx w16, w9, #8, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #368]
-; CHECK-GI-NEXT: mov v2.s[2], w13
-; CHECK-GI-NEXT: smov w13, v16.h[7]
+; CHECK-GI-NEXT: mov v3.s[3], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #416]
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v3.s[2], w12
-; CHECK-GI-NEXT: sbfx w12, w8, #8, #8
-; CHECK-GI-NEXT: mul w8, w10, w14
-; CHECK-GI-NEXT: smov w10, v18.h[0]
-; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #376]
-; CHECK-GI-NEXT: fmov s16, w12
-; CHECK-GI-NEXT: smov w12, v18.h[1]
-; CHECK-GI-NEXT: mov v7.h[7], w16
-; CHECK-GI-NEXT: mov v2.s[3], w15
-; CHECK-GI-NEXT: smov w15, v18.h[4]
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v3.s[3], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #416]
+; CHECK-GI-NEXT: mov v17.h[1], w12
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: fmov s4, w10
-; CHECK-GI-NEXT: mov v16.h[1], w11
-; CHECK-GI-NEXT: ldr w10, [sp, #424]
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #384]
+; CHECK-GI-NEXT: mov v2.s[2], w13
+; CHECK-GI-NEXT: fmov s4, w15
+; CHECK-GI-NEXT: smov w15, v6.h[4]
+; CHECK-GI-NEXT: smov w13, v16.h[3]
; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #384]
+; CHECK-GI-NEXT: mul v16.8h, v5.8h, v7.8h
+; CHECK-GI-NEXT: mov v17.h[2], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #424]
+; CHECK-GI-NEXT: fmov s18, w14
+; CHECK-GI-NEXT: mov v4.s[1], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #432]
; CHECK-GI-NEXT: fmov s5, w15
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: ldr w15, [sp, #432]
-; CHECK-GI-NEXT: mov v4.s[1], w12
-; CHECK-GI-NEXT: smov w12, v18.h[5]
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mov v16.h[2], w9
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: fmov s17, w13
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mul v7.8h, v6.8h, v7.8h
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #392]
-; CHECK-GI-NEXT: ldr w13, [sp, #400]
-; CHECK-GI-NEXT: mov v5.s[1], w12
-; CHECK-GI-NEXT: smov w12, v18.h[2]
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v17.h[1], w10
-; CHECK-GI-NEXT: mov v16.h[3], w14
-; CHECK-GI-NEXT: ldr w10, [sp, #440]
-; CHECK-GI-NEXT: smov w14, v18.h[6]
; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: ldr w16, [sp, #456]
+; CHECK-GI-NEXT: smov w15, v6.h[5]
+; CHECK-GI-NEXT: lsl w12, w12, #8
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: sxth w8, w8
-; CHECK-GI-NEXT: add v2.4s, v2.4s, v3.4s
-; CHECK-GI-NEXT: mov v4.s[2], w12
-; CHECK-GI-NEXT: smov w12, v18.h[3]
+; CHECK-GI-NEXT: mov v2.s[3], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #392]
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v17.h[2], w15
-; CHECK-GI-NEXT: mov v16.h[4], w11
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: mov v5.s[2], w14
-; CHECK-GI-NEXT: smov w14, v18.h[7]
-; CHECK-GI-NEXT: ldr w15, [sp, #448]
-; CHECK-GI-NEXT: ldr w11, [sp, #408]
-; CHECK-GI-NEXT: mov v4.s[3], w12
-; CHECK-GI-NEXT: smov w12, v7.h[0]
-; CHECK-GI-NEXT: mov v17.h[3], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #480]
-; CHECK-GI-NEXT: mov v16.h[5], w9
-; CHECK-GI-NEXT: lsl w9, w13, #8
-; CHECK-GI-NEXT: lsl w13, w15, #8
-; CHECK-GI-NEXT: mov v5.s[3], w14
+; CHECK-GI-NEXT: mov v17.h[3], w11
+; CHECK-GI-NEXT: smov w11, v6.h[2]
+; CHECK-GI-NEXT: sbfx w16, w10, #8, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #400]
+; CHECK-GI-NEXT: mov v18.h[1], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #440]
+; CHECK-GI-NEXT: mov v5.s[1], w15
+; CHECK-GI-NEXT: smov w15, v6.h[6]
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #408]
+; CHECK-GI-NEXT: mov v4.s[2], w11
+; CHECK-GI-NEXT: smov w11, v16.h[0]
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v17.h[4], w12
+; CHECK-GI-NEXT: lsl w12, w13, #8
+; CHECK-GI-NEXT: ldr w13, [sp, #448]
+; CHECK-GI-NEXT: mov v18.h[2], w16
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: mov v5.s[2], w15
+; CHECK-GI-NEXT: smov w15, v16.h[1]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: fmov s7, w11
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #456]
+; CHECK-GI-NEXT: mov v17.h[5], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #480]
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: smov w14, v7.h[1]
-; CHECK-GI-NEXT: lsl w15, w16, #8
-; CHECK-GI-NEXT: fmov s6, w12
-; CHECK-GI-NEXT: ldr w12, [sp, #488]
+; CHECK-GI-NEXT: mov v18.h[3], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #552]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: mov v7.s[1], w15
+; CHECK-GI-NEXT: ldr w15, [sp, #560]
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v17.h[6], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #488]
+; CHECK-GI-NEXT: sbfx w16, w12, #8, #8
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: mov v18.h[4], w13
+; CHECK-GI-NEXT: lsl w13, w15, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: fmov s19, w16
+; CHECK-GI-NEXT: ldr w15, [sp, #568]
+; CHECK-GI-NEXT: fmov s20, w9
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #464]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v17.h[4], w13
+; CHECK-GI-NEXT: mov v17.h[7], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #504]
+; CHECK-GI-NEXT: mov v20.h[1], w13
; CHECK-GI-NEXT: ldr w13, [sp, #496]
-; CHECK-GI-NEXT: fmov s18, w10
-; CHECK-GI-NEXT: ldr w10, [sp, #552]
-; CHECK-GI-NEXT: mov v6.s[1], w14
+; CHECK-GI-NEXT: mov v19.h[1], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #576]
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: mov v18.h[5], w11
+; CHECK-GI-NEXT: lsl w13, w13, #8
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #464]
-; CHECK-GI-NEXT: mov v16.h[6], w9
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v18.h[1], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #560]
-; CHECK-GI-NEXT: mov v17.h[5], w15
-; CHECK-GI-NEXT: sbfx w15, w10, #8, #8
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #584]
+; CHECK-GI-NEXT: ldr w9, [sp, #472]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #512]
-; CHECK-GI-NEXT: fmov s19, w15
-; CHECK-GI-NEXT: ldr w15, [sp, #616]
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v16.h[7], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #504]
-; CHECK-GI-NEXT: mov v18.h[2], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #568]
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: mov v17.h[6], w14
-; CHECK-GI-NEXT: lsl w14, w15, #8
+; CHECK-GI-NEXT: mov v20.h[2], w15
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v18.h[6], w12
+; CHECK-GI-NEXT: ldr w15, [sp, #512]
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: ldr w15, [sp, #576]
-; CHECK-GI-NEXT: mov v19.h[1], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #624]
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: sbfx w16, w11, #8, #8
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: mov v19.h[2], w13
+; CHECK-GI-NEXT: sbfx w13, w14, #8, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #616]
; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: fmov s20, w14
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: lsl w12, w14, #8
; CHECK-GI-NEXT: ldr w14, [sp, #680]
-; CHECK-GI-NEXT: mov v18.h[3], w16
+; CHECK-GI-NEXT: mov v20.h[3], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #624]
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: mov v19.h[2], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #632]
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: mov v20.h[1], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #688]
+; CHECK-GI-NEXT: mov v19.h[3], w10
; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #688]
+; CHECK-GI-NEXT: mov v18.h[7], w9
+; CHECK-GI-NEXT: fmov s21, w12
; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v18.h[4], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #584]
+; CHECK-GI-NEXT: ldr w12, [sp, #632]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v19.h[3], w15
-; CHECK-GI-NEXT: fmov s21, w14
-; CHECK-GI-NEXT: ldr w15, [sp, #640]
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v20.h[2], w13
+; CHECK-GI-NEXT: mov v20.h[4], w11
+; CHECK-GI-NEXT: fmov s22, w14
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #592]
+; CHECK-GI-NEXT: mov v21.h[1], w13
; CHECK-GI-NEXT: ldr w13, [sp, #696]
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #520]
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: mov v19.h[4], w15
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v21.h[1], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #592]
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: mov v19.h[4], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #704]
-; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: mov v22.h[1], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #640]
+; CHECK-GI-NEXT: ldr w15, [sp, #704]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #520]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v20.h[3], w15
-; CHECK-GI-NEXT: ldr w15, [sp, #648]
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v21.h[2], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #600]
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: mov v21.h[2], w12
; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: mov v20.h[5], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #712]
+; CHECK-GI-NEXT: mov v22.h[2], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #648]
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: mov v18.h[5], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #712]
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v19.h[5], w12
; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: ldr w12, [sp, #656]
; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
; CHECK-GI-NEXT: mov v21.h[3], w10
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #608]
-; CHECK-GI-NEXT: mov v20.h[4], w15
-; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #600]
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #528]
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #528]
-; CHECK-GI-NEXT: ldr w15, [sp, #664]
-; CHECK-GI-NEXT: mov v19.h[6], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #720]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: mov v22.h[3], w15
+; CHECK-GI-NEXT: ldr w15, [sp, #656]
; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: mov v19.h[5], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #720]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: mov v21.h[4], w13
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: ldr w13, [sp, #536]
+; CHECK-GI-NEXT: mov v22.h[4], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #608]
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: mov v20.h[6], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #664]
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: mov v21.h[4], w11
; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: sbfx w16, w10, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: mov v20.h[5], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #728]
+; CHECK-GI-NEXT: mov v21.h[5], w15
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: ldr w15, [sp, #728]
; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mov v19.h[7], w16
-; CHECK-GI-NEXT: ldr w9, [sp, #472]
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v18.h[6], w14
-; CHECK-GI-NEXT: sbfx w14, w15, #8, #8
-; CHECK-GI-NEXT: mov v21.h[5], w13
-; CHECK-GI-NEXT: ldr w15, [sp, #672]
-; CHECK-GI-NEXT: ldr w11, [sp, #536]
-; CHECK-GI-NEXT: ldr w13, [sp, #736]
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: mov v20.h[6], w14
+; CHECK-GI-NEXT: mov v19.h[6], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #672]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v22.h[5], w11
; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mul v19.8h, v16.8h, v19.8h
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v21.h[6], w12
+; CHECK-GI-NEXT: mov v20.h[7], w14
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #736]
+; CHECK-GI-NEXT: mov v21.h[6], w10
; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: smov w14, v7.h[2]
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: smov w10, v6.h[3]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: smov w12, v7.h[4]
-; CHECK-GI-NEXT: mov v17.h[7], w9
-; CHECK-GI-NEXT: mov v20.h[7], w15
-; CHECK-GI-NEXT: smov w9, v7.h[5]
-; CHECK-GI-NEXT: mov v18.h[7], w11
-; CHECK-GI-NEXT: smov w11, v19.h[4]
-; CHECK-GI-NEXT: ldr w15, [sp, #744]
-; CHECK-GI-NEXT: mov v21.h[7], w13
+; CHECK-GI-NEXT: mov v22.h[6], w15
+; CHECK-GI-NEXT: smov w15, v6.h[7]
+; CHECK-GI-NEXT: smov w9, v16.h[2]
+; CHECK-GI-NEXT: mul v17.8h, v17.8h, v20.8h
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: mov v19.h[7], w13
+; CHECK-GI-NEXT: mov v21.h[7], w12
+; CHECK-GI-NEXT: smov w13, v16.h[4]
+; CHECK-GI-NEXT: mov v4.s[3], w10
+; CHECK-GI-NEXT: ldr w11, [sp, #544]
+; CHECK-GI-NEXT: ldr w12, [sp, #744]
+; CHECK-GI-NEXT: add v2.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: mov v22.h[7], w14
+; CHECK-GI-NEXT: mov v5.s[3], w15
+; CHECK-GI-NEXT: mov v7.s[2], w9
+; CHECK-GI-NEXT: smov w10, v17.h[0]
+; CHECK-GI-NEXT: smov w15, v17.h[4]
+; CHECK-GI-NEXT: smov w9, v17.h[1]
+; CHECK-GI-NEXT: mul v20.8h, v18.8h, v21.8h
+; CHECK-GI-NEXT: fmov s6, w13
+; CHECK-GI-NEXT: smov w13, v17.h[5]
+; CHECK-GI-NEXT: smov w14, v16.h[5]
+; CHECK-GI-NEXT: sxtb w11, w11
+; CHECK-GI-NEXT: sxtb w12, w12
+; CHECK-GI-NEXT: mul v18.8h, v19.8h, v22.8h
+; CHECK-GI-NEXT: fmov s22, w8
+; CHECK-GI-NEXT: smov w8, v16.h[7]
+; CHECK-GI-NEXT: fmov s19, w10
+; CHECK-GI-NEXT: fmov s21, w15
+; CHECK-GI-NEXT: smov w10, v17.h[2]
+; CHECK-GI-NEXT: smov w16, v20.h[0]
+; CHECK-GI-NEXT: smov w15, v17.h[6]
+; CHECK-GI-NEXT: mul w11, w11, w12
+; CHECK-GI-NEXT: mov v6.s[1], w14
+; CHECK-GI-NEXT: smov w14, v16.h[6]
+; CHECK-GI-NEXT: smov w12, v20.h[4]
+; CHECK-GI-NEXT: mov v19.s[1], w9
+; CHECK-GI-NEXT: mov v21.s[1], w13
+; CHECK-GI-NEXT: smov w13, v20.h[1]
+; CHECK-GI-NEXT: smov w9, v16.h[3]
+; CHECK-GI-NEXT: fmov s26, w11
+; CHECK-GI-NEXT: mov v22.s[1], wzr
+; CHECK-GI-NEXT: fmov s23, w16
+; CHECK-GI-NEXT: smov w11, v17.h[7]
+; CHECK-GI-NEXT: add v3.4s, v4.4s, v5.4s
; CHECK-GI-NEXT: mov v6.s[2], w14
-; CHECK-GI-NEXT: smov w14, v19.h[0]
+; CHECK-GI-NEXT: smov w14, v20.h[5]
; CHECK-GI-NEXT: fmov s16, w12
-; CHECK-GI-NEXT: smov w13, v19.h[5]
-; CHECK-GI-NEXT: smov w12, v19.h[1]
-; CHECK-GI-NEXT: mul v20.8h, v17.8h, v20.8h
-; CHECK-GI-NEXT: ldr w10, [sp, #544]
-; CHECK-GI-NEXT: add v3.4s, v4.4s, v5.4s
-; CHECK-GI-NEXT: mul v22.8h, v18.8h, v21.8h
-; CHECK-GI-NEXT: fmov s18, w11
-; CHECK-GI-NEXT: mov v16.s[1], w9
-; CHECK-GI-NEXT: fmov s17, w14
-; CHECK-GI-NEXT: smov w14, v7.h[6]
-; CHECK-GI-NEXT: smov w11, v19.h[2]
-; CHECK-GI-NEXT: smov w9, v7.h[3]
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: fmov s21, w8
-; CHECK-GI-NEXT: mov v18.s[1], w13
-; CHECK-GI-NEXT: sxtb w13, w15
-; CHECK-GI-NEXT: smov w15, v20.h[0]
-; CHECK-GI-NEXT: mov v17.s[1], w12
-; CHECK-GI-NEXT: smov w8, v7.h[7]
-; CHECK-GI-NEXT: smov w12, v19.h[6]
-; CHECK-GI-NEXT: mov v16.s[2], w14
-; CHECK-GI-NEXT: smov w14, v20.h[1]
-; CHECK-GI-NEXT: mul w10, w10, w13
-; CHECK-GI-NEXT: smov w13, v20.h[4]
-; CHECK-GI-NEXT: smov w16, v20.h[5]
-; CHECK-GI-NEXT: mov v21.s[1], wzr
-; CHECK-GI-NEXT: fmov s7, w15
-; CHECK-GI-NEXT: smov w15, v20.h[2]
-; CHECK-GI-NEXT: mov v6.s[3], w9
-; CHECK-GI-NEXT: mov v17.s[2], w11
-; CHECK-GI-NEXT: smov w11, v22.h[0]
-; CHECK-GI-NEXT: sxth w10, w10
-; CHECK-GI-NEXT: mov v18.s[2], w12
-; CHECK-GI-NEXT: smov w12, v22.h[1]
-; CHECK-GI-NEXT: mov v16.s[3], w8
-; CHECK-GI-NEXT: mov v7.s[1], w14
-; CHECK-GI-NEXT: smov w14, v22.h[4]
-; CHECK-GI-NEXT: fmov s23, w13
-; CHECK-GI-NEXT: smov w13, v22.h[5]
-; CHECK-GI-NEXT: fmov s26, w10
-; CHECK-GI-NEXT: smov w10, v19.h[7]
-; CHECK-GI-NEXT: fmov s24, w11
-; CHECK-GI-NEXT: smov w11, v20.h[6]
-; CHECK-GI-NEXT: mov v21.s[2], wzr
-; CHECK-GI-NEXT: mov v23.s[1], w16
-; CHECK-GI-NEXT: add v4.4s, v6.4s, v16.4s
-; CHECK-GI-NEXT: add v2.4s, v2.4s, v3.4s
-; CHECK-GI-NEXT: fmov s25, w14
-; CHECK-GI-NEXT: smov w14, v22.h[2]
+; CHECK-GI-NEXT: mov v19.s[2], w10
+; CHECK-GI-NEXT: smov w10, v18.h[0]
+; CHECK-GI-NEXT: mov v21.s[2], w15
+; CHECK-GI-NEXT: mov v23.s[1], w13
+; CHECK-GI-NEXT: smov w13, v18.h[4]
+; CHECK-GI-NEXT: smov w15, v18.h[1]
+; CHECK-GI-NEXT: smov w12, v18.h[5]
; CHECK-GI-NEXT: mov v26.s[1], wzr
-; CHECK-GI-NEXT: mov v24.s[1], w12
-; CHECK-GI-NEXT: smov w12, v19.h[3]
-; CHECK-GI-NEXT: mov v7.s[2], w15
-; CHECK-GI-NEXT: smov w15, v20.h[3]
-; CHECK-GI-NEXT: mov v18.s[3], w10
-; CHECK-GI-NEXT: mov v21.s[3], wzr
-; CHECK-GI-NEXT: mov v25.s[1], w13
-; CHECK-GI-NEXT: smov w13, v22.h[6]
-; CHECK-GI-NEXT: mov v23.s[2], w11
-; CHECK-GI-NEXT: smov w11, v20.h[7]
+; CHECK-GI-NEXT: mov v22.s[2], wzr
+; CHECK-GI-NEXT: mov v16.s[1], w14
+; CHECK-GI-NEXT: smov w14, v20.h[6]
+; CHECK-GI-NEXT: mov v7.s[3], w9
+; CHECK-GI-NEXT: fmov s24, w10
+; CHECK-GI-NEXT: smov w10, v20.h[2]
+; CHECK-GI-NEXT: mov v6.s[3], w8
+; CHECK-GI-NEXT: fmov s25, w13
+; CHECK-GI-NEXT: smov w13, v18.h[2]
+; CHECK-GI-NEXT: mov v21.s[3], w11
; CHECK-GI-NEXT: mov v26.s[2], wzr
-; CHECK-GI-NEXT: mov v24.s[2], w14
-; CHECK-GI-NEXT: smov w14, v22.h[3]
-; CHECK-GI-NEXT: mov v17.s[3], w12
-; CHECK-GI-NEXT: mov v7.s[3], w15
-; CHECK-GI-NEXT: add v1.4s, v21.4s, v1.4s
-; CHECK-GI-NEXT: mov v25.s[2], w13
-; CHECK-GI-NEXT: smov w13, v22.h[7]
-; CHECK-GI-NEXT: mov v23.s[3], w11
+; CHECK-GI-NEXT: mov v22.s[3], wzr
+; CHECK-GI-NEXT: add v2.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: mov v24.s[1], w15
+; CHECK-GI-NEXT: smov w15, v17.h[3]
+; CHECK-GI-NEXT: mov v16.s[2], w14
+; CHECK-GI-NEXT: mov v25.s[1], w12
+; CHECK-GI-NEXT: smov w12, v18.h[6]
+; CHECK-GI-NEXT: mov v23.s[2], w10
+; CHECK-GI-NEXT: smov w10, v20.h[3]
+; CHECK-GI-NEXT: smov w14, v20.h[7]
+; CHECK-GI-NEXT: add v4.4s, v7.4s, v6.4s
; CHECK-GI-NEXT: mov v26.s[3], wzr
-; CHECK-GI-NEXT: mov v24.s[3], w14
-; CHECK-GI-NEXT: add v5.4s, v17.4s, v18.4s
+; CHECK-GI-NEXT: add v1.4s, v22.4s, v1.4s
+; CHECK-GI-NEXT: mov v24.s[2], w13
+; CHECK-GI-NEXT: smov w13, v18.h[3]
+; CHECK-GI-NEXT: mov v19.s[3], w15
+; CHECK-GI-NEXT: mov v25.s[2], w12
+; CHECK-GI-NEXT: smov w12, v18.h[7]
+; CHECK-GI-NEXT: mov v23.s[3], w10
+; CHECK-GI-NEXT: mov v16.s[3], w14
; CHECK-GI-NEXT: add v1.4s, v4.4s, v1.4s
-; CHECK-GI-NEXT: mov v25.s[3], w13
-; CHECK-GI-NEXT: add v6.4s, v7.4s, v23.4s
; CHECK-GI-NEXT: add v0.4s, v26.4s, v0.4s
+; CHECK-GI-NEXT: mov v24.s[3], w13
+; CHECK-GI-NEXT: add v5.4s, v19.4s, v21.4s
+; CHECK-GI-NEXT: mov v25.s[3], w12
; CHECK-GI-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-GI-NEXT: add v6.4s, v23.4s, v16.4s
+; CHECK-GI-NEXT: addv s1, v1.4s
; CHECK-GI-NEXT: add v7.4s, v24.4s, v25.4s
; CHECK-GI-NEXT: add v3.4s, v5.4s, v6.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
-; CHECK-GI-NEXT: add v0.4s, v7.4s, v0.4s
; CHECK-GI-NEXT: fmov w8, s1
+; CHECK-GI-NEXT: add v0.4s, v7.4s, v0.4s
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
; CHECK-GI-NEXT: addv s0, v0.4s
; CHECK-GI-NEXT: fmov w9, s0
@@ -5164,387 +5161,387 @@ define i32 @test_sdot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b
; CHECK-GI-NEXT: .cfi_offset w30, -88
; CHECK-GI-NEXT: .cfi_offset w29, -96
; CHECK-GI-NEXT: ldp q7, q16, [x1]
-; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: movi d5, #0000000000000000
; CHECK-GI-NEXT: str w2, [sp, #12] // 4-byte Folded Spill
+; CHECK-GI-NEXT: movi d6, #0000000000000000
+; CHECK-GI-NEXT: movi d0, #0000000000000000
+; CHECK-GI-NEXT: movi d1, #0000000000000000
; CHECK-GI-NEXT: movi d3, #0000000000000000
; CHECK-GI-NEXT: movi d2, #0000000000000000
-; CHECK-GI-NEXT: movi d5, #0000000000000000
-; CHECK-GI-NEXT: movi d4, #0000000000000000
-; CHECK-GI-NEXT: movi d6, #0000000000000000
-; CHECK-GI-NEXT: mov b19, v7.b[3]
; CHECK-GI-NEXT: mov b23, v7.b[7]
; CHECK-GI-NEXT: mov b17, v7.b[1]
; CHECK-GI-NEXT: fmov w11, s7
; CHECK-GI-NEXT: mov b18, v7.b[2]
+; CHECK-GI-NEXT: mov b19, v7.b[3]
; CHECK-GI-NEXT: mov b20, v7.b[4]
; CHECK-GI-NEXT: mov b21, v7.b[5]
; CHECK-GI-NEXT: mov b22, v7.b[6]
; CHECK-GI-NEXT: mov b24, v7.b[8]
+; CHECK-GI-NEXT: sxtb w11, w11
; CHECK-GI-NEXT: mov b25, v7.b[9]
; CHECK-GI-NEXT: mov b26, v7.b[10]
; CHECK-GI-NEXT: mov b27, v7.b[11]
-; CHECK-GI-NEXT: sxtb w11, w11
; CHECK-GI-NEXT: mov b28, v7.b[12]
-; CHECK-GI-NEXT: fmov w14, s19
-; CHECK-GI-NEXT: mov b19, v7.b[13]
-; CHECK-GI-NEXT: mov b29, v7.b[14]
+; CHECK-GI-NEXT: mov b29, v7.b[13]
+; CHECK-GI-NEXT: mov b30, v7.b[14]
; CHECK-GI-NEXT: mov b7, v7.b[15]
; CHECK-GI-NEXT: fmov w7, s23
-; CHECK-GI-NEXT: mov b23, v16.b[6]
+; CHECK-GI-NEXT: mov b23, v16.b[7]
; CHECK-GI-NEXT: fmov w10, s17
; CHECK-GI-NEXT: fmov w9, s18
+; CHECK-GI-NEXT: fmov w13, s19
; CHECK-GI-NEXT: fmov w8, s24
-; CHECK-GI-NEXT: mov b30, v16.b[1]
-; CHECK-GI-NEXT: fmov w16, s25
+; CHECK-GI-NEXT: mov b17, v16.b[2]
; CHECK-GI-NEXT: fmov w12, s20
-; CHECK-GI-NEXT: fmov w24, s21
+; CHECK-GI-NEXT: fmov w16, s25
+; CHECK-GI-NEXT: fmov w23, s21
; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: sxtb w7, w7
-; CHECK-GI-NEXT: fmov w22, s22
-; CHECK-GI-NEXT: stp s23, s7, [sp, #4] // 8-byte Folded Spill
; CHECK-GI-NEXT: sxtb w9, w9
+; CHECK-GI-NEXT: mov b18, v16.b[1]
+; CHECK-GI-NEXT: stp s23, s7, [sp, #4] // 8-byte Folded Spill
; CHECK-GI-NEXT: sxtb w8, w8
; CHECK-GI-NEXT: fmov s7, w11
-; CHECK-GI-NEXT: mov b20, v16.b[2]
-; CHECK-GI-NEXT: mov b17, v16.b[3]
-; CHECK-GI-NEXT: mov b21, v16.b[4]
-; CHECK-GI-NEXT: mov b18, v16.b[5]
+; CHECK-GI-NEXT: fmov w5, s17
; CHECK-GI-NEXT: fmov w27, s26
-; CHECK-GI-NEXT: fmov w25, s27
-; CHECK-GI-NEXT: mov b22, v16.b[7]
-; CHECK-GI-NEXT: fmov w26, s28
+; CHECK-GI-NEXT: mov b21, v16.b[5]
+; CHECK-GI-NEXT: fmov s17, w8
+; CHECK-GI-NEXT: sxtb w8, w12
+; CHECK-GI-NEXT: fmov w20, s22
; CHECK-GI-NEXT: mov v7.h[1], w10
; CHECK-GI-NEXT: sxtb w10, w16
-; CHECK-GI-NEXT: mov b25, v16.b[8]
-; CHECK-GI-NEXT: fmov w23, s19
-; CHECK-GI-NEXT: mov b24, v16.b[9]
-; CHECK-GI-NEXT: fmov w5, s29
-; CHECK-GI-NEXT: mov b26, v16.b[10]
-; CHECK-GI-NEXT: mov b19, v16.b[11]
-; CHECK-GI-NEXT: fmov w6, s30
-; CHECK-GI-NEXT: mov b27, v16.b[12]
-; CHECK-GI-NEXT: mov b28, v16.b[13]
-; CHECK-GI-NEXT: mov b29, v16.b[14]
-; CHECK-GI-NEXT: sxtb w30, w23
-; CHECK-GI-NEXT: sxtb w5, w5
+; CHECK-GI-NEXT: mov b19, v16.b[3]
+; CHECK-GI-NEXT: mov b22, v16.b[4]
+; CHECK-GI-NEXT: mov b20, v16.b[6]
+; CHECK-GI-NEXT: fmov w21, s27
+; CHECK-GI-NEXT: mov v17.h[1], w10
+; CHECK-GI-NEXT: fmov w24, s28
+; CHECK-GI-NEXT: mov b24, v16.b[8]
+; CHECK-GI-NEXT: fmov w22, s29
+; CHECK-GI-NEXT: mov b26, v16.b[9]
+; CHECK-GI-NEXT: fmov w4, s30
+; CHECK-GI-NEXT: sxtb w10, w21
; CHECK-GI-NEXT: mov v7.h[2], w9
-; CHECK-GI-NEXT: sxtb w9, w14
-; CHECK-GI-NEXT: fmov w20, s16
-; CHECK-GI-NEXT: mov b30, v16.b[15]
-; CHECK-GI-NEXT: fmov s16, w8
-; CHECK-GI-NEXT: sxtb w8, w12
-; CHECK-GI-NEXT: fmov w15, s17
-; CHECK-GI-NEXT: fmov w11, s18
-; CHECK-GI-NEXT: ldp q18, q17, [x0]
+; CHECK-GI-NEXT: sxtb w9, w13
+; CHECK-GI-NEXT: str s20, [sp] // 4-byte Folded Spill
+; CHECK-GI-NEXT: mov b25, v16.b[10]
+; CHECK-GI-NEXT: fmov w25, s18
+; CHECK-GI-NEXT: sxtb w22, w22
+; CHECK-GI-NEXT: mov b27, v16.b[11]
+; CHECK-GI-NEXT: mov b28, v16.b[12]
+; CHECK-GI-NEXT: mov b29, v16.b[13]
+; CHECK-GI-NEXT: mov b30, v16.b[14]
+; CHECK-GI-NEXT: fmov w26, s16
; CHECK-GI-NEXT: mov v7.h[3], w9
; CHECK-GI-NEXT: sxtb w9, w27
-; CHECK-GI-NEXT: fmov w18, s20
-; CHECK-GI-NEXT: sxtb w15, w15
-; CHECK-GI-NEXT: mov v16.h[1], w10
-; CHECK-GI-NEXT: sxtb w10, w25
-; CHECK-GI-NEXT: mov b20, v18.b[3]
-; CHECK-GI-NEXT: fmov w2, s22
-; CHECK-GI-NEXT: mov b22, v18.b[1]
-; CHECK-GI-NEXT: sxtb w18, w18
-; CHECK-GI-NEXT: fmov w13, s21
-; CHECK-GI-NEXT: mov b21, v18.b[2]
+; CHECK-GI-NEXT: mov b31, v16.b[15]
+; CHECK-GI-NEXT: ldp q18, q16, [x0]
+; CHECK-GI-NEXT: fmov w2, s21
+; CHECK-GI-NEXT: sxtb w26, w26
+; CHECK-GI-NEXT: mov v17.h[2], w9
+; CHECK-GI-NEXT: fmov w14, s22
+; CHECK-GI-NEXT: fmov w3, s25
+; CHECK-GI-NEXT: fmov w15, s19
+; CHECK-GI-NEXT: fmov w19, s24
; CHECK-GI-NEXT: mov v7.h[4], w8
-; CHECK-GI-NEXT: fmov w3, s19
-; CHECK-GI-NEXT: mov b19, v18.b[6]
-; CHECK-GI-NEXT: mov v16.h[2], w9
-; CHECK-GI-NEXT: sxtb w9, w24
-; CHECK-GI-NEXT: fmov w21, s25
-; CHECK-GI-NEXT: sxtb w13, w13
-; CHECK-GI-NEXT: fmov w28, s20
-; CHECK-GI-NEXT: mov b20, v18.b[11]
-; CHECK-GI-NEXT: fmov w8, s22
-; CHECK-GI-NEXT: mov b25, v18.b[8]
+; CHECK-GI-NEXT: sxtb w8, w23
+; CHECK-GI-NEXT: mov b21, v18.b[2]
+; CHECK-GI-NEXT: mov b22, v18.b[1]
+; CHECK-GI-NEXT: mov b25, v18.b[5]
+; CHECK-GI-NEXT: mov b23, v18.b[6]
+; CHECK-GI-NEXT: sxtb w19, w19
+; CHECK-GI-NEXT: sxtb w3, w3
+; CHECK-GI-NEXT: mov v17.h[3], w10
+; CHECK-GI-NEXT: sxtb w10, w24
+; CHECK-GI-NEXT: sxtb w24, w7
+; CHECK-GI-NEXT: mov b19, v18.b[3]
+; CHECK-GI-NEXT: mov v7.h[5], w8
+; CHECK-GI-NEXT: sxtb w8, w20
; CHECK-GI-NEXT: fmov w29, s21
-; CHECK-GI-NEXT: mov v7.h[5], w9
-; CHECK-GI-NEXT: sxtb w9, w22
-; CHECK-GI-NEXT: fmov w19, s24
-; CHECK-GI-NEXT: mov v16.h[3], w10
-; CHECK-GI-NEXT: sxtb w10, w26
-; CHECK-GI-NEXT: fmov w26, s18
-; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: sxtb w29, w29
-; CHECK-GI-NEXT: mov b24, v18.b[4]
-; CHECK-GI-NEXT: mov b23, v18.b[5]
+; CHECK-GI-NEXT: mov b21, v18.b[10]
+; CHECK-GI-NEXT: fmov w9, s22
+; CHECK-GI-NEXT: fmov w6, s26
+; CHECK-GI-NEXT: mov v17.h[4], w10
+; CHECK-GI-NEXT: sxtb w10, w25
; CHECK-GI-NEXT: fmov w17, s27
+; CHECK-GI-NEXT: mov b26, v18.b[4]
+; CHECK-GI-NEXT: fmov w18, s28
+; CHECK-GI-NEXT: fmov w16, s29
+; CHECK-GI-NEXT: mov v7.h[6], w8
+; CHECK-GI-NEXT: fmov w8, s18
+; CHECK-GI-NEXT: mov b24, v18.b[7]
+; CHECK-GI-NEXT: fmov w30, s21
+; CHECK-GI-NEXT: mov b20, v18.b[8]
; CHECK-GI-NEXT: mov b27, v18.b[9]
-; CHECK-GI-NEXT: sxtb w23, w26
-; CHECK-GI-NEXT: mov v7.h[6], w9
-; CHECK-GI-NEXT: fmov w24, s19
-; CHECK-GI-NEXT: mov v16.h[4], w10
-; CHECK-GI-NEXT: mov b19, v18.b[14]
-; CHECK-GI-NEXT: fmov w10, s25
-; CHECK-GI-NEXT: fmov w4, s26
-; CHECK-GI-NEXT: fmov w16, s28
-; CHECK-GI-NEXT: mov b26, v18.b[7]
-; CHECK-GI-NEXT: mov b28, v18.b[10]
-; CHECK-GI-NEXT: fmov w27, s24
-; CHECK-GI-NEXT: mov b24, v18.b[12]
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: mov v7.h[7], w7
-; CHECK-GI-NEXT: fmov w7, s20
-; CHECK-GI-NEXT: sxtb w4, w4
-; CHECK-GI-NEXT: fmov s20, w23
-; CHECK-GI-NEXT: fmov w25, s23
-; CHECK-GI-NEXT: mov b23, v18.b[13]
+; CHECK-GI-NEXT: sxtb w16, w16
+; CHECK-GI-NEXT: mov b28, v18.b[11]
+; CHECK-GI-NEXT: mov b29, v18.b[12]
+; CHECK-GI-NEXT: fmov w23, s25
+; CHECK-GI-NEXT: mov b25, v18.b[13]
+; CHECK-GI-NEXT: fmov w21, s23
+; CHECK-GI-NEXT: mov v7.h[7], w24
+; CHECK-GI-NEXT: sxtb w24, w8
+; CHECK-GI-NEXT: sxtb w8, w9
+; CHECK-GI-NEXT: sxtb w9, w29
+; CHECK-GI-NEXT: mov b23, v18.b[14]
; CHECK-GI-NEXT: mov b22, v18.b[15]
-; CHECK-GI-NEXT: mov v16.h[5], w30
+; CHECK-GI-NEXT: fmov s21, w24
+; CHECK-GI-NEXT: fmov s18, w26
+; CHECK-GI-NEXT: fmov w28, s19
+; CHECK-GI-NEXT: mov b19, v16.b[1]
+; CHECK-GI-NEXT: mov v17.h[5], w22
+; CHECK-GI-NEXT: fmov w7, s20
+; CHECK-GI-NEXT: fmov w11, s27
+; CHECK-GI-NEXT: fmov w27, s26
+; CHECK-GI-NEXT: mov b20, v16.b[2]
+; CHECK-GI-NEXT: mov v21.h[1], w8
+; CHECK-GI-NEXT: sxtb w8, w4
+; CHECK-GI-NEXT: mov v18.h[1], w10
+; CHECK-GI-NEXT: sxtb w10, w5
; CHECK-GI-NEXT: sxtb w7, w7
-; CHECK-GI-NEXT: fmov w9, s27
-; CHECK-GI-NEXT: mov b21, v17.b[1]
-; CHECK-GI-NEXT: mov v20.h[1], w8
-; CHECK-GI-NEXT: sxtb w8, w20
-; CHECK-GI-NEXT: sxtb w20, w6
-; CHECK-GI-NEXT: fmov w6, s19
-; CHECK-GI-NEXT: fmov w26, s28
-; CHECK-GI-NEXT: mov b28, v17.b[8]
-; CHECK-GI-NEXT: fmov s18, w8
-; CHECK-GI-NEXT: sxtb w8, w21
-; CHECK-GI-NEXT: mov v16.h[6], w5
-; CHECK-GI-NEXT: fmov w5, s22
-; CHECK-GI-NEXT: fmov s22, w10
-; CHECK-GI-NEXT: sxtb w10, w27
-; CHECK-GI-NEXT: sxtb w26, w26
-; CHECK-GI-NEXT: mov v20.h[2], w29
-; CHECK-GI-NEXT: fmov s19, w8
-; CHECK-GI-NEXT: sxtb w8, w28
-; CHECK-GI-NEXT: sxtb w28, w19
-; CHECK-GI-NEXT: sxtb w19, w9
-; CHECK-GI-NEXT: fmov w27, s17
-; CHECK-GI-NEXT: mov b25, v17.b[2]
-; CHECK-GI-NEXT: fmov w29, s21
-; CHECK-GI-NEXT: mov b21, v17.b[9]
-; CHECK-GI-NEXT: mov v22.h[1], w19
-; CHECK-GI-NEXT: fmov w23, s23
-; CHECK-GI-NEXT: mov v20.h[3], w8
-; CHECK-GI-NEXT: mov b23, v17.b[6]
-; CHECK-GI-NEXT: fmov w30, s24
-; CHECK-GI-NEXT: sxtb w27, w27
-; CHECK-GI-NEXT: mov b24, v17.b[5]
-; CHECK-GI-NEXT: mov v18.h[1], w20
-; CHECK-GI-NEXT: fmov w21, s25
-; CHECK-GI-NEXT: mov b25, v17.b[10]
-; CHECK-GI-NEXT: mov v19.h[1], w28
-; CHECK-GI-NEXT: sxtb w28, w29
-; CHECK-GI-NEXT: mov v22.h[2], w26
-; CHECK-GI-NEXT: fmov w26, s21
-; CHECK-GI-NEXT: mov v20.h[4], w10
-; CHECK-GI-NEXT: fmov w10, s28
-; CHECK-GI-NEXT: fmov s21, w27
-; CHECK-GI-NEXT: sxtb w21, w21
-; CHECK-GI-NEXT: mov b27, v17.b[3]
-; CHECK-GI-NEXT: fmov w19, s23
-; CHECK-GI-NEXT: sxtb w26, w26
-; CHECK-GI-NEXT: fmov w22, s26
-; CHECK-GI-NEXT: mov b26, v17.b[4]
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: mov v21.h[1], w28
-; CHECK-GI-NEXT: fmov w8, s24
-; CHECK-GI-NEXT: mov b24, v17.b[11]
-; CHECK-GI-NEXT: fmov w27, s25
-; CHECK-GI-NEXT: mov v18.h[2], w18
-; CHECK-GI-NEXT: sxtb w18, w25
-; CHECK-GI-NEXT: fmov s23, w10
-; CHECK-GI-NEXT: fmov w20, s27
+; CHECK-GI-NEXT: fmov w24, s23
+; CHECK-GI-NEXT: mov b23, v16.b[6]
+; CHECK-GI-NEXT: fmov w4, s22
+; CHECK-GI-NEXT: mov b22, v16.b[8]
+; CHECK-GI-NEXT: mov v17.h[6], w8
+; CHECK-GI-NEXT: fmov w8, s19
+; CHECK-GI-NEXT: fmov s19, w19
+; CHECK-GI-NEXT: mov v21.h[2], w9
+; CHECK-GI-NEXT: sxtb w9, w28
+; CHECK-GI-NEXT: mov v18.h[2], w10
+; CHECK-GI-NEXT: sxtb w10, w6
+; CHECK-GI-NEXT: mov b27, v16.b[9]
+; CHECK-GI-NEXT: fmov w20, s24
; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: fmov w9, s26
-; CHECK-GI-NEXT: mov b26, v17.b[12]
-; CHECK-GI-NEXT: sxtb w25, w27
-; CHECK-GI-NEXT: mov v20.h[5], w18
-; CHECK-GI-NEXT: sxtb w18, w3
-; CHECK-GI-NEXT: sxtb w3, w24
-; CHECK-GI-NEXT: mov v23.h[1], w26
-; CHECK-GI-NEXT: mov v21.h[2], w21
+; CHECK-GI-NEXT: mov b24, v16.b[3]
+; CHECK-GI-NEXT: fmov w5, s20
+; CHECK-GI-NEXT: mov v19.h[1], w10
+; CHECK-GI-NEXT: fmov w10, s23
+; CHECK-GI-NEXT: fmov s23, w7
+; CHECK-GI-NEXT: mov v21.h[3], w9
+; CHECK-GI-NEXT: sxtb w9, w11
+; CHECK-GI-NEXT: sxtb w11, w27
+; CHECK-GI-NEXT: sxtb w27, w30
+; CHECK-GI-NEXT: sxtb w5, w5
+; CHECK-GI-NEXT: fmov w7, s22
+; CHECK-GI-NEXT: sxtb w10, w10
+; CHECK-GI-NEXT: mov v23.h[1], w9
+; CHECK-GI-NEXT: fmov w9, s16
+; CHECK-GI-NEXT: mov b20, v16.b[10]
+; CHECK-GI-NEXT: fmov w22, s28
+; CHECK-GI-NEXT: fmov w25, s25
+; CHECK-GI-NEXT: sxtb w7, w7
+; CHECK-GI-NEXT: mov v21.h[4], w11
+; CHECK-GI-NEXT: fmov w11, s27
; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: fmov w28, s24
-; CHECK-GI-NEXT: mov v22.h[3], w7
-; CHECK-GI-NEXT: sxtb w7, w20
-; CHECK-GI-NEXT: mov v19.h[2], w4
-; CHECK-GI-NEXT: sxtb w4, w30
+; CHECK-GI-NEXT: mov b25, v16.b[5]
+; CHECK-GI-NEXT: fmov w29, s24
+; CHECK-GI-NEXT: fmov s22, w7
+; CHECK-GI-NEXT: mov v23.h[2], w27
+; CHECK-GI-NEXT: mov b24, v16.b[11]
+; CHECK-GI-NEXT: sxtb w11, w11
+; CHECK-GI-NEXT: fmov w27, s20
+; CHECK-GI-NEXT: fmov s20, w9
+; CHECK-GI-NEXT: fmov w26, s29
+; CHECK-GI-NEXT: mov b26, v16.b[4]
+; CHECK-GI-NEXT: mov v19.h[2], w3
+; CHECK-GI-NEXT: sxtb w3, w29
; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v18.h[3], w15
-; CHECK-GI-NEXT: sxtb w20, w28
+; CHECK-GI-NEXT: mov v22.h[1], w11
+; CHECK-GI-NEXT: sxtb w11, w15
+; CHECK-GI-NEXT: sxtb w15, w22
+; CHECK-GI-NEXT: sxtb w22, w23
+; CHECK-GI-NEXT: mov v20.h[1], w8
+; CHECK-GI-NEXT: fmov w6, s25
+; CHECK-GI-NEXT: mov v18.h[3], w11
+; CHECK-GI-NEXT: sxtb w11, w27
+; CHECK-GI-NEXT: mov v23.h[3], w15
; CHECK-GI-NEXT: sxtb w15, w17
-; CHECK-GI-NEXT: sxtb w17, w22
+; CHECK-GI-NEXT: sxtb w17, w21
+; CHECK-GI-NEXT: mov b25, v16.b[12]
+; CHECK-GI-NEXT: fmov w28, s24
+; CHECK-GI-NEXT: mov v21.h[5], w22
+; CHECK-GI-NEXT: mov v22.h[2], w11
+; CHECK-GI-NEXT: sxtb w11, w14
+; CHECK-GI-NEXT: sxtb w14, w26
+; CHECK-GI-NEXT: mov v20.h[2], w5
; CHECK-GI-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov v23.h[2], w25
-; CHECK-GI-NEXT: mov v20.h[6], w3
-; CHECK-GI-NEXT: mov v21.h[3], w7
-; CHECK-GI-NEXT: fmov w10, s26
-; CHECK-GI-NEXT: mov v22.h[4], w4
-; CHECK-GI-NEXT: mov v19.h[3], w18
-; CHECK-GI-NEXT: sxtb w18, w23
-; CHECK-GI-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov b27, v17.b[13]
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: mov v23.h[3], w20
-; CHECK-GI-NEXT: mov v18.h[4], w13
-; CHECK-GI-NEXT: sxtb w13, w6
-; CHECK-GI-NEXT: mov v20.h[7], w17
-; CHECK-GI-NEXT: mov v21.h[4], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
-; CHECK-GI-NEXT: mov v22.h[5], w18
-; CHECK-GI-NEXT: mov b25, v17.b[14]
-; CHECK-GI-NEXT: fmov w26, s27
-; CHECK-GI-NEXT: mov v19.h[4], w15
-; CHECK-GI-NEXT: fmov w14, s29
-; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: mov v23.h[4], w10
-; CHECK-GI-NEXT: sxtb w10, w11
-; CHECK-GI-NEXT: sxtb w11, w16
-; CHECK-GI-NEXT: mov v21.h[5], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #4] // 4-byte Folded Reload
-; CHECK-GI-NEXT: sxtb w15, w26
+; CHECK-GI-NEXT: fmov w19, s26
+; CHECK-GI-NEXT: mov v18.h[4], w11
+; CHECK-GI-NEXT: sxtb w11, w28
+; CHECK-GI-NEXT: mov v23.h[4], w14
+; CHECK-GI-NEXT: sxtb w14, w25
; CHECK-GI-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov v18.h[5], w10
-; CHECK-GI-NEXT: sxtb w10, w19
+; CHECK-GI-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov b26, v16.b[13]
+; CHECK-GI-NEXT: fmov w7, s25
+; CHECK-GI-NEXT: mov v19.h[3], w15
+; CHECK-GI-NEXT: sxtb w15, w18
+; CHECK-GI-NEXT: sxtb w18, w19
+; CHECK-GI-NEXT: mov v21.h[6], w17
+; CHECK-GI-NEXT: sxtb w17, w20
; CHECK-GI-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mul v20.8h, v7.8h, v20.8h
-; CHECK-GI-NEXT: mov b7, v17.b[7]
-; CHECK-GI-NEXT: mov v22.h[6], w13
-; CHECK-GI-NEXT: sxtb w13, w5
-; CHECK-GI-NEXT: fmov w27, s25
-; CHECK-GI-NEXT: mov v19.h[5], w11
+; CHECK-GI-NEXT: mov v22.h[3], w11
; CHECK-GI-NEXT: sxtb w11, w2
-; CHECK-GI-NEXT: mov b17, v17.b[15]
-; CHECK-GI-NEXT: mov v18.h[6], w8
-; CHECK-GI-NEXT: mov v16.h[7], w9
-; CHECK-GI-NEXT: sxtb w9, w14
-; CHECK-GI-NEXT: mov v23.h[5], w15
-; CHECK-GI-NEXT: mov v21.h[6], w10
-; CHECK-GI-NEXT: sxtb w14, w27
-; CHECK-GI-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: fmov w8, s7
-; CHECK-GI-NEXT: mov v22.h[7], w13
-; CHECK-GI-NEXT: fmov w12, s30
-; CHECK-GI-NEXT: mov v19.h[6], w9
-; CHECK-GI-NEXT: fmov w9, s17
-; CHECK-GI-NEXT: smov w10, v20.h[0]
-; CHECK-GI-NEXT: mov v23.h[6], w14
-; CHECK-GI-NEXT: mov v18.h[7], w11
-; CHECK-GI-NEXT: smov w13, v20.h[1]
+; CHECK-GI-NEXT: mov v20.h[3], w3
+; CHECK-GI-NEXT: mov v23.h[5], w14
+; CHECK-GI-NEXT: sxtb w14, w24
+; CHECK-GI-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v18.h[5], w11
+; CHECK-GI-NEXT: sxtb w11, w7
+; CHECK-GI-NEXT: fmov w8, s26
+; CHECK-GI-NEXT: mov v19.h[4], w15
+; CHECK-GI-NEXT: ldr w15, [sp] // 4-byte Folded Reload
+; CHECK-GI-NEXT: mov v21.h[7], w17
+; CHECK-GI-NEXT: sxtb w17, w6
+; CHECK-GI-NEXT: mov v22.h[4], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #8] // 4-byte Folded Reload
; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: sxtb w12, w12
-; CHECK-GI-NEXT: smov w11, v20.h[4]
+; CHECK-GI-NEXT: sxtb w15, w15
+; CHECK-GI-NEXT: fmov w13, s30
+; CHECK-GI-NEXT: sxtb w11, w11
+; CHECK-GI-NEXT: mov v20.h[4], w18
+; CHECK-GI-NEXT: mov v23.h[6], w14
+; CHECK-GI-NEXT: mov v19.h[5], w16
+; CHECK-GI-NEXT: mov b27, v16.b[14]
+; CHECK-GI-NEXT: mul v24.8h, v7.8h, v21.8h
+; CHECK-GI-NEXT: mov v22.h[5], w8
+; CHECK-GI-NEXT: sxtb w8, w4
+; CHECK-GI-NEXT: mov b7, v16.b[7]
+; CHECK-GI-NEXT: mov b16, v16.b[15]
+; CHECK-GI-NEXT: fmov w12, s31
+; CHECK-GI-NEXT: mov v17.h[7], w11
+; CHECK-GI-NEXT: sxtb w11, w13
+; CHECK-GI-NEXT: ldr w13, [sp, #4] // 4-byte Folded Reload
+; CHECK-GI-NEXT: mov v20.h[5], w17
+; CHECK-GI-NEXT: mov v23.h[7], w8
+; CHECK-GI-NEXT: fmov w9, s27
+; CHECK-GI-NEXT: mov v18.h[6], w15
+; CHECK-GI-NEXT: sxtb w8, w12
+; CHECK-GI-NEXT: sxtb w13, w13
+; CHECK-GI-NEXT: mov v19.h[6], w11
+; CHECK-GI-NEXT: fmov w12, s16
+; CHECK-GI-NEXT: fmov w11, s7
+; CHECK-GI-NEXT: movi d4, #0000000000000000
; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: mul v22.8h, v16.8h, v22.8h
-; CHECK-GI-NEXT: smov w14, v20.h[3]
-; CHECK-GI-NEXT: mov v21.h[7], w8
-; CHECK-GI-NEXT: ldrsb w8, [x0, #32]
-; CHECK-GI-NEXT: mov v19.h[7], w12
-; CHECK-GI-NEXT: mov v23.h[7], w9
-; CHECK-GI-NEXT: ldrsb w9, [x1, #32]
+; CHECK-GI-NEXT: mov v20.h[6], w10
+; CHECK-GI-NEXT: smov w10, v24.h[0]
+; CHECK-GI-NEXT: mul v21.8h, v17.8h, v23.8h
+; CHECK-GI-NEXT: mov v18.h[7], w13
+; CHECK-GI-NEXT: mov v5.s[1], wzr
+; CHECK-GI-NEXT: sxtb w11, w11
+; CHECK-GI-NEXT: mov v19.h[7], w8
+; CHECK-GI-NEXT: sxtb w8, w12
+; CHECK-GI-NEXT: smov w12, v24.h[4]
+; CHECK-GI-NEXT: mov v22.h[6], w9
+; CHECK-GI-NEXT: smov w9, v24.h[1]
+; CHECK-GI-NEXT: mov v20.h[7], w11
+; CHECK-GI-NEXT: smov w11, v24.h[5]
; CHECK-GI-NEXT: fmov s7, w10
-; CHECK-GI-NEXT: smov w10, v20.h[2]
-; CHECK-GI-NEXT: smov w12, v20.h[5]
-; CHECK-GI-NEXT: fmov s16, w11
-; CHECK-GI-NEXT: mul w9, w9, w8
-; CHECK-GI-NEXT: smov w15, v22.h[4]
-; CHECK-GI-NEXT: smov w17, v22.h[5]
-; CHECK-GI-NEXT: mul v24.8h, v18.8h, v21.8h
-; CHECK-GI-NEXT: mov v7.s[1], w13
-; CHECK-GI-NEXT: smov w13, v22.h[0]
-; CHECK-GI-NEXT: mul v18.8h, v19.8h, v23.8h
-; CHECK-GI-NEXT: smov w16, v22.h[1]
-; CHECK-GI-NEXT: smov w8, v20.h[7]
-; CHECK-GI-NEXT: sxth w9, w9
-; CHECK-GI-NEXT: mov v16.s[1], w12
-; CHECK-GI-NEXT: movi d0, #0000000000000000
-; CHECK-GI-NEXT: fmov s19, w15
-; CHECK-GI-NEXT: smov w15, v22.h[6]
-; CHECK-GI-NEXT: mov v1.s[1], wzr
-; CHECK-GI-NEXT: smov w11, v24.h[0]
-; CHECK-GI-NEXT: mov v7.s[2], w10
-; CHECK-GI-NEXT: smov w10, v20.h[6]
-; CHECK-GI-NEXT: smov w12, v24.h[1]
-; CHECK-GI-NEXT: smov w0, v18.h[4]
+; CHECK-GI-NEXT: ldrsb w10, [x1, #32]
+; CHECK-GI-NEXT: smov w13, v21.h[0]
+; CHECK-GI-NEXT: smov w14, v21.h[1]
+; CHECK-GI-NEXT: smov w15, v21.h[4]
+; CHECK-GI-NEXT: mov v6.s[1], wzr
+; CHECK-GI-NEXT: mov v0.s[1], wzr
+; CHECK-GI-NEXT: fmov s16, w12
+; CHECK-GI-NEXT: mov v22.h[7], w8
+; CHECK-GI-NEXT: smov w12, v24.h[6]
+; CHECK-GI-NEXT: smov w8, v24.h[2]
+; CHECK-GI-NEXT: mov v7.s[1], w9
+; CHECK-GI-NEXT: ldrsb w9, [x0, #32]
; CHECK-GI-NEXT: fmov s17, w13
-; CHECK-GI-NEXT: mov v19.s[1], w17
-; CHECK-GI-NEXT: smov w17, v18.h[0]
-; CHECK-GI-NEXT: smov w18, v18.h[1]
-; CHECK-GI-NEXT: smov w13, v22.h[2]
+; CHECK-GI-NEXT: mul v23.8h, v18.8h, v20.8h
+; CHECK-GI-NEXT: smov w13, v24.h[7]
+; CHECK-GI-NEXT: mov v16.s[1], w11
+; CHECK-GI-NEXT: smov w11, v21.h[5]
+; CHECK-GI-NEXT: fmov s18, w15
+; CHECK-GI-NEXT: mul v19.8h, v19.8h, v22.8h
+; CHECK-GI-NEXT: smov w15, v21.h[6]
+; CHECK-GI-NEXT: mov v1.s[1], wzr
+; CHECK-GI-NEXT: mov v17.s[1], w14
+; CHECK-GI-NEXT: smov w14, v21.h[2]
+; CHECK-GI-NEXT: mov v7.s[2], w8
+; CHECK-GI-NEXT: mul w8, w10, w9
+; CHECK-GI-NEXT: smov w9, v23.h[0]
+; CHECK-GI-NEXT: smov w10, v23.h[1]
+; CHECK-GI-NEXT: mov v16.s[2], w12
+; CHECK-GI-NEXT: smov w12, v21.h[3]
+; CHECK-GI-NEXT: mov v18.s[1], w11
+; CHECK-GI-NEXT: smov w11, v23.h[4]
; CHECK-GI-NEXT: mov v3.s[1], wzr
; CHECK-GI-NEXT: mov v2.s[1], wzr
-; CHECK-GI-NEXT: fmov s20, w11
-; CHECK-GI-NEXT: smov w11, v24.h[4]
-; CHECK-GI-NEXT: mov v7.s[3], w14
-; CHECK-GI-NEXT: smov w14, v24.h[5]
-; CHECK-GI-NEXT: mov v17.s[1], w16
-; CHECK-GI-NEXT: smov w16, v24.h[2]
-; CHECK-GI-NEXT: mov v19.s[2], w15
-; CHECK-GI-NEXT: smov w15, v18.h[5]
-; CHECK-GI-NEXT: fmov s23, w0
-; CHECK-GI-NEXT: mov v20.s[1], w12
-; CHECK-GI-NEXT: mov v16.s[2], w10
-; CHECK-GI-NEXT: smov w10, v22.h[3]
-; CHECK-GI-NEXT: fmov s21, w11
-; CHECK-GI-NEXT: smov w11, v22.h[7]
-; CHECK-GI-NEXT: fmov s22, w17
-; CHECK-GI-NEXT: mov v5.s[1], wzr
+; CHECK-GI-NEXT: mov v17.s[2], w14
+; CHECK-GI-NEXT: smov w14, v23.h[5]
; CHECK-GI-NEXT: mov v4.s[1], wzr
-; CHECK-GI-NEXT: mov v6.s[1], wzr
-; CHECK-GI-NEXT: mov v23.s[1], w15
-; CHECK-GI-NEXT: smov w15, v18.h[6]
-; CHECK-GI-NEXT: mov v0.s[1], wzr
+; CHECK-GI-NEXT: fmov s20, w9
+; CHECK-GI-NEXT: smov w9, v19.h[1]
+; CHECK-GI-NEXT: mov v5.s[2], wzr
+; CHECK-GI-NEXT: mov v16.s[3], w13
+; CHECK-GI-NEXT: smov w13, v19.h[0]
+; CHECK-GI-NEXT: mov v18.s[2], w15
+; CHECK-GI-NEXT: smov w15, v21.h[7]
+; CHECK-GI-NEXT: fmov s21, w11
+; CHECK-GI-NEXT: smov w11, v23.h[2]
+; CHECK-GI-NEXT: mov v17.s[3], w12
+; CHECK-GI-NEXT: smov w12, v19.h[4]
+; CHECK-GI-NEXT: mov v20.s[1], w10
+; CHECK-GI-NEXT: smov w10, v23.h[3]
+; CHECK-GI-NEXT: mov v6.s[2], wzr
+; CHECK-GI-NEXT: smov w16, v24.h[3]
+; CHECK-GI-NEXT: fmov s22, w13
+; CHECK-GI-NEXT: smov w13, v19.h[5]
; CHECK-GI-NEXT: mov v21.s[1], w14
-; CHECK-GI-NEXT: smov w14, v24.h[6]
-; CHECK-GI-NEXT: mov v20.s[2], w16
-; CHECK-GI-NEXT: mov v22.s[1], w18
-; CHECK-GI-NEXT: smov w16, v18.h[2]
+; CHECK-GI-NEXT: smov w14, v23.h[6]
+; CHECK-GI-NEXT: mov v18.s[3], w15
+; CHECK-GI-NEXT: smov w15, v19.h[2]
+; CHECK-GI-NEXT: mov v20.s[2], w11
+; CHECK-GI-NEXT: smov w11, v19.h[6]
+; CHECK-GI-NEXT: mov v0.s[2], wzr
+; CHECK-GI-NEXT: mov v22.s[1], w9
+; CHECK-GI-NEXT: smov w9, v23.h[7]
+; CHECK-GI-NEXT: fmov s23, w12
+; CHECK-GI-NEXT: smov w12, v19.h[3]
; CHECK-GI-NEXT: mov v1.s[2], wzr
; CHECK-GI-NEXT: mov v3.s[2], wzr
+; CHECK-GI-NEXT: mov v21.s[2], w14
; CHECK-GI-NEXT: mov v2.s[2], wzr
-; CHECK-GI-NEXT: mov v5.s[2], wzr
; CHECK-GI-NEXT: mov v4.s[2], wzr
-; CHECK-GI-NEXT: mov v6.s[2], wzr
-; CHECK-GI-NEXT: mov v23.s[2], w15
-; CHECK-GI-NEXT: mov v21.s[2], w14
-; CHECK-GI-NEXT: smov w14, v18.h[3]
-; CHECK-GI-NEXT: smov w15, v18.h[7]
-; CHECK-GI-NEXT: fmov s18, w9
-; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
-; CHECK-GI-NEXT: mov v17.s[2], w13
-; CHECK-GI-NEXT: smov w12, v24.h[3]
-; CHECK-GI-NEXT: smov w13, v24.h[7]
-; CHECK-GI-NEXT: mov v22.s[2], w16
-; CHECK-GI-NEXT: mov v0.s[2], wzr
+; CHECK-GI-NEXT: mov v23.s[1], w13
+; CHECK-GI-NEXT: mov v5.s[3], wzr
+; CHECK-GI-NEXT: mov v6.s[3], wzr
+; CHECK-GI-NEXT: mov v22.s[2], w15
+; CHECK-GI-NEXT: mov v7.s[3], w16
+; CHECK-GI-NEXT: mov v20.s[3], w10
+; CHECK-GI-NEXT: mov v0.s[3], wzr
; CHECK-GI-NEXT: mov v1.s[3], wzr
; CHECK-GI-NEXT: mov v3.s[3], wzr
+; CHECK-GI-NEXT: mov v21.s[3], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #12] // 4-byte Folded Reload
; CHECK-GI-NEXT: mov v2.s[3], wzr
-; CHECK-GI-NEXT: mov v5.s[3], wzr
+; CHECK-GI-NEXT: mov v23.s[2], w11
+; CHECK-GI-NEXT: smov w11, v19.h[7]
+; CHECK-GI-NEXT: fmov s19, w8
+; CHECK-GI-NEXT: mov v22.s[3], w12
; CHECK-GI-NEXT: mov v4.s[3], wzr
-; CHECK-GI-NEXT: mov v6.s[3], wzr
-; CHECK-GI-NEXT: mov v18.s[1], wzr
-; CHECK-GI-NEXT: mov v16.s[3], w8
-; CHECK-GI-NEXT: mov v17.s[3], w10
-; CHECK-GI-NEXT: mov v19.s[3], w11
-; CHECK-GI-NEXT: mov v20.s[3], w12
-; CHECK-GI-NEXT: mov v21.s[3], w13
-; CHECK-GI-NEXT: mov v22.s[3], w14
-; CHECK-GI-NEXT: mov v23.s[3], w15
-; CHECK-GI-NEXT: mov v0.s[3], wzr
+; CHECK-GI-NEXT: add v5.4s, v5.4s, v6.4s
+; CHECK-GI-NEXT: add v6.4s, v7.4s, v16.4s
+; CHECK-GI-NEXT: add v7.4s, v17.4s, v18.4s
; CHECK-GI-NEXT: add v1.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT: add v2.4s, v2.4s, v5.4s
-; CHECK-GI-NEXT: add v3.4s, v4.4s, v6.4s
-; CHECK-GI-NEXT: mov v18.s[2], wzr
-; CHECK-GI-NEXT: add v4.4s, v7.4s, v16.4s
-; CHECK-GI-NEXT: add v5.4s, v17.4s, v19.4s
-; CHECK-GI-NEXT: add v6.4s, v20.4s, v21.4s
-; CHECK-GI-NEXT: add v7.4s, v22.4s, v23.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: add v1.4s, v2.4s, v3.4s
-; CHECK-GI-NEXT: mov v18.s[3], wzr
-; CHECK-GI-NEXT: add v2.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT: mov v19.s[1], wzr
+; CHECK-GI-NEXT: add v16.4s, v20.4s, v21.4s
+; CHECK-GI-NEXT: mov v23.s[3], w11
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v5.4s
+; CHECK-GI-NEXT: add v2.4s, v2.4s, v4.4s
; CHECK-GI-NEXT: add v3.4s, v6.4s, v7.4s
+; CHECK-GI-NEXT: mov v19.s[2], wzr
+; CHECK-GI-NEXT: add v17.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: mov v19.s[3], wzr
+; CHECK-GI-NEXT: add v4.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT: add v2.4s, v3.4s, v4.4s
+; CHECK-GI-NEXT: add v0.4s, v19.4s, v0.4s
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-GI-NEXT: add v1.4s, v2.4s, v3.4s
-; CHECK-GI-NEXT: add v0.4s, v18.4s, v0.4s
-; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: add v0.4s, v2.4s, v0.4s
; CHECK-GI-NEXT: addv s0, v0.4s
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: add w0, w8, w9
@@ -5844,12 +5841,13 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33
;
; CHECK-GI-LABEL: test_sdot_v33i8_double:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT: str x29, [sp, #64] // 8-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
+; CHECK-GI-NEXT: sub sp, sp, #96
+; CHECK-GI-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-GI-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-GI-NEXT: str x29, [sp, #80] // 8-byte Folded Spill
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 96
; CHECK-GI-NEXT: .cfi_offset w29, -16
; CHECK-GI-NEXT: .cfi_offset b8, -24
; CHECK-GI-NEXT: .cfi_offset b9, -32
@@ -5859,700 +5857,704 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33
; CHECK-GI-NEXT: .cfi_offset b13, -64
; CHECK-GI-NEXT: .cfi_offset b14, -72
; CHECK-GI-NEXT: .cfi_offset b15, -80
-; CHECK-GI-NEXT: lsl w8, w0, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #80]
+; CHECK-GI-NEXT: lsl w10, w0, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #96]
+; CHECK-GI-NEXT: ldr w8, [sp, #104]
; CHECK-GI-NEXT: lsl w11, w1, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #88]
-; CHECK-GI-NEXT: ldr w13, [sp, #128]
-; CHECK-GI-NEXT: ldr w14, [sp, #136]
-; CHECK-GI-NEXT: sbfx w12, w8, #8, #8
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: sbfx w8, w11, #8, #8
+; CHECK-GI-NEXT: ldr w13, [sp, #184]
+; CHECK-GI-NEXT: ldr w16, [sp, #240]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: lsl w11, w2, #8
+; CHECK-GI-NEXT: lsl w12, w8, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: fmov s22, w12
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: ldr w12, [sp, #152]
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: lsl w16, w7, #8
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: fmov s23, w10
-; CHECK-GI-NEXT: sbfx w10, w11, #8, #8
-; CHECK-GI-NEXT: lsl w11, w3, #8
-; CHECK-GI-NEXT: mov v22.h[1], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #96]
-; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w15, [sp, #200]
+; CHECK-GI-NEXT: fmov s22, w10
+; CHECK-GI-NEXT: sbfx w8, w9, #8, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #112]
+; CHECK-GI-NEXT: sbfx w10, w12, #8, #8
+; CHECK-GI-NEXT: lsl w12, w5, #8
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: ldr w15, [sp, #176]
-; CHECK-GI-NEXT: lsl w8, w8, #8
-; CHECK-GI-NEXT: mov v23.h[1], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #104]
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: ldr w17, [sp, #224]
-; CHECK-GI-NEXT: mov v22.h[2], w10
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: sbfx w10, w11, #8, #8
+; CHECK-GI-NEXT: fmov s23, w8
+; CHECK-GI-NEXT: lsl w8, w2, #8
; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v22.h[1], w11
; CHECK-GI-NEXT: lsl w11, w4, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: mov v23.h[2], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #112]
-; CHECK-GI-NEXT: movi d19, #0000000000000000
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: movi d0, #0000000000000000
+; CHECK-GI-NEXT: mov v23.h[1], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #120]
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: movi d19, #0000000000000000
; CHECK-GI-NEXT: movi d21, #0000000000000000
-; CHECK-GI-NEXT: mov v22.h[3], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #144]
-; CHECK-GI-NEXT: lsl w8, w8, #8
; CHECK-GI-NEXT: movi d16, #0000000000000000
+; CHECK-GI-NEXT: mov v22.h[2], w8
+; CHECK-GI-NEXT: lsl w8, w3, #8
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: movi d18, #0000000000000000
; CHECK-GI-NEXT: movi d17, #0000000000000000
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v23.h[3], w9
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #120]
+; CHECK-GI-NEXT: mov v23.h[2], w9
+; CHECK-GI-NEXT: sbfx w9, w10, #8, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #128]
; CHECK-GI-NEXT: movi d20, #0000000000000000
; CHECK-GI-NEXT: movi d6, #0000000000000000
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: mov v22.h[4], w11
-; CHECK-GI-NEXT: lsl w11, w5, #8
-; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v22.h[3], w8
+; CHECK-GI-NEXT: ldr w8, [sp, #160]
; CHECK-GI-NEXT: movi d7, #0000000000000000
+; CHECK-GI-NEXT: lsl w10, w10, #8
; CHECK-GI-NEXT: movi d2, #0000000000000000
-; CHECK-GI-NEXT: fmov s24, w10
-; CHECK-GI-NEXT: mov v23.h[4], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #160]
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #168]
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: lsl w8, w8, #8
; CHECK-GI-NEXT: movi d4, #0000000000000000
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: mov v23.h[3], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #168]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
; CHECK-GI-NEXT: movi d3, #0000000000000000
-; CHECK-GI-NEXT: mov v24.h[1], w12
-; CHECK-GI-NEXT: lsl w12, w6, #8
-; CHECK-GI-NEXT: mov v22.h[5], w11
+; CHECK-GI-NEXT: movi d5, #0000000000000000
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v22.h[4], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #136]
+; CHECK-GI-NEXT: movi d1, #0000000000000000
+; CHECK-GI-NEXT: mov v19.s[1], wzr
+; CHECK-GI-NEXT: fmov s24, w8
+; CHECK-GI-NEXT: mov v23.h[4], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #176]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #144]
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v23.h[5], w9
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #184]
-; CHECK-GI-NEXT: ldr w9, [sp, #192]
+; CHECK-GI-NEXT: mov v22.h[5], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #152]
+; CHECK-GI-NEXT: mov v24.h[1], w9
+; CHECK-GI-NEXT: lsl w9, w6, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: movi d5, #0000000000000000
-; CHECK-GI-NEXT: movi d1, #0000000000000000
-; CHECK-GI-NEXT: mov v24.h[2], w8
-; CHECK-GI-NEXT: mov v22.h[6], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #208]
-; CHECK-GI-NEXT: mov v23.h[6], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #216]
-; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: lsl w8, w8, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: ldr w8, [sp, #200]
-; CHECK-GI-NEXT: movi d0, #0000000000000000
-; CHECK-GI-NEXT: lsl w13, w13, #8
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v19.s[1], wzr
-; CHECK-GI-NEXT: mov v24.h[3], w10
-; CHECK-GI-NEXT: sbfx w10, w14, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #280]
-; CHECK-GI-NEXT: mov v22.h[7], w16
-; CHECK-GI-NEXT: ldr w16, [sp, #288]
+; CHECK-GI-NEXT: mov v23.h[5], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #192]
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mov v23.h[7], w10
-; CHECK-GI-NEXT: lsl w18, w16, #8
-; CHECK-GI-NEXT: fmov s27, w12
-; CHECK-GI-NEXT: ldr w10, [sp, #232]
-; CHECK-GI-NEXT: sbfx w16, w14, #8, #8
-; CHECK-GI-NEXT: mov v24.h[4], w15
-; CHECK-GI-NEXT: lsl w15, w11, #8
-; CHECK-GI-NEXT: sbfx w14, w18, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #296]
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: fmov s25, w16
-; CHECK-GI-NEXT: ldr w16, [sp, #344]
-; CHECK-GI-NEXT: mov v27.h[1], w13
-; CHECK-GI-NEXT: lsl w13, w17, #8
+; CHECK-GI-NEXT: mov v21.s[1], wzr
+; CHECK-GI-NEXT: mov v24.h[2], w10
+; CHECK-GI-NEXT: lsl w10, w7, #8
+; CHECK-GI-NEXT: mov v22.h[6], w9
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: ldr w12, [sp, #240]
-; CHECK-GI-NEXT: sbfx w17, w10, #8, #8
-; CHECK-GI-NEXT: mov v25.h[1], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #352]
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #208]
+; CHECK-GI-NEXT: mov v16.s[1], wzr
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v23.h[6], w8
+; CHECK-GI-NEXT: ldr w8, [sp, #224]
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v24.h[5], w15
-; CHECK-GI-NEXT: mov v27.h[2], w13
-; CHECK-GI-NEXT: lsl w13, w14, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #304]
-; CHECK-GI-NEXT: fmov s26, w16
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v18.s[1], wzr
+; CHECK-GI-NEXT: mov v22.h[7], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #296]
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: mov v24.h[3], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #232]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: sbfx w14, w8, #8, #8
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: mov v23.h[7], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #304]
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #216]
+; CHECK-GI-NEXT: fmov s26, w14
+; CHECK-GI-NEXT: sbfx w14, w10, #8, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #312]
+; CHECK-GI-NEXT: mov v24.h[4], w11
+; CHECK-GI-NEXT: sbfx w11, w13, #8, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: ldr w15, [sp, #248]
-; CHECK-GI-NEXT: mov v25.h[2], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #360]
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: fmov s25, w14
+; CHECK-GI-NEXT: ldr w14, [sp, #360]
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: mov v26.h[1], w11
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: mov v24.h[6], w9
-; CHECK-GI-NEXT: lsl w16, w11, #8
-; CHECK-GI-NEXT: mov v26.h[1], w13
-; CHECK-GI-NEXT: mov v27.h[3], w17
-; CHECK-GI-NEXT: sbfx w13, w14, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #312]
-; CHECK-GI-NEXT: ldr w17, [sp, #328]
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #256]
-; CHECK-GI-NEXT: ldr w11, [sp, #264]
-; CHECK-GI-NEXT: mov v25.h[3], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #368]
+; CHECK-GI-NEXT: lsl w11, w16, #8
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: mov v26.h[2], w16
-; CHECK-GI-NEXT: ldr w16, [sp, #320]
-; CHECK-GI-NEXT: mov v27.h[4], w12
+; CHECK-GI-NEXT: lsl w13, w15, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v25.h[1], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #368]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: ldr w15, [sp, #432]
+; CHECK-GI-NEXT: mov v26.h[2], w11
+; CHECK-GI-NEXT: lsl w11, w12, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #320]
+; CHECK-GI-NEXT: fmov s27, w14
+; CHECK-GI-NEXT: mov v24.h[5], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #248]
+; CHECK-GI-NEXT: mov v25.h[2], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #376]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: sbfx w9, w14, #8, #8
-; CHECK-GI-NEXT: lsl w14, w15, #8
-; CHECK-GI-NEXT: lsl w15, w16, #8
-; CHECK-GI-NEXT: ldr w16, [sp, #408]
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #256]
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: sbfx w12, w13, #8, #8
-; CHECK-GI-NEXT: ldr w13, [sp, #376]
-; CHECK-GI-NEXT: mov v25.h[4], w9
-; CHECK-GI-NEXT: sbfx w9, w14, #8, #8
-; CHECK-GI-NEXT: sbfx w14, w15, #8, #8
-; CHECK-GI-NEXT: lsl w15, w16, #8
-; CHECK-GI-NEXT: mov v26.h[3], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #416]
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: lsl w16, w17, #8
-; CHECK-GI-NEXT: mov v27.h[5], w9
+; CHECK-GI-NEXT: mov v27.h[1], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #264]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v25.h[5], w14
-; CHECK-GI-NEXT: fmov s29, w15
-; CHECK-GI-NEXT: ldr w14, [sp, #384]
-; CHECK-GI-NEXT: ldr w15, [sp, #472]
-; CHECK-GI-NEXT: mov v26.h[4], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #424]
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: mov v24.h[6], w9
+; CHECK-GI-NEXT: mov v26.h[3], w13
+; CHECK-GI-NEXT: mov v25.h[3], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #384]
+; CHECK-GI-NEXT: mov v27.h[2], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #424]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: ldr w13, [sp, #328]
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #272]
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: mov v26.h[4], w14
+; CHECK-GI-NEXT: lsl w14, w15, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v29.h[1], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #480]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: lsl w9, w9, #8
; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v25.h[6], w16
-; CHECK-GI-NEXT: ldr w16, [sp, #432]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: fmov s29, w10
+; CHECK-GI-NEXT: mov v27.h[3], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #392]
+; CHECK-GI-NEXT: mov v25.h[4], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #336]
+; CHECK-GI-NEXT: ldr w10, [sp, #440]
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v26.h[5], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #392]
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: mov v29.h[2], w13
-; CHECK-GI-NEXT: fmov s28, w15
-; CHECK-GI-NEXT: ldr w9, [sp, #336]
-; CHECK-GI-NEXT: ldr w13, [sp, #488]
+; CHECK-GI-NEXT: mov v26.h[5], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #496]
+; CHECK-GI-NEXT: mov v29.h[1], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #488]
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: lsl w10, w10, #8
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: ldr w15, [sp, #440]
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v28.h[1], w12
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: mov v29.h[3], w16
-; CHECK-GI-NEXT: ldr w16, [sp, #496]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: ldr w12, [sp, #400]
-; CHECK-GI-NEXT: mov v26.h[6], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #448]
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: mov v28.h[2], w13
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: mov v25.h[7], w9
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v27.h[4], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #448]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: mov v25.h[5], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #344]
+; CHECK-GI-NEXT: mov v29.h[2], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #504]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: fmov s28, w14
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v29.h[4], w15
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #400]
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: mov v26.h[6], w9
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: mov v24.h[7], w8
+; CHECK-GI-NEXT: mov v28.h[1], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #456]
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: ldr w13, [sp, #456]
-; CHECK-GI-NEXT: ldr w15, [sp, #504]
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: sbfx w9, w12, #8, #8
-; CHECK-GI-NEXT: sbfx w12, w14, #8, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v29.h[3], w12
+; CHECK-GI-NEXT: mov v25.h[6], w13
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: lsl w14, w15, #8
-; CHECK-GI-NEXT: mov v28.h[3], w16
-; CHECK-GI-NEXT: ldr w15, [sp, #512]
+; CHECK-GI-NEXT: ldr w13, [sp, #512]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #352]
+; CHECK-GI-NEXT: mov v17.s[1], wzr
+; CHECK-GI-NEXT: mov v20.s[1], wzr
+; CHECK-GI-NEXT: mov v28.h[2], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #408]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mul v30.8h, v22.8h, v25.8h
-; CHECK-GI-NEXT: mov v26.h[7], w9
-; CHECK-GI-NEXT: mov v29.h[5], w12
-; CHECK-GI-NEXT: lsl w8, w8, #8
-; CHECK-GI-NEXT: sbfx w9, w14, #8, #8
+; CHECK-GI-NEXT: mov v27.h[5], w14
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: mov v29.h[4], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #464]
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #520]
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: sbfx w14, w11, #8, #8
-; CHECK-GI-NEXT: sbfx w11, w13, #8, #8
-; CHECK-GI-NEXT: lsl w13, w15, #8
-; CHECK-GI-NEXT: ldr w17, [sp, #464]
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: mov v28.h[4], w9
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: mov v6.s[1], wzr
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: mov v28.h[3], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #416]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: mov v27.h[6], w10
-; CHECK-GI-NEXT: ldr w16, [sp, #520]
-; CHECK-GI-NEXT: sbfx w10, w13, #8, #8
-; CHECK-GI-NEXT: smov w13, v30.h[0]
-; CHECK-GI-NEXT: mov v24.h[7], w8
-; CHECK-GI-NEXT: lsl w8, w17, #8
-; CHECK-GI-NEXT: mov v29.h[6], w11
-; CHECK-GI-NEXT: mul v26.8h, v23.8h, v26.8h
-; CHECK-GI-NEXT: lsl w15, w16, #8
-; CHECK-GI-NEXT: smov w16, v30.h[1]
+; CHECK-GI-NEXT: ldr w10, [sp, #472]
+; CHECK-GI-NEXT: mov v25.h[7], w12
; CHECK-GI-NEXT: ldr w12, [sp, #528]
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: mov v28.h[5], w10
-; CHECK-GI-NEXT: mov v27.h[7], w14
-; CHECK-GI-NEXT: fmov s22, w13
-; CHECK-GI-NEXT: sbfx w10, w15, #8, #8
-; CHECK-GI-NEXT: smov w14, v30.h[4]
-; CHECK-GI-NEXT: mov v29.h[7], w8
-; CHECK-GI-NEXT: smov w15, v26.h[0]
-; CHECK-GI-NEXT: smov w13, v30.h[2]
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: mov v29.h[5], w11
+; CHECK-GI-NEXT: lsl w10, w10, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #544]
-; CHECK-GI-NEXT: ldr w11, [sp, #552]
-; CHECK-GI-NEXT: mov v22.s[1], w16
-; CHECK-GI-NEXT: smov w16, v26.h[4]
-; CHECK-GI-NEXT: mov v28.h[6], w10
-; CHECK-GI-NEXT: smov w10, v26.h[1]
-; CHECK-GI-NEXT: fmov s23, w14
-; CHECK-GI-NEXT: smov w14, v26.h[5]
-; CHECK-GI-NEXT: mul v29.8h, v24.8h, v29.8h
-; CHECK-GI-NEXT: fmov s24, w15
-; CHECK-GI-NEXT: smov w15, v26.h[2]
+; CHECK-GI-NEXT: mov v28.h[4], w14
+; CHECK-GI-NEXT: ldr w11, [sp, #480]
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #280]
+; CHECK-GI-NEXT: mul v0.8h, v22.8h, v25.8h
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: smov w8, v30.h[5]
-; CHECK-GI-NEXT: smov w17, v30.h[7]
-; CHECK-GI-NEXT: fmov s25, w16
-; CHECK-GI-NEXT: mov v22.s[2], w13
-; CHECK-GI-NEXT: smov w13, v30.h[3]
-; CHECK-GI-NEXT: mov v24.s[1], w10
-; CHECK-GI-NEXT: smov w16, v26.h[6]
-; CHECK-GI-NEXT: sbfx w10, w12, #8, #8
-; CHECK-GI-NEXT: smov w18, v29.h[0]
-; CHECK-GI-NEXT: smov w0, v29.h[1]
-; CHECK-GI-NEXT: ldr w12, [sp, #560]
-; CHECK-GI-NEXT: mov v25.s[1], w14
-; CHECK-GI-NEXT: smov w14, v26.h[7]
-; CHECK-GI-NEXT: mov v28.h[7], w10
-; CHECK-GI-NEXT: mov v22.s[3], w13
-; CHECK-GI-NEXT: smov w13, v26.h[3]
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v24.s[2], w15
-; CHECK-GI-NEXT: smov w15, v29.h[2]
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: fmov s26, w18
-; CHECK-GI-NEXT: mov v23.s[1], w8
-; CHECK-GI-NEXT: smov w8, v30.h[6]
-; CHECK-GI-NEXT: mov v25.s[2], w16
-; CHECK-GI-NEXT: lsl w16, w9, #8
-; CHECK-GI-NEXT: mul v31.8h, v27.8h, v28.8h
+; CHECK-GI-NEXT: mov v27.h[7], w13
+; CHECK-GI-NEXT: mov v29.h[6], w10
+; CHECK-GI-NEXT: ldr w13, [sp, #536]
+; CHECK-GI-NEXT: mov v28.h[5], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #544]
+; CHECK-GI-NEXT: sbfx w9, w11, #8, #8
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: lsl w10, w14, #8
+; CHECK-GI-NEXT: mov v7.s[1], wzr
+; CHECK-GI-NEXT: smov w8, v0.h[0]
+; CHECK-GI-NEXT: lsl w11, w12, #8
+; CHECK-GI-NEXT: smov w12, v0.h[4]
+; CHECK-GI-NEXT: mul v27.8h, v23.8h, v27.8h
+; CHECK-GI-NEXT: mov v29.h[7], w9
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: sbfx w9, w11, #8, #8
+; CHECK-GI-NEXT: smov w11, v0.h[1]
+; CHECK-GI-NEXT: mov v28.h[6], w13
+; CHECK-GI-NEXT: mov v2.s[1], wzr
+; CHECK-GI-NEXT: mov v4.s[1], wzr
+; CHECK-GI-NEXT: fmov s22, w8
+; CHECK-GI-NEXT: mov v26.h[7], w10
+; CHECK-GI-NEXT: smov w10, v0.h[5]
+; CHECK-GI-NEXT: smov w13, v27.h[0]
+; CHECK-GI-NEXT: smov w14, v27.h[4]
+; CHECK-GI-NEXT: mul v29.8h, v24.8h, v29.8h
+; CHECK-GI-NEXT: fmov s23, w12
+; CHECK-GI-NEXT: smov w8, v27.h[5]
+; CHECK-GI-NEXT: smov w12, v0.h[6]
+; CHECK-GI-NEXT: mov v22.s[1], w11
+; CHECK-GI-NEXT: smov w11, v0.h[2]
+; CHECK-GI-NEXT: mov v28.h[7], w9
+; CHECK-GI-NEXT: smov w9, v27.h[1]
+; CHECK-GI-NEXT: smov w15, v27.h[3]
+; CHECK-GI-NEXT: smov w16, v27.h[7]
+; CHECK-GI-NEXT: mov v23.s[1], w10
+; CHECK-GI-NEXT: fmov s24, w13
+; CHECK-GI-NEXT: fmov s25, w14
+; CHECK-GI-NEXT: smov w14, v29.h[0]
+; CHECK-GI-NEXT: smov w10, v27.h[2]
+; CHECK-GI-NEXT: smov w13, v27.h[6]
+; CHECK-GI-NEXT: mov v22.s[2], w11
+; CHECK-GI-NEXT: smov w11, v0.h[3]
+; CHECK-GI-NEXT: mul v30.8h, v26.8h, v28.8h
+; CHECK-GI-NEXT: mov v24.s[1], w9
+; CHECK-GI-NEXT: mov v25.s[1], w8
+; CHECK-GI-NEXT: smov w8, v29.h[1]
+; CHECK-GI-NEXT: mov v23.s[2], w12
+; CHECK-GI-NEXT: smov w12, v0.h[7]
+; CHECK-GI-NEXT: ldr w9, [sp, #560]
+; CHECK-GI-NEXT: fmov s26, w14
+; CHECK-GI-NEXT: smov w14, v29.h[2]
+; CHECK-GI-NEXT: mov v3.s[1], wzr
+; CHECK-GI-NEXT: mov v22.s[3], w11
+; CHECK-GI-NEXT: smov w11, v29.h[4]
+; CHECK-GI-NEXT: mov v5.s[1], wzr
+; CHECK-GI-NEXT: mov v24.s[2], w10
+; CHECK-GI-NEXT: mov v25.s[2], w13
; CHECK-GI-NEXT: ldr w10, [sp, #568]
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #584]
-; CHECK-GI-NEXT: mov v24.s[3], w13
-; CHECK-GI-NEXT: smov w13, v29.h[4]
-; CHECK-GI-NEXT: mov v26.s[1], w0
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v23.s[2], w8
-; CHECK-GI-NEXT: mov v25.s[3], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #608]
+; CHECK-GI-NEXT: mov v26.s[1], w8
+; CHECK-GI-NEXT: mov v23.s[3], w12
+; CHECK-GI-NEXT: smov w12, v30.h[0]
+; CHECK-GI-NEXT: smov w13, v29.h[5]
; CHECK-GI-NEXT: ldr w8, [sp, #576]
-; CHECK-GI-NEXT: fmov s8, w16
-; CHECK-GI-NEXT: ldr w16, [sp, #616]
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: fmov s27, w13
-; CHECK-GI-NEXT: lsl w13, w14, #8
-; CHECK-GI-NEXT: mov v26.s[2], w15
-; CHECK-GI-NEXT: smov w15, v29.h[5]
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #624]
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mov v8.h[1], w11
+; CHECK-GI-NEXT: mov v1.s[1], wzr
+; CHECK-GI-NEXT: fmov s27, w11
+; CHECK-GI-NEXT: smov w11, v29.h[6]
+; CHECK-GI-NEXT: mov v19.s[2], wzr
+; CHECK-GI-NEXT: mov v24.s[3], w15
+; CHECK-GI-NEXT: lsl w15, w9, #8
; CHECK-GI-NEXT: lsl w8, w8, #8
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: mov v23.s[3], w17
-; CHECK-GI-NEXT: fmov s9, w13
-; CHECK-GI-NEXT: ldr w13, [sp, #632]
-; CHECK-GI-NEXT: smov w17, v31.h[1]
-; CHECK-GI-NEXT: mov v27.s[1], w15
-; CHECK-GI-NEXT: smov w15, v31.h[0]
+; CHECK-GI-NEXT: mov v26.s[2], w14
+; CHECK-GI-NEXT: lsl w14, w10, #8
+; CHECK-GI-NEXT: smov w10, v30.h[1]
+; CHECK-GI-NEXT: fmov s28, w12
+; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
+; CHECK-GI-NEXT: mov v27.s[1], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #624]
; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v8.h[2], w12
-; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #632]
+; CHECK-GI-NEXT: fmov s8, w15
+; CHECK-GI-NEXT: ldr w9, [sp, #584]
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: mov v9.h[1], w16
-; CHECK-GI-NEXT: smov w16, v31.h[2]
+; CHECK-GI-NEXT: mov v28.s[1], w10
+; CHECK-GI-NEXT: lsl w10, w13, #8
+; CHECK-GI-NEXT: smov w13, v30.h[2]
+; CHECK-GI-NEXT: lsl w12, w12, #8
; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #592]
-; CHECK-GI-NEXT: ldr w12, [sp, #600]
-; CHECK-GI-NEXT: fmov s28, w15
-; CHECK-GI-NEXT: smov w15, v29.h[6]
+; CHECK-GI-NEXT: mov v27.s[2], w11
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v8.h[1], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #592]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v8.h[3], w10
+; CHECK-GI-NEXT: ldr w11, [sp, #600]
+; CHECK-GI-NEXT: fmov s9, w10
+; CHECK-GI-NEXT: mov v28.s[2], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #688]
; CHECK-GI-NEXT: ldr w10, [sp, #640]
+; CHECK-GI-NEXT: lsl w14, w14, #8
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mov v9.h[2], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #672]
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v28.s[1], w17
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: mov v8.h[2], w8
+; CHECK-GI-NEXT: smov w8, v30.h[4]
+; CHECK-GI-NEXT: mov v9.h[1], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #696]
; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: mov v27.s[2], w15
-; CHECK-GI-NEXT: ldr w15, [sp, #680]
-; CHECK-GI-NEXT: mov v8.h[4], w8
-; CHECK-GI-NEXT: smov w8, v31.h[4]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: mov v25.s[3], w16
+; CHECK-GI-NEXT: fmov s10, w13
+; CHECK-GI-NEXT: mov v8.h[3], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #648]
+; CHECK-GI-NEXT: mov v9.h[2], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #704]
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: mov v9.h[3], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #688]
-; CHECK-GI-NEXT: mov v28.s[2], w16
-; CHECK-GI-NEXT: ldr w16, [sp, #648]
-; CHECK-GI-NEXT: fmov s10, w14
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #656]
-; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: fmov s30, w8
-; CHECK-GI-NEXT: sbfx w8, w10, #8, #8
-; CHECK-GI-NEXT: smov w10, v31.h[5]
-; CHECK-GI-NEXT: mov v8.h[5], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #696]
-; CHECK-GI-NEXT: mov v10.h[1], w15
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mov v9.h[4], w8
-; CHECK-GI-NEXT: lsl w16, w16, #8
; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: ldr w8, [sp, #704]
-; CHECK-GI-NEXT: ldr w15, [sp, #664]
-; CHECK-GI-NEXT: ldr w17, [sp, #768]
-; CHECK-GI-NEXT: mov v30.s[1], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #744]
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: mov v10.h[2], w13
-; CHECK-GI-NEXT: ldr w13, [sp, #736]
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: ldr w13, [sp, #656]
+; CHECK-GI-NEXT: fmov s31, w8
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v9.h[5], w16
-; CHECK-GI-NEXT: mov v8.h[6], w11
+; CHECK-GI-NEXT: mov v10.h[1], w12
+; CHECK-GI-NEXT: smov w12, v30.h[5]
+; CHECK-GI-NEXT: mov v8.h[4], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #712]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #712]
+; CHECK-GI-NEXT: ldr w8, [sp, #608]
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: mov v9.h[3], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #616]
+; CHECK-GI-NEXT: mov v10.h[2], w10
+; CHECK-GI-NEXT: mov v31.s[1], w12
+; CHECK-GI-NEXT: sbfx w12, w13, #8, #8
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: smov w13, v30.h[6]
+; CHECK-GI-NEXT: ldr w10, [sp, #664]
+; CHECK-GI-NEXT: mov v8.h[5], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #720]
; CHECK-GI-NEXT: lsl w8, w8, #8
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: ldr w16, [sp, #720]
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mov v10.h[3], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #752]
-; CHECK-GI-NEXT: mov v8.h[7], w12
-; CHECK-GI-NEXT: sbfx w12, w8, #8, #8
-; CHECK-GI-NEXT: lsl w18, w16, #8
-; CHECK-GI-NEXT: fmov s11, w13
+; CHECK-GI-NEXT: mov v9.h[4], w12
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #672]
+; CHECK-GI-NEXT: mov v10.h[3], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #752]
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: mov v31.s[2], w13
; CHECK-GI-NEXT: ldr w13, [sp, #760]
-; CHECK-GI-NEXT: ldr w8, [sp, #784]
-; CHECK-GI-NEXT: mov v21.s[1], wzr
-; CHECK-GI-NEXT: mov v16.s[1], wzr
-; CHECK-GI-NEXT: mov v18.s[1], wzr
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v10.h[4], w12
-; CHECK-GI-NEXT: sbfx w12, w15, #8, #8
-; CHECK-GI-NEXT: mov v11.h[1], w10
-; CHECK-GI-NEXT: sbfx w10, w14, #8, #8
-; CHECK-GI-NEXT: lsl w14, w9, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: ldr w9, [sp, #776]
-; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: mov v9.h[5], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #728]
; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v9.h[6], w10
-; CHECK-GI-NEXT: lsl w10, w11, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #808]
+; CHECK-GI-NEXT: mov v10.h[4], w11
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: sbfx w11, w13, #8, #8
+; CHECK-GI-NEXT: mov v8.h[6], w8
; CHECK-GI-NEXT: lsl w9, w9, #8
-; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
-; CHECK-GI-NEXT: mov v11.h[2], w14
-; CHECK-GI-NEXT: ldr w14, [sp, #816]
-; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: fmov s11, w14
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: ldr w13, [sp, #768]
; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: mov v17.s[1], wzr
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: mov v9.h[7], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #824]
-; CHECK-GI-NEXT: sbfx w16, w11, #8, #8
-; CHECK-GI-NEXT: mov v10.h[5], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #832]
-; CHECK-GI-NEXT: mov v11.h[3], w13
-; CHECK-GI-NEXT: sbfx w15, w14, #8, #8
-; CHECK-GI-NEXT: lsl w14, w17, #8
-; CHECK-GI-NEXT: fmov s12, w16
-; CHECK-GI-NEXT: ldr w16, [sp, #872]
-; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #840]
-; CHECK-GI-NEXT: sbfx w13, w18, #8, #8
-; CHECK-GI-NEXT: sbfx w17, w12, #8, #8
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: ldr w12, [sp, #856]
-; CHECK-GI-NEXT: mov v12.h[1], w15
-; CHECK-GI-NEXT: mov v11.h[4], w14
-; CHECK-GI-NEXT: ldr w15, [sp, #880]
+; CHECK-GI-NEXT: ldr w15, [sp, #680]
+; CHECK-GI-NEXT: sbfx w14, w10, #8, #8
+; CHECK-GI-NEXT: mov v9.h[6], w12
+; CHECK-GI-NEXT: ldr w8, [sp, #736]
+; CHECK-GI-NEXT: mov v11.h[1], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #824]
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: mov v8.h[7], w9
+; CHECK-GI-NEXT: ldr w9, [sp, #832]
+; CHECK-GI-NEXT: mov v10.h[5], w14
+; CHECK-GI-NEXT: lsl w12, w11, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: ldr w11, [sp, #776]
+; CHECK-GI-NEXT: lsl w16, w9, #8
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #744]
+; CHECK-GI-NEXT: sbfx w14, w12, #8, #8
+; CHECK-GI-NEXT: mov v11.h[2], w13
+; CHECK-GI-NEXT: lsl w13, w15, #8
+; CHECK-GI-NEXT: sbfx w12, w16, #8, #8
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mov v10.h[6], w13
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: fmov s12, w14
+; CHECK-GI-NEXT: ldr w14, [sp, #840]
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: mov v10.h[6], w8
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: mov v9.h[7], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #896]
+; CHECK-GI-NEXT: mov v12.h[1], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #888]
+; CHECK-GI-NEXT: mov v11.h[3], w11
+; CHECK-GI-NEXT: sbfx w8, w14, #8, #8
+; CHECK-GI-NEXT: lsl w11, w13, #8
; CHECK-GI-NEXT: ldr w13, [sp, #848]
-; CHECK-GI-NEXT: lsl w14, w15, #8
-; CHECK-GI-NEXT: sbfx w15, w16, #8, #8
-; CHECK-GI-NEXT: ldr w16, [sp, #888]
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #784]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #792]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: mov v12.h[2], w8
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v10.h[7], w10
+; CHECK-GI-NEXT: sbfx w10, w13, #8, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #856]
+; CHECK-GI-NEXT: fmov s13, w12
+; CHECK-GI-NEXT: ldr w12, [sp, #904]
+; CHECK-GI-NEXT: sbfx w15, w9, #8, #8
+; CHECK-GI-NEXT: lsl w13, w14, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #912]
+; CHECK-GI-NEXT: lsl w8, w8, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: mov v20.s[1], wzr
-; CHECK-GI-NEXT: mov v12.h[2], w17
-; CHECK-GI-NEXT: lsl w17, w10, #8
-; CHECK-GI-NEXT: mov v11.h[5], w9
-; CHECK-GI-NEXT: fmov s13, w15
-; CHECK-GI-NEXT: ldr w9, [sp, #936]
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: sbfx w15, w17, #8, #8
-; CHECK-GI-NEXT: lsl w16, w16, #8
+; CHECK-GI-NEXT: mov v12.h[3], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #952]
+; CHECK-GI-NEXT: mov v13.h[1], w11
+; CHECK-GI-NEXT: mov v11.h[4], w15
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: lsl w9, w9, #8
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: ldr w10, [sp, #864]
-; CHECK-GI-NEXT: mov v12.h[3], w15
-; CHECK-GI-NEXT: mov v11.h[6], w8
-; CHECK-GI-NEXT: sbfx w8, w11, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #1000]
-; CHECK-GI-NEXT: mov v13.h[1], w14
-; CHECK-GI-NEXT: ldr w15, [sp, #944]
-; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #896]
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: mov v12.h[4], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #1008]
-; CHECK-GI-NEXT: fmov s14, w9
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v13.h[2], w16
-; CHECK-GI-NEXT: ldr w16, [sp, #952]
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: lsl w17, w8, #8
-; CHECK-GI-NEXT: smov w8, v29.h[3]
-; CHECK-GI-NEXT: smov w9, v29.h[7]
-; CHECK-GI-NEXT: fmov s29, w11
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v14.h[1], w15
-; CHECK-GI-NEXT: sbfx w15, w17, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #904]
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: mov v12.h[5], w13
-; CHECK-GI-NEXT: mov v13.h[3], w14
-; CHECK-GI-NEXT: mov v29.h[1], w15
-; CHECK-GI-NEXT: ldr w15, [sp, #960]
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #1016]
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: ldr w13, [sp, #1024]
+; CHECK-GI-NEXT: ldr w11, [sp, #864]
+; CHECK-GI-NEXT: sbfx w15, w8, #8, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #872]
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #880]
+; CHECK-GI-NEXT: mov v21.s[2], wzr
+; CHECK-GI-NEXT: mov v13.h[2], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #960]
+; CHECK-GI-NEXT: mov v11.h[5], w13
+; CHECK-GI-NEXT: sbfx w13, w14, #8, #8
+; CHECK-GI-NEXT: fmov s14, w10
+; CHECK-GI-NEXT: ldr w10, [sp, #1016]
+; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: mov v12.h[4], w15
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #920]
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v13.h[3], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #968]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v14.h[2], w16
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: ldr w16, [sp, #912]
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v13.h[4], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #968]
-; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v12.h[6], w12
-; CHECK-GI-NEXT: ldr w12, [sp, #976]
+; CHECK-GI-NEXT: mov v14.h[1], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #1024]
+; CHECK-GI-NEXT: mov v12.h[5], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #976]
+; CHECK-GI-NEXT: fmov s15, w10
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mov v14.h[3], w15
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mov v29.h[2], w14
-; CHECK-GI-NEXT: ldr w15, [sp, #1032]
-; CHECK-GI-NEXT: lsl w16, w16, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
-; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: ldr w14, [sp, #920]
-; CHECK-GI-NEXT: mov v26.s[3], w8
-; CHECK-GI-NEXT: sbfx w16, w16, #8, #8
-; CHECK-GI-NEXT: lsl w15, w15, #8
-; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: mov v14.h[4], w11
-; CHECK-GI-NEXT: mov v29.h[3], w13
-; CHECK-GI-NEXT: ldr w11, [sp, #984]
-; CHECK-GI-NEXT: lsl w14, w14, #8
-; CHECK-GI-NEXT: sbfx w15, w15, #8, #8
-; CHECK-GI-NEXT: mov v13.h[5], w16
-; CHECK-GI-NEXT: ldr w16, [sp, #1040]
-; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: ldr w13, [sp, #928]
; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
-; CHECK-GI-NEXT: mov v12.h[7], w10
-; CHECK-GI-NEXT: mov v27.s[3], w9
-; CHECK-GI-NEXT: mov v14.h[5], w12
-; CHECK-GI-NEXT: mov v29.h[4], w15
-; CHECK-GI-NEXT: lsl w16, w16, #8
-; CHECK-GI-NEXT: sbfx w10, w11, #8, #8
+; CHECK-GI-NEXT: lsl w11, w11, #8
+; CHECK-GI-NEXT: ldr w10, [sp, #984]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: mov v14.h[2], w13
+; CHECK-GI-NEXT: ldr w13, [sp, #1032]
+; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: mov v13.h[4], w14
+; CHECK-GI-NEXT: ldr w14, [sp, #928]
+; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
; CHECK-GI-NEXT: lsl w13, w13, #8
-; CHECK-GI-NEXT: mov v13.h[6], w14
-; CHECK-GI-NEXT: ldr w12, [sp, #1048]
-; CHECK-GI-NEXT: sbfx w14, w16, #8, #8
-; CHECK-GI-NEXT: ldr w11, [sp, #728]
+; CHECK-GI-NEXT: mov v15.h[1], w12
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #936]
+; CHECK-GI-NEXT: mov v12.h[6], w9
+; CHECK-GI-NEXT: mov v14.h[3], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #1040]
; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: mul v15.8h, v8.8h, v12.8h
-; CHECK-GI-NEXT: smov w16, v31.h[6]
-; CHECK-GI-NEXT: mov v14.h[6], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #992]
-; CHECK-GI-NEXT: mov v29.h[5], w14
+; CHECK-GI-NEXT: sbfx w14, w14, #8, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
; CHECK-GI-NEXT: lsl w12, w12, #8
+; CHECK-GI-NEXT: mov v15.h[2], w13
; CHECK-GI-NEXT: lsl w11, w11, #8
-; CHECK-GI-NEXT: mov v13.h[7], w13
-; CHECK-GI-NEXT: lsl w10, w10, #8
-; CHECK-GI-NEXT: ldr w13, [sp, #792]
-; CHECK-GI-NEXT: ldr w14, [sp, #1056]
+; CHECK-GI-NEXT: ldr w13, [sp, #992]
+; CHECK-GI-NEXT: mov v13.h[5], w14
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #944]
+; CHECK-GI-NEXT: mov v14.h[4], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #1048]
; CHECK-GI-NEXT: sbfx w11, w11, #8, #8
-; CHECK-GI-NEXT: mov v30.s[2], w16
+; CHECK-GI-NEXT: lsl w13, w13, #8
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: ldr w9, [sp, #1000]
+; CHECK-GI-NEXT: lsl w10, w10, #8
+; CHECK-GI-NEXT: mov v15.h[3], w11
+; CHECK-GI-NEXT: lsl w14, w14, #8
+; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
+; CHECK-GI-NEXT: mov v13.h[6], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #1056]
; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
-; CHECK-GI-NEXT: smov w8, v15.h[1]
-; CHECK-GI-NEXT: smov w9, v15.h[5]
-; CHECK-GI-NEXT: mov v29.h[6], w12
-; CHECK-GI-NEXT: lsl w12, w13, #8
-; CHECK-GI-NEXT: lsl w13, w14, #8
-; CHECK-GI-NEXT: mov v10.h[7], w11
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: lsl w9, w9, #8
+; CHECK-GI-NEXT: mov v14.h[5], w13
+; CHECK-GI-NEXT: smov w11, v29.h[3]
+; CHECK-GI-NEXT: smov w13, v29.h[7]
+; CHECK-GI-NEXT: mov v15.h[4], w10
+; CHECK-GI-NEXT: lsl w10, w12, #8
+; CHECK-GI-NEXT: mov v12.h[7], w8
+; CHECK-GI-NEXT: sbfx w12, w14, #8, #8
+; CHECK-GI-NEXT: ldr w8, [sp, #800]
+; CHECK-GI-NEXT: ldr w14, [sp, #1064]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v16.s[2], wzr
+; CHECK-GI-NEXT: mov v13.h[7], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #1008]
+; CHECK-GI-NEXT: lsl w8, w8, #8
+; CHECK-GI-NEXT: mov v14.h[6], w9
+; CHECK-GI-NEXT: mov v15.h[5], w10
+; CHECK-GI-NEXT: lsl w9, w14, #8
+; CHECK-GI-NEXT: mul v0.8h, v8.8h, v12.8h
+; CHECK-GI-NEXT: lsl w10, w12, #8
+; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
+; CHECK-GI-NEXT: ldr w12, [sp, #808]
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: ldr w14, [sp, #1072]
+; CHECK-GI-NEXT: sbfx w10, w10, #8, #8
+; CHECK-GI-NEXT: mov v11.h[6], w8
+; CHECK-GI-NEXT: smov w8, v30.h[3]
+; CHECK-GI-NEXT: mov v15.h[6], w9
+; CHECK-GI-NEXT: lsl w9, w12, #8
+; CHECK-GI-NEXT: lsl w12, w14, #8
; CHECK-GI-NEXT: mov v14.h[7], w10
+; CHECK-GI-NEXT: smov w10, v0.h[0]
; CHECK-GI-NEXT: mul v12.8h, v9.8h, v13.8h
+; CHECK-GI-NEXT: sbfx w9, w9, #8, #8
+; CHECK-GI-NEXT: mov v26.s[3], w11
+; CHECK-GI-NEXT: smov w11, v0.h[1]
; CHECK-GI-NEXT: sbfx w12, w12, #8, #8
-; CHECK-GI-NEXT: sbfx w13, w13, #8, #8
-; CHECK-GI-NEXT: smov w10, v15.h[0]
-; CHECK-GI-NEXT: smov w11, v15.h[4]
-; CHECK-GI-NEXT: smov w14, v31.h[7]
-; CHECK-GI-NEXT: smov w15, v31.h[3]
-; CHECK-GI-NEXT: mov v11.h[7], w12
-; CHECK-GI-NEXT: mov v29.h[7], w13
-; CHECK-GI-NEXT: mov v6.s[1], wzr
+; CHECK-GI-NEXT: mov v28.s[3], w8
+; CHECK-GI-NEXT: smov w8, v0.h[5]
+; CHECK-GI-NEXT: mov v11.h[7], w9
+; CHECK-GI-NEXT: smov w9, v0.h[4]
+; CHECK-GI-NEXT: smov w14, v30.h[7]
+; CHECK-GI-NEXT: fmov s29, w10
+; CHECK-GI-NEXT: mov v15.h[7], w12
; CHECK-GI-NEXT: mul v13.8h, v10.8h, v14.8h
; CHECK-GI-NEXT: smov w12, v12.h[0]
+; CHECK-GI-NEXT: mov v27.s[3], w13
+; CHECK-GI-NEXT: smov w10, v0.h[2]
; CHECK-GI-NEXT: smov w13, v12.h[1]
-; CHECK-GI-NEXT: mov v7.s[1], wzr
-; CHECK-GI-NEXT: mov v2.s[1], wzr
-; CHECK-GI-NEXT: mov v4.s[1], wzr
-; CHECK-GI-NEXT: fmov s31, w11
-; CHECK-GI-NEXT: mov v30.s[3], w14
+; CHECK-GI-NEXT: mov v18.s[2], wzr
+; CHECK-GI-NEXT: mov v17.s[2], wzr
+; CHECK-GI-NEXT: mov v29.s[1], w11
; CHECK-GI-NEXT: smov w11, v12.h[4]
-; CHECK-GI-NEXT: mul v14.8h, v11.8h, v29.8h
-; CHECK-GI-NEXT: fmov s29, w10
-; CHECK-GI-NEXT: smov w10, v15.h[2]
-; CHECK-GI-NEXT: smov w14, v13.h[0]
+; CHECK-GI-NEXT: fmov s30, w9
+; CHECK-GI-NEXT: smov w9, v0.h[6]
+; CHECK-GI-NEXT: mov v31.s[3], w14
+; CHECK-GI-NEXT: smov w14, v12.h[5]
; CHECK-GI-NEXT: fmov s8, w12
-; CHECK-GI-NEXT: smov w16, v13.h[1]
-; CHECK-GI-NEXT: mov v31.s[1], w9
-; CHECK-GI-NEXT: smov w9, v12.h[2]
-; CHECK-GI-NEXT: mov v28.s[3], w15
-; CHECK-GI-NEXT: mov v29.s[1], w8
-; CHECK-GI-NEXT: smov w8, v15.h[6]
-; CHECK-GI-NEXT: smov w15, v12.h[5]
-; CHECK-GI-NEXT: mov v8.s[1], w13
+; CHECK-GI-NEXT: smov w12, v13.h[0]
+; CHECK-GI-NEXT: mul v15.8h, v11.8h, v15.8h
+; CHECK-GI-NEXT: mov v30.s[1], w8
+; CHECK-GI-NEXT: smov w8, v12.h[2]
+; CHECK-GI-NEXT: mov v20.s[2], wzr
; CHECK-GI-NEXT: fmov s9, w11
-; CHECK-GI-NEXT: smov w12, v15.h[3]
-; CHECK-GI-NEXT: fmov s10, w14
-; CHECK-GI-NEXT: smov w14, v13.h[2]
-; CHECK-GI-NEXT: smov w11, v12.h[6]
-; CHECK-GI-NEXT: smov w13, v15.h[7]
-; CHECK-GI-NEXT: mov v3.s[1], wzr
-; CHECK-GI-NEXT: mov v5.s[1], wzr
-; CHECK-GI-NEXT: mov v31.s[2], w8
-; CHECK-GI-NEXT: smov w8, v13.h[4]
+; CHECK-GI-NEXT: smov w11, v13.h[4]
; CHECK-GI-NEXT: mov v29.s[2], w10
-; CHECK-GI-NEXT: mov v10.s[1], w16
-; CHECK-GI-NEXT: smov w16, v14.h[0]
-; CHECK-GI-NEXT: mov v8.s[2], w9
+; CHECK-GI-NEXT: smov w10, v13.h[1]
+; CHECK-GI-NEXT: mov v8.s[1], w13
+; CHECK-GI-NEXT: smov w13, v0.h[3]
+; CHECK-GI-NEXT: fmov s10, w12
+; CHECK-GI-NEXT: smov w12, v12.h[6]
+; CHECK-GI-NEXT: mov v6.s[2], wzr
+; CHECK-GI-NEXT: mov v30.s[2], w9
; CHECK-GI-NEXT: smov w9, v13.h[5]
-; CHECK-GI-NEXT: smov w10, v12.h[3]
-; CHECK-GI-NEXT: mov v9.s[1], w15
-; CHECK-GI-NEXT: smov w15, v13.h[6]
-; CHECK-GI-NEXT: mov v1.s[1], wzr
-; CHECK-GI-NEXT: mov v0.s[1], wzr
-; CHECK-GI-NEXT: fmov s11, w8
-; CHECK-GI-NEXT: smov w8, v14.h[1]
-; CHECK-GI-NEXT: mov v29.s[3], w12
-; CHECK-GI-NEXT: mov v10.s[2], w14
-; CHECK-GI-NEXT: smov w14, v12.h[7]
-; CHECK-GI-NEXT: fmov s12, w16
-; CHECK-GI-NEXT: smov w12, v14.h[4]
-; CHECK-GI-NEXT: mov v8.s[3], w10
-; CHECK-GI-NEXT: ldr w10, [sp, #536]
+; CHECK-GI-NEXT: mov v9.s[1], w14
+; CHECK-GI-NEXT: smov w14, v0.h[7]
+; CHECK-GI-NEXT: fmov s11, w11
+; CHECK-GI-NEXT: smov w11, v13.h[6]
+; CHECK-GI-NEXT: mov v8.s[2], w8
+; CHECK-GI-NEXT: smov w8, v12.h[3]
+; CHECK-GI-NEXT: mov v10.s[1], w10
+; CHECK-GI-NEXT: smov w10, v13.h[2]
+; CHECK-GI-NEXT: mov v29.s[3], w13
+; CHECK-GI-NEXT: smov w13, v15.h[0]
; CHECK-GI-NEXT: mov v11.s[1], w9
-; CHECK-GI-NEXT: ldr w9, [sp, #272]
-; CHECK-GI-NEXT: mov v9.s[2], w11
-; CHECK-GI-NEXT: ldr w11, [sp, #800]
-; CHECK-GI-NEXT: mov v12.s[1], w8
-; CHECK-GI-NEXT: ldr w8, [sp, #1064]
-; CHECK-GI-NEXT: mov v31.s[3], w13
-; CHECK-GI-NEXT: smov w13, v14.h[5]
+; CHECK-GI-NEXT: smov w9, v12.h[7]
+; CHECK-GI-NEXT: mov v9.s[2], w12
+; CHECK-GI-NEXT: mov v30.s[3], w14
+; CHECK-GI-NEXT: smov w14, v15.h[4]
+; CHECK-GI-NEXT: smov w12, v15.h[1]
+; CHECK-GI-NEXT: mov v8.s[3], w8
+; CHECK-GI-NEXT: smov w8, v13.h[3]
+; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT: mov v10.s[2], w10
+; CHECK-GI-NEXT: smov w10, v15.h[5]
+; CHECK-GI-NEXT: fmov s12, w13
+; CHECK-GI-NEXT: mov v11.s[2], w11
+; CHECK-GI-NEXT: smov w11, v13.h[7]
+; CHECK-GI-NEXT: mov v9.s[3], w9
+; CHECK-GI-NEXT: fmov s13, w14
+; CHECK-GI-NEXT: ldr w9, [sp, #288]
+; CHECK-GI-NEXT: mov v0.s[1], wzr
+; CHECK-GI-NEXT: mov v12.s[1], w12
+; CHECK-GI-NEXT: ldr w12, [sp, #552]
+; CHECK-GI-NEXT: smov w13, v15.h[2]
+; CHECK-GI-NEXT: mov v10.s[3], w8
+; CHECK-GI-NEXT: smov w8, v15.h[6]
; CHECK-GI-NEXT: sxtb w9, w9
-; CHECK-GI-NEXT: sxtb w10, w10
-; CHECK-GI-NEXT: sxtb w11, w11
-; CHECK-GI-NEXT: sxtb w8, w8
-; CHECK-GI-NEXT: mov v11.s[2], w15
-; CHECK-GI-NEXT: smov w15, v13.h[3]
-; CHECK-GI-NEXT: smov w16, v13.h[7]
-; CHECK-GI-NEXT: fmov s13, w12
-; CHECK-GI-NEXT: mul w9, w9, w10
-; CHECK-GI-NEXT: smov w12, v14.h[2]
-; CHECK-GI-NEXT: mul w8, w11, w8
-; CHECK-GI-NEXT: mov v19.s[2], wzr
-; CHECK-GI-NEXT: mov v21.s[2], wzr
-; CHECK-GI-NEXT: mov v16.s[2], wzr
-; CHECK-GI-NEXT: mov v18.s[2], wzr
-; CHECK-GI-NEXT: mov v17.s[2], wzr
-; CHECK-GI-NEXT: mov v13.s[1], w13
-; CHECK-GI-NEXT: smov w13, v14.h[6]
-; CHECK-GI-NEXT: sxth w9, w9
-; CHECK-GI-NEXT: sxth w10, w8
-; CHECK-GI-NEXT: mov v20.s[2], wzr
-; CHECK-GI-NEXT: mov v6.s[2], wzr
+; CHECK-GI-NEXT: mov v13.s[1], w10
+; CHECK-GI-NEXT: ldr w10, [sp, #816]
+; CHECK-GI-NEXT: mov v11.s[3], w11
+; CHECK-GI-NEXT: ldr w11, [sp, #1080]
+; CHECK-GI-NEXT: sxtb w12, w12
; CHECK-GI-NEXT: mov v7.s[2], wzr
+; CHECK-GI-NEXT: sxtb w10, w10
; CHECK-GI-NEXT: mov v2.s[2], wzr
; CHECK-GI-NEXT: mov v4.s[2], wzr
+; CHECK-GI-NEXT: sxtb w11, w11
; CHECK-GI-NEXT: mov v3.s[2], wzr
; CHECK-GI-NEXT: mov v5.s[2], wzr
-; CHECK-GI-NEXT: add v22.4s, v22.4s, v23.4s
-; CHECK-GI-NEXT: add v25.4s, v24.4s, v25.4s
-; CHECK-GI-NEXT: fmov s23, w9
-; CHECK-GI-NEXT: fmov s24, w10
-; CHECK-GI-NEXT: mov v12.s[2], w12
-; CHECK-GI-NEXT: mov v13.s[2], w13
-; CHECK-GI-NEXT: smov w8, v14.h[3]
-; CHECK-GI-NEXT: smov w9, v14.h[7]
-; CHECK-GI-NEXT: mov v1.s[2], wzr
+; CHECK-GI-NEXT: mov v13.s[2], w8
+; CHECK-GI-NEXT: mul w8, w9, w12
; CHECK-GI-NEXT: mov v0.s[2], wzr
+; CHECK-GI-NEXT: mul w9, w10, w11
+; CHECK-GI-NEXT: add v14.4s, v22.4s, v23.4s
+; CHECK-GI-NEXT: mov v12.s[2], w13
+; CHECK-GI-NEXT: mov v1.s[2], wzr
+; CHECK-GI-NEXT: add v24.4s, v24.4s, v25.4s
+; CHECK-GI-NEXT: add v25.4s, v26.4s, v27.4s
+; CHECK-GI-NEXT: fmov s22, w8
+; CHECK-GI-NEXT: smov w8, v15.h[3]
; CHECK-GI-NEXT: mov v19.s[3], wzr
+; CHECK-GI-NEXT: fmov s23, w9
+; CHECK-GI-NEXT: smov w9, v15.h[7]
; CHECK-GI-NEXT: mov v21.s[3], wzr
; CHECK-GI-NEXT: mov v16.s[3], wzr
; CHECK-GI-NEXT: mov v18.s[3], wzr
@@ -6560,61 +6562,59 @@ define i32 @test_sdot_v33i8_double(<33 x i8> %a, <33 x i8> %b, <33 x i8> %c, <33
; CHECK-GI-NEXT: mov v20.s[3], wzr
; CHECK-GI-NEXT: mov v6.s[3], wzr
; CHECK-GI-NEXT: mov v7.s[3], wzr
+; CHECK-GI-NEXT: mov v27.16b, v0.16b
; CHECK-GI-NEXT: mov v2.s[3], wzr
; CHECK-GI-NEXT: mov v4.s[3], wzr
; CHECK-GI-NEXT: mov v3.s[3], wzr
; CHECK-GI-NEXT: mov v5.s[3], wzr
+; CHECK-GI-NEXT: mov v22.s[1], wzr
; CHECK-GI-NEXT: mov v23.s[1], wzr
-; CHECK-GI-NEXT: mov v24.s[1], wzr
-; CHECK-GI-NEXT: mov v9.s[3], w14
-; CHECK-GI-NEXT: mov v10.s[3], w15
-; CHECK-GI-NEXT: mov v11.s[3], w16
; CHECK-GI-NEXT: mov v1.s[3], wzr
; CHECK-GI-NEXT: mov v12.s[3], w8
; CHECK-GI-NEXT: mov v13.s[3], w9
-; CHECK-GI-NEXT: mov v0.s[3], wzr
-; CHECK-GI-NEXT: add v19.4s, v19.4s, v21.4s
+; CHECK-GI-NEXT: mov v27.s[3], wzr
+; CHECK-GI-NEXT: add v0.4s, v19.4s, v21.4s
; CHECK-GI-NEXT: add v16.4s, v16.4s, v18.4s
; CHECK-GI-NEXT: add v17.4s, v17.4s, v20.4s
; CHECK-GI-NEXT: add v6.4s, v6.4s, v7.4s
; CHECK-GI-NEXT: add v2.4s, v2.4s, v4.4s
; CHECK-GI-NEXT: add v3.4s, v3.4s, v5.4s
+; CHECK-GI-NEXT: mov v22.s[2], wzr
; CHECK-GI-NEXT: mov v23.s[2], wzr
-; CHECK-GI-NEXT: mov v24.s[2], wzr
-; CHECK-GI-NEXT: add v26.4s, v26.4s, v27.4s
-; CHECK-GI-NEXT: add v27.4s, v28.4s, v30.4s
-; CHECK-GI-NEXT: add v1.4s, v1.4s, v19.4s
-; CHECK-GI-NEXT: add v4.4s, v16.4s, v17.4s
-; CHECK-GI-NEXT: add v5.4s, v29.4s, v31.4s
-; CHECK-GI-NEXT: add v7.4s, v8.4s, v9.4s
-; CHECK-GI-NEXT: add v16.4s, v10.4s, v11.4s
-; CHECK-GI-NEXT: add v17.4s, v12.4s, v13.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v6.4s
+; CHECK-GI-NEXT: add v26.4s, v28.4s, v31.4s
+; CHECK-GI-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT: add v1.4s, v16.4s, v17.4s
+; CHECK-GI-NEXT: add v4.4s, v29.4s, v30.4s
+; CHECK-GI-NEXT: add v5.4s, v8.4s, v9.4s
+; CHECK-GI-NEXT: add v7.4s, v10.4s, v11.4s
+; CHECK-GI-NEXT: add v16.4s, v12.4s, v13.4s
+; CHECK-GI-NEXT: add v6.4s, v27.4s, v6.4s
; CHECK-GI-NEXT: add v2.4s, v2.4s, v3.4s
+; CHECK-GI-NEXT: mov v22.s[3], wzr
+; CHECK-GI-NEXT: add v3.4s, v14.4s, v24.4s
; CHECK-GI-NEXT: mov v23.s[3], wzr
-; CHECK-GI-NEXT: mov v24.s[3], wzr
-; CHECK-GI-NEXT: add v3.4s, v22.4s, v25.4s
-; CHECK-GI-NEXT: add v6.4s, v26.4s, v27.4s
+; CHECK-GI-NEXT: add v17.4s, v25.4s, v26.4s
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: add v1.4s, v4.4s, v5.4s
+; CHECK-GI-NEXT: add v4.4s, v7.4s, v16.4s
+; CHECK-GI-NEXT: ldr x29, [sp, #80] // 8-byte Folded Reload
+; CHECK-GI-NEXT: add v2.4s, v6.4s, v2.4s
+; CHECK-GI-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add v3.4s, v3.4s, v17.4s
+; CHECK-GI-NEXT: add v0.4s, v22.4s, v0.4s
; CHECK-GI-NEXT: add v1.4s, v1.4s, v4.4s
-; CHECK-GI-NEXT: add v4.4s, v5.4s, v7.4s
-; CHECK-GI-NEXT: add v5.4s, v16.4s, v17.4s
-; CHECK-GI-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: add v2.4s, v3.4s, v6.4s
-; CHECK-GI-NEXT: add v1.4s, v23.4s, v1.4s
-; CHECK-GI-NEXT: add v3.4s, v4.4s, v5.4s
-; CHECK-GI-NEXT: add v0.4s, v24.4s, v0.4s
-; CHECK-GI-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: add v1.4s, v2.4s, v1.4s
-; CHECK-GI-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add v2.4s, v23.4s, v2.4s
+; CHECK-GI-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: add v0.4s, v3.4s, v0.4s
-; CHECK-GI-NEXT: addv s1, v1.4s
+; CHECK-GI-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-GI-NEXT: addv s0, v0.4s
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: addv s1, v1.4s
+; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: add w0, w8, w9
-; CHECK-GI-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #96
; CHECK-GI-NEXT: ret
entry:
%az = sext <33 x i8> %a to <33 x i32>
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll
index 225ceed9627b7..cf22908e5a8a0 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/div-by-constant.ll
@@ -594,8 +594,7 @@ define i8 @sdiv8_constant_no_srai(i8 %a) nounwind {
; RV32IM-NEXT: slli a0, a0, 24
; RV32IM-NEXT: srai a0, a0, 24
; RV32IM-NEXT: mul a0, a0, a1
-; RV32IM-NEXT: slli a0, a0, 16
-; RV32IM-NEXT: srai a0, a0, 24
+; RV32IM-NEXT: srai a0, a0, 8
; RV32IM-NEXT: zext.b a1, a0
; RV32IM-NEXT: srli a1, a1, 7
; RV32IM-NEXT: add a0, a0, a1
@@ -606,7 +605,6 @@ define i8 @sdiv8_constant_no_srai(i8 %a) nounwind {
; RV32IMZB-NEXT: li a1, 86
; RV32IMZB-NEXT: sext.b a0, a0
; RV32IMZB-NEXT: mul a0, a0, a1
-; RV32IMZB-NEXT: sext.h a0, a0
; RV32IMZB-NEXT: srai a0, a0, 8
; RV32IMZB-NEXT: zext.b a1, a0
; RV32IMZB-NEXT: srli a1, a1, 7
@@ -619,8 +617,7 @@ define i8 @sdiv8_constant_no_srai(i8 %a) nounwind {
; RV64IM-NEXT: slli a0, a0, 56
; RV64IM-NEXT: srai a0, a0, 56
; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: slli a0, a0, 48
-; RV64IM-NEXT: srai a0, a0, 56
+; RV64IM-NEXT: srai a0, a0, 8
; RV64IM-NEXT: zext.b a1, a0
; RV64IM-NEXT: srli a1, a1, 7
; RV64IM-NEXT: add a0, a0, a1
@@ -631,7 +628,6 @@ define i8 @sdiv8_constant_no_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: li a1, 86
; RV64IMZB-NEXT: sext.b a0, a0
; RV64IMZB-NEXT: mul a0, a0, a1
-; RV64IMZB-NEXT: sext.h a0, a0
; RV64IMZB-NEXT: srai a0, a0, 8
; RV64IMZB-NEXT: zext.b a1, a0
; RV64IMZB-NEXT: srli a1, a1, 7
@@ -648,8 +644,7 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV32IM-NEXT: slli a0, a0, 24
; RV32IM-NEXT: srai a0, a0, 24
; RV32IM-NEXT: mul a0, a0, a1
-; RV32IM-NEXT: slli a0, a0, 16
-; RV32IM-NEXT: srai a0, a0, 25
+; RV32IM-NEXT: srai a0, a0, 9
; RV32IM-NEXT: zext.b a1, a0
; RV32IM-NEXT: srli a1, a1, 7
; RV32IM-NEXT: add a0, a0, a1
@@ -660,7 +655,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV32IMZB-NEXT: li a1, 103
; RV32IMZB-NEXT: sext.b a0, a0
; RV32IMZB-NEXT: mul a0, a0, a1
-; RV32IMZB-NEXT: sext.h a0, a0
; RV32IMZB-NEXT: srai a0, a0, 9
; RV32IMZB-NEXT: zext.b a1, a0
; RV32IMZB-NEXT: srli a1, a1, 7
@@ -673,8 +667,7 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV64IM-NEXT: slli a0, a0, 56
; RV64IM-NEXT: srai a0, a0, 56
; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: slli a0, a0, 48
-; RV64IM-NEXT: srai a0, a0, 57
+; RV64IM-NEXT: srai a0, a0, 9
; RV64IM-NEXT: zext.b a1, a0
; RV64IM-NEXT: srli a1, a1, 7
; RV64IM-NEXT: add a0, a0, a1
@@ -685,7 +678,6 @@ define i8 @sdiv8_constant_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: li a1, 103
; RV64IMZB-NEXT: sext.b a0, a0
; RV64IMZB-NEXT: mul a0, a0, a1
-; RV64IMZB-NEXT: sext.h a0, a0
; RV64IMZB-NEXT: srai a0, a0, 9
; RV64IMZB-NEXT: zext.b a1, a0
; RV64IMZB-NEXT: srli a1, a1, 7
@@ -702,8 +694,7 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV32IM-NEXT: slli a2, a0, 24
; RV32IM-NEXT: srai a2, a2, 24
; RV32IM-NEXT: mul a1, a2, a1
-; RV32IM-NEXT: slli a1, a1, 16
-; RV32IM-NEXT: srai a1, a1, 24
+; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: add a0, a1, a0
; RV32IM-NEXT: slli a0, a0, 24
; RV32IM-NEXT: srai a0, a0, 26
@@ -717,7 +708,6 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV32IMZB-NEXT: li a1, -109
; RV32IMZB-NEXT: sext.b a2, a0
; RV32IMZB-NEXT: mul a1, a2, a1
-; RV32IMZB-NEXT: sext.h a1, a1
; RV32IMZB-NEXT: srai a1, a1, 8
; RV32IMZB-NEXT: add a0, a1, a0
; RV32IMZB-NEXT: sext.b a0, a0
@@ -733,8 +723,7 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV64IM-NEXT: slli a2, a0, 56
; RV64IM-NEXT: srai a2, a2, 56
; RV64IM-NEXT: mul a1, a2, a1
-; RV64IM-NEXT: slli a1, a1, 48
-; RV64IM-NEXT: srai a1, a1, 56
+; RV64IM-NEXT: srai a1, a1, 8
; RV64IM-NEXT: add a0, a1, a0
; RV64IM-NEXT: slli a0, a0, 56
; RV64IM-NEXT: srai a0, a0, 58
@@ -748,7 +737,6 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: li a1, -109
; RV64IMZB-NEXT: sext.b a2, a0
; RV64IMZB-NEXT: mul a1, a2, a1
-; RV64IMZB-NEXT: sext.h a1, a1
; RV64IMZB-NEXT: srai a1, a1, 8
; RV64IMZB-NEXT: add a0, a1, a0
; RV64IMZB-NEXT: sext.b a0, a0
@@ -768,8 +756,7 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV32IM-NEXT: slli a2, a0, 24
; RV32IM-NEXT: srai a2, a2, 24
; RV32IM-NEXT: mul a1, a2, a1
-; RV32IM-NEXT: slli a1, a1, 16
-; RV32IM-NEXT: srai a1, a1, 24
+; RV32IM-NEXT: srai a1, a1, 8
; RV32IM-NEXT: sub a1, a1, a0
; RV32IM-NEXT: slli a1, a1, 24
; RV32IM-NEXT: srai a0, a1, 26
@@ -783,7 +770,6 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV32IMZB-NEXT: li a1, 109
; RV32IMZB-NEXT: sext.b a2, a0
; RV32IMZB-NEXT: mul a1, a2, a1
-; RV32IMZB-NEXT: sext.h a1, a1
; RV32IMZB-NEXT: srai a1, a1, 8
; RV32IMZB-NEXT: sub a1, a1, a0
; RV32IMZB-NEXT: sext.b a0, a1
@@ -799,8 +785,7 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV64IM-NEXT: slli a2, a0, 56
; RV64IM-NEXT: srai a2, a2, 56
; RV64IM-NEXT: mul a1, a2, a1
-; RV64IM-NEXT: slli a1, a1, 48
-; RV64IM-NEXT: srai a1, a1, 56
+; RV64IM-NEXT: srai a1, a1, 8
; RV64IM-NEXT: sub a1, a1, a0
; RV64IM-NEXT: slli a1, a1, 56
; RV64IM-NEXT: srai a0, a1, 58
@@ -814,7 +799,6 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
; RV64IMZB-NEXT: li a1, 109
; RV64IMZB-NEXT: sext.b a2, a0
; RV64IMZB-NEXT: mul a1, a2, a1
-; RV64IMZB-NEXT: sext.h a1, a1
; RV64IMZB-NEXT: srai a1, a1, 8
; RV64IMZB-NEXT: sub a1, a1, a0
; RV64IMZB-NEXT: sext.b a0, a1
@@ -861,7 +845,7 @@ define i16 @sdiv16_constant_no_srai(i16 %a) nounwind {
; RV64IM-NEXT: addi a1, a1, 1366
; RV64IM-NEXT: srai a0, a0, 48
; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: sraiw a0, a0, 16
+; RV64IM-NEXT: srai a0, a0, 16
; RV64IM-NEXT: slli a1, a0, 48
; RV64IM-NEXT: srli a1, a1, 48
; RV64IM-NEXT: srli a1, a1, 15
@@ -874,7 +858,7 @@ define i16 @sdiv16_constant_no_srai(i16 %a) nounwind {
; RV64IMZB-NEXT: addi a1, a1, 1366
; RV64IMZB-NEXT: sext.h a0, a0
; RV64IMZB-NEXT: mul a0, a0, a1
-; RV64IMZB-NEXT: sraiw a0, a0, 16
+; RV64IMZB-NEXT: srai a0, a0, 16
; RV64IMZB-NEXT: zext.h a1, a0
; RV64IMZB-NEXT: srli a1, a1, 15
; RV64IMZB-NEXT: add a0, a0, a1
@@ -917,7 +901,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
; RV64IM-NEXT: addi a1, a1, 1639
; RV64IM-NEXT: srai a0, a0, 48
; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: sraiw a0, a0, 17
+; RV64IM-NEXT: srai a0, a0, 17
; RV64IM-NEXT: slli a1, a0, 48
; RV64IM-NEXT: srli a1, a1, 48
; RV64IM-NEXT: srli a1, a1, 15
@@ -930,7 +914,7 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
; RV64IMZB-NEXT: addi a1, a1, 1639
; RV64IMZB-NEXT: sext.h a0, a0
; RV64IMZB-NEXT: mul a0, a0, a1
-; RV64IMZB-NEXT: sraiw a0, a0, 17
+; RV64IMZB-NEXT: srai a0, a0, 17
; RV64IMZB-NEXT: zext.h a1, a0
; RV64IMZB-NEXT: srli a1, a1, 15
; RV64IMZB-NEXT: add a0, a0, a1
@@ -979,7 +963,7 @@ define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
; RV64IM-NEXT: addi a1, a1, -1911
; RV64IM-NEXT: srai a2, a2, 48
; RV64IM-NEXT: mul a1, a2, a1
-; RV64IM-NEXT: sraiw a1, a1, 16
+; RV64IM-NEXT: srai a1, a1, 16
; RV64IM-NEXT: add a0, a1, a0
; RV64IM-NEXT: slli a0, a0, 48
; RV64IM-NEXT: srai a0, a0, 51
@@ -995,7 +979,7 @@ define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
; RV64IMZB-NEXT: addi a1, a1, -1911
; RV64IMZB-NEXT: sext.h a2, a0
; RV64IMZB-NEXT: mul a1, a2, a1
-; RV64IMZB-NEXT: sraiw a1, a1, 16
+; RV64IMZB-NEXT: srai a1, a1, 16
; RV64IMZB-NEXT: add a0, a1, a0
; RV64IMZB-NEXT: sext.h a0, a0
; RV64IMZB-NEXT: srai a0, a0, 3
@@ -1047,7 +1031,7 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
; RV64IM-NEXT: addi a1, a1, 1911
; RV64IM-NEXT: srai a2, a2, 48
; RV64IM-NEXT: mul a1, a2, a1
-; RV64IM-NEXT: sraiw a1, a1, 16
+; RV64IM-NEXT: srai a1, a1, 16
; RV64IM-NEXT: sub a1, a1, a0
; RV64IM-NEXT: slli a1, a1, 48
; RV64IM-NEXT: srai a0, a1, 51
@@ -1063,7 +1047,7 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
; RV64IMZB-NEXT: addi a1, a1, 1911
; RV64IMZB-NEXT: sext.h a2, a0
; RV64IMZB-NEXT: mul a1, a2, a1
-; RV64IMZB-NEXT: sraiw a1, a1, 16
+; RV64IMZB-NEXT: srai a1, a1, 16
; RV64IMZB-NEXT: sub a1, a1, a0
; RV64IMZB-NEXT: sext.h a0, a1
; RV64IMZB-NEXT: srai a0, a0, 3
More information about the llvm-commits
mailing list