[llvm] [AArch64][GlobalISel] Add legalization for vecreduce.fmul (PR #73309)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 14:15:27 PST 2023


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/73309

>From 3a5186be3168cb77dca30f6e5449295e2792db9c Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 15 Dec 2023 21:46:57 +0000
Subject: [PATCH] [AArch64][GlobalISel] Lower scalarizing G_UNMERGE_VALUES to
 G_EXTRACT_VECTOR_ELT

This adds a post-legalization lowering for G_UNMERGE_VALUES instructions that
take a vector and produce one scalar value per lane. Each is converted into a
G_EXTRACT_VECTOR_ELT per lane, allowing all the existing tablegen patterns to
apply to them.

A couple of tablegen patterns need to be altered to make sure the type of the
constant operand is known, so that the patterns are recognized under global
isel.
---
 llvm/lib/Target/AArch64/AArch64Combine.td     |  11 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  12 +-
 .../GISel/AArch64PostLegalizerLowering.cpp    |  22 ++
 llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll  | 176 +++++-----
 llvm/test/CodeGen/AArch64/aarch64-mulv.ll     | 155 ++++-----
 llvm/test/CodeGen/AArch64/aarch64-smull.ll    |  18 +-
 llvm/test/CodeGen/AArch64/fptoi.ll            |  16 +-
 llvm/test/CodeGen/AArch64/reduce-and.ll       | 309 +++++++-----------
 llvm/test/CodeGen/AArch64/reduce-or.ll        | 309 +++++++-----------
 llvm/test/CodeGen/AArch64/reduce-xor.ll       | 309 +++++++-----------
 llvm/test/CodeGen/AArch64/sext.ll             |  42 +--
 .../AArch64/vecreduce-umax-legalization.ll    |  98 +++---
 llvm/test/CodeGen/AArch64/xtn.ll              |  24 +-
 llvm/test/CodeGen/AArch64/zext.ll             |  41 +--
 14 files changed, 623 insertions(+), 919 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d6c00be80bd9c2..99f256b887821b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -134,6 +134,14 @@ def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
                                               form_duplane,
                                               shuf_to_ins]>;
 
+// Turn a scalarizing G_UNMERGE_VALUES into one G_EXTRACT_VECTOR_ELT per lane.
+def vector_unmerge_lowering : GICombineRule <
+  (defs root:$root),
+  (match (wip_match_opcode G_UNMERGE_VALUES):$root,
+          [{ return matchScalarizeVectorUnmerge(*${root}, MRI); }]),
+  (apply [{ applyScalarizeVectorUnmerge(*${root}, MRI, B); }])
+>;
+
 def adjust_icmp_imm_matchdata :
   GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
 def adjust_icmp_imm : GICombineRule <
@@ -251,7 +259,8 @@ def AArch64PostLegalizerLowering
                         icmp_lowering, build_vector_lowering,
                         lower_vector_fcmp, form_truncstore,
                         vector_sext_inreg_to_shift,
-                        unmerge_ext_to_unmerge, lower_mull]> {
+                        unmerge_ext_to_unmerge, lower_mull,
+                        vector_unmerge_lowering]> {
 }
 
 // Post-legalization combines which are primarily optimizations.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 44b0337fe78791..5e60bb80e7fec4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6480,23 +6480,23 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))),
 // f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
 def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
           (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
-def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
+def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
           (EXTRACT_SUBREG V128:$src, ssub)>;
 def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
           (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
-def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
+def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
           (EXTRACT_SUBREG V128:$src, dsub)>;
 
 // Floating point vector extractions are codegen'd as either a sequence of
 // subregister extractions, or a MOV (aka DUP here) if
 // the lane number is anything other than zero.
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
+def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
           (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
+def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
           (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
-def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
           (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
-def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
+def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
           (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
 
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 687063873a16b2..ef0513cfee5131 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -769,6 +769,28 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
   MI.eraseFromParent();
 }
 
+bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+  Register Src1Reg = MI.getOperand(MI.getNumOperands() - 1).getReg();
+  const LLT SrcTy = MRI.getType(Src1Reg);
+  return SrcTy.isVector() && !SrcTy.isScalable() &&
+         MI.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
+}
+
+void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                 MachineIRBuilder &B) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+  Register Src1Reg = MI.getOperand(MI.getNumOperands() - 1).getReg();
+  const LLT SrcTy = MRI.getType(Src1Reg);
+  assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
+         "Expected a fixed length vector");
+
+  for (int I = 0; I < SrcTy.getNumElements(); ++I) {
+    B.buildExtractVectorElementConstant(MI.getOperand(I).getReg(), Src1Reg, I);
+  }
+  MI.eraseFromParent();
+}
+
 bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
   auto Splat = getAArch64VectorSplat(MI, MRI);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 5e477e8947d1b8..194fe5be40c2bd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -516,20 +516,17 @@ define i8 @sminv_v4i8(<4 x i8> %a) {
 ; CHECK-GI-LABEL: sminv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w12, v0.h[3]
+; CHECK-GI-NEXT:    sxtb w11, w8
+; CHECK-GI-NEXT:    cmp w11, w9, sxtb
+; CHECK-GI-NEXT:    sxtb w11, w10
+; CHECK-GI-NEXT:    csel w8, w8, w9, lt
+; CHECK-GI-NEXT:    cmp w11, w12, sxtb
 ; CHECK-GI-NEXT:    sxtb w9, w8
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
-; CHECK-GI-NEXT:    cmp w9, w10, sxtb
-; CHECK-GI-NEXT:    sxtb w9, w11
-; CHECK-GI-NEXT:    csel w8, w8, w10, lt
-; CHECK-GI-NEXT:    fmov w10, s3
-; CHECK-GI-NEXT:    cmp w9, w10, sxtb
-; CHECK-GI-NEXT:    sxtb w9, w8
-; CHECK-GI-NEXT:    csel w10, w11, w10, lt
+; CHECK-GI-NEXT:    csel w10, w10, w12, lt
 ; CHECK-GI-NEXT:    cmp w9, w10, sxtb
 ; CHECK-GI-NEXT:    csel w0, w8, w10, lt
 ; CHECK-GI-NEXT:    ret
@@ -611,19 +608,16 @@ define i16 @sminv_v3i16(<3 x i16> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s0
-; CHECK-GI-NEXT:    sxth w8, w8
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
+; CHECK-GI-NEXT:    smov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[0]
+; CHECK-GI-NEXT:    umov w10, v0.h[1]
+; CHECK-GI-NEXT:    smov w11, v0.h[2]
+; CHECK-GI-NEXT:    umov w13, v0.h[2]
 ; CHECK-GI-NEXT:    fmov w12, s1
-; CHECK-GI-NEXT:    cmp w8, w10, sxth
-; CHECK-GI-NEXT:    sxth w8, w11
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    csel w9, w9, w12, lt
-; CHECK-GI-NEXT:    cmp w8, w9, sxth
-; CHECK-GI-NEXT:    csel w0, w9, w10, gt
+; CHECK-GI-NEXT:    cmp w8, w12, sxth
+; CHECK-GI-NEXT:    csel w8, w9, w10, lt
+; CHECK-GI-NEXT:    cmp w11, w8, sxth
+; CHECK-GI-NEXT:    csel w0, w8, w13, gt
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
@@ -887,20 +881,17 @@ define i8 @smaxv_v4i8(<4 x i8> %a) {
 ; CHECK-GI-LABEL: smaxv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    sxtb w9, w8
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
-; CHECK-GI-NEXT:    cmp w9, w10, sxtb
-; CHECK-GI-NEXT:    sxtb w9, w11
-; CHECK-GI-NEXT:    csel w8, w8, w10, gt
-; CHECK-GI-NEXT:    fmov w10, s3
-; CHECK-GI-NEXT:    cmp w9, w10, sxtb
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w12, v0.h[3]
+; CHECK-GI-NEXT:    sxtb w11, w8
+; CHECK-GI-NEXT:    cmp w11, w9, sxtb
+; CHECK-GI-NEXT:    sxtb w11, w10
+; CHECK-GI-NEXT:    csel w8, w8, w9, gt
+; CHECK-GI-NEXT:    cmp w11, w12, sxtb
 ; CHECK-GI-NEXT:    sxtb w9, w8
-; CHECK-GI-NEXT:    csel w10, w11, w10, gt
+; CHECK-GI-NEXT:    csel w10, w10, w12, gt
 ; CHECK-GI-NEXT:    cmp w9, w10, sxtb
 ; CHECK-GI-NEXT:    csel w0, w8, w10, gt
 ; CHECK-GI-NEXT:    ret
@@ -982,19 +973,16 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s0
-; CHECK-GI-NEXT:    sxth w8, w8
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
+; CHECK-GI-NEXT:    smov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[0]
+; CHECK-GI-NEXT:    umov w10, v0.h[1]
+; CHECK-GI-NEXT:    smov w11, v0.h[2]
+; CHECK-GI-NEXT:    umov w13, v0.h[2]
 ; CHECK-GI-NEXT:    fmov w12, s1
-; CHECK-GI-NEXT:    cmp w8, w10, sxth
-; CHECK-GI-NEXT:    sxth w8, w11
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    csel w9, w9, w12, gt
-; CHECK-GI-NEXT:    cmp w8, w9, sxth
-; CHECK-GI-NEXT:    csel w0, w9, w10, lt
+; CHECK-GI-NEXT:    cmp w8, w12, sxth
+; CHECK-GI-NEXT:    csel w8, w9, w10, gt
+; CHECK-GI-NEXT:    cmp w11, w8, sxth
+; CHECK-GI-NEXT:    csel w0, w8, w13, lt
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
@@ -1256,19 +1244,16 @@ define i8 @uminv_v4i8(<4 x i8> %a) {
 ; CHECK-GI-LABEL: uminv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
-; CHECK-GI-NEXT:    fmov w12, s3
-; CHECK-GI-NEXT:    and w9, w8, #0xff
-; CHECK-GI-NEXT:    cmp w9, w10, uxtb
-; CHECK-GI-NEXT:    and w9, w11, #0xff
-; CHECK-GI-NEXT:    csel w8, w8, w10, lo
-; CHECK-GI-NEXT:    cmp w9, w12, uxtb
-; CHECK-GI-NEXT:    csel w9, w11, w12, lo
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w11, v0.h[3]
+; CHECK-GI-NEXT:    and w12, w8, #0xff
+; CHECK-GI-NEXT:    cmp w12, w9, uxtb
+; CHECK-GI-NEXT:    and w12, w10, #0xff
+; CHECK-GI-NEXT:    csel w8, w8, w9, lo
+; CHECK-GI-NEXT:    cmp w12, w11, uxtb
+; CHECK-GI-NEXT:    csel w9, w10, w11, lo
 ; CHECK-GI-NEXT:    and w10, w8, #0xff
 ; CHECK-GI-NEXT:    cmp w10, w9, uxtb
 ; CHECK-GI-NEXT:    csel w0, w8, w9, lo
@@ -1351,19 +1336,16 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s0
-; CHECK-GI-NEXT:    uxth w8, w8
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[0]
+; CHECK-GI-NEXT:    umov w10, v0.h[1]
+; CHECK-GI-NEXT:    umov w11, v0.h[2]
+; CHECK-GI-NEXT:    umov w13, v0.h[2]
 ; CHECK-GI-NEXT:    fmov w12, s1
-; CHECK-GI-NEXT:    cmp w8, w10, uxth
-; CHECK-GI-NEXT:    uxth w8, w11
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    csel w9, w9, w12, lo
-; CHECK-GI-NEXT:    cmp w8, w9, uxth
-; CHECK-GI-NEXT:    csel w0, w9, w10, hi
+; CHECK-GI-NEXT:    cmp w8, w12, uxth
+; CHECK-GI-NEXT:    csel w8, w9, w10, lo
+; CHECK-GI-NEXT:    cmp w11, w8, uxth
+; CHECK-GI-NEXT:    csel w0, w8, w13, hi
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
@@ -1625,19 +1607,16 @@ define i8 @umaxv_v4i8(<4 x i8> %a) {
 ; CHECK-GI-LABEL: umaxv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
-; CHECK-GI-NEXT:    fmov w12, s3
-; CHECK-GI-NEXT:    and w9, w8, #0xff
-; CHECK-GI-NEXT:    cmp w9, w10, uxtb
-; CHECK-GI-NEXT:    and w9, w11, #0xff
-; CHECK-GI-NEXT:    csel w8, w8, w10, hi
-; CHECK-GI-NEXT:    cmp w9, w12, uxtb
-; CHECK-GI-NEXT:    csel w9, w11, w12, hi
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w11, v0.h[3]
+; CHECK-GI-NEXT:    and w12, w8, #0xff
+; CHECK-GI-NEXT:    cmp w12, w9, uxtb
+; CHECK-GI-NEXT:    and w12, w10, #0xff
+; CHECK-GI-NEXT:    csel w8, w8, w9, hi
+; CHECK-GI-NEXT:    cmp w12, w11, uxtb
+; CHECK-GI-NEXT:    csel w9, w10, w11, hi
 ; CHECK-GI-NEXT:    and w10, w8, #0xff
 ; CHECK-GI-NEXT:    cmp w10, w9, uxtb
 ; CHECK-GI-NEXT:    csel w0, w8, w9, hi
@@ -1719,19 +1698,16 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s0
-; CHECK-GI-NEXT:    uxth w8, w8
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[0]
+; CHECK-GI-NEXT:    umov w10, v0.h[1]
+; CHECK-GI-NEXT:    umov w11, v0.h[2]
+; CHECK-GI-NEXT:    umov w13, v0.h[2]
 ; CHECK-GI-NEXT:    fmov w12, s1
-; CHECK-GI-NEXT:    cmp w8, w10, uxth
-; CHECK-GI-NEXT:    uxth w8, w11
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    csel w9, w9, w12, hi
-; CHECK-GI-NEXT:    cmp w8, w9, uxth
-; CHECK-GI-NEXT:    csel w0, w9, w10, lo
+; CHECK-GI-NEXT:    cmp w8, w12, uxth
+; CHECK-GI-NEXT:    csel w8, w9, w10, hi
+; CHECK-GI-NEXT:    cmp w11, w8, uxth
+; CHECK-GI-NEXT:    csel w0, w8, w13, lo
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
index 90f09379e68fd2..7b7ca9d8ffc2db 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
@@ -73,13 +73,10 @@ define i8 @mulv_v4i8(<4 x i8> %a) {
 ; CHECK-GI-LABEL: mulv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    fmov w11, s3
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w11, v0.h[3]
 ; CHECK-GI-NEXT:    mul w8, w8, w9
 ; CHECK-GI-NEXT:    mul w9, w10, w11
 ; CHECK-GI-NEXT:    mul w0, w8, w9
@@ -113,27 +110,20 @@ define i8 @mulv_v8i8(<8 x i8> %a) {
 ; CHECK-GI-LABEL: mulv_v8i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov b1, v0.b[1]
-; CHECK-GI-NEXT:    mov b2, v0.b[2]
-; CHECK-GI-NEXT:    mov b3, v0.b[3]
-; CHECK-GI-NEXT:    mov b4, v0.b[4]
-; CHECK-GI-NEXT:    mov b5, v0.b[5]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov b6, v0.b[6]
-; CHECK-GI-NEXT:    mov b7, v0.b[7]
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    fmov w11, s3
-; CHECK-GI-NEXT:    fmov w12, s5
+; CHECK-GI-NEXT:    umov w8, v0.b[0]
+; CHECK-GI-NEXT:    umov w9, v0.b[1]
+; CHECK-GI-NEXT:    umov w10, v0.b[2]
+; CHECK-GI-NEXT:    umov w11, v0.b[3]
+; CHECK-GI-NEXT:    umov w12, v0.b[4]
+; CHECK-GI-NEXT:    umov w13, v0.b[5]
+; CHECK-GI-NEXT:    umov w14, v0.b[6]
+; CHECK-GI-NEXT:    umov w15, v0.b[7]
 ; CHECK-GI-NEXT:    mul w8, w8, w9
-; CHECK-GI-NEXT:    fmov w9, s4
-; CHECK-GI-NEXT:    mul w10, w10, w11
-; CHECK-GI-NEXT:    fmov w11, s6
-; CHECK-GI-NEXT:    mul w9, w9, w12
-; CHECK-GI-NEXT:    fmov w12, s7
-; CHECK-GI-NEXT:    mul w8, w8, w10
-; CHECK-GI-NEXT:    mul w11, w11, w12
-; CHECK-GI-NEXT:    mul w9, w9, w11
+; CHECK-GI-NEXT:    mul w9, w10, w11
+; CHECK-GI-NEXT:    mul w10, w12, w13
+; CHECK-GI-NEXT:    mul w11, w14, w15
+; CHECK-GI-NEXT:    mul w8, w8, w9
+; CHECK-GI-NEXT:    mul w9, w10, w11
 ; CHECK-GI-NEXT:    mul w0, w8, w9
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -167,27 +157,20 @@ define i8 @mulv_v16i8(<16 x i8> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    mul v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov b1, v0.b[1]
-; CHECK-GI-NEXT:    mov b2, v0.b[2]
-; CHECK-GI-NEXT:    mov b3, v0.b[3]
-; CHECK-GI-NEXT:    mov b4, v0.b[4]
-; CHECK-GI-NEXT:    mov b5, v0.b[5]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov b6, v0.b[6]
-; CHECK-GI-NEXT:    mov b7, v0.b[7]
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    fmov w11, s3
-; CHECK-GI-NEXT:    fmov w12, s5
+; CHECK-GI-NEXT:    umov w8, v0.b[0]
+; CHECK-GI-NEXT:    umov w9, v0.b[1]
+; CHECK-GI-NEXT:    umov w10, v0.b[2]
+; CHECK-GI-NEXT:    umov w11, v0.b[3]
+; CHECK-GI-NEXT:    umov w12, v0.b[4]
+; CHECK-GI-NEXT:    umov w13, v0.b[5]
+; CHECK-GI-NEXT:    umov w14, v0.b[6]
+; CHECK-GI-NEXT:    umov w15, v0.b[7]
+; CHECK-GI-NEXT:    mul w8, w8, w9
+; CHECK-GI-NEXT:    mul w9, w10, w11
+; CHECK-GI-NEXT:    mul w10, w12, w13
+; CHECK-GI-NEXT:    mul w11, w14, w15
 ; CHECK-GI-NEXT:    mul w8, w8, w9
-; CHECK-GI-NEXT:    fmov w9, s4
-; CHECK-GI-NEXT:    mul w10, w10, w11
-; CHECK-GI-NEXT:    fmov w11, s6
-; CHECK-GI-NEXT:    mul w9, w9, w12
-; CHECK-GI-NEXT:    fmov w12, s7
-; CHECK-GI-NEXT:    mul w8, w8, w10
-; CHECK-GI-NEXT:    mul w11, w11, w12
-; CHECK-GI-NEXT:    mul w9, w9, w11
+; CHECK-GI-NEXT:    mul w9, w10, w11
 ; CHECK-GI-NEXT:    mul w0, w8, w9
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -225,27 +208,20 @@ define i8 @mulv_v32i8(<32 x i8> %a) {
 ; CHECK-GI-NEXT:    mul v0.8b, v0.8b, v2.8b
 ; CHECK-GI-NEXT:    mul v1.8b, v1.8b, v3.8b
 ; CHECK-GI-NEXT:    mul v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov b1, v0.b[1]
-; CHECK-GI-NEXT:    mov b2, v0.b[2]
-; CHECK-GI-NEXT:    mov b3, v0.b[3]
-; CHECK-GI-NEXT:    mov b4, v0.b[4]
-; CHECK-GI-NEXT:    mov b5, v0.b[5]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov b6, v0.b[6]
-; CHECK-GI-NEXT:    mov b7, v0.b[7]
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    fmov w11, s3
-; CHECK-GI-NEXT:    fmov w12, s5
+; CHECK-GI-NEXT:    umov w8, v0.b[0]
+; CHECK-GI-NEXT:    umov w9, v0.b[1]
+; CHECK-GI-NEXT:    umov w10, v0.b[2]
+; CHECK-GI-NEXT:    umov w11, v0.b[3]
+; CHECK-GI-NEXT:    umov w12, v0.b[4]
+; CHECK-GI-NEXT:    umov w13, v0.b[5]
+; CHECK-GI-NEXT:    umov w14, v0.b[6]
+; CHECK-GI-NEXT:    umov w15, v0.b[7]
 ; CHECK-GI-NEXT:    mul w8, w8, w9
-; CHECK-GI-NEXT:    fmov w9, s4
-; CHECK-GI-NEXT:    mul w10, w10, w11
-; CHECK-GI-NEXT:    fmov w11, s6
-; CHECK-GI-NEXT:    mul w9, w9, w12
-; CHECK-GI-NEXT:    fmov w12, s7
-; CHECK-GI-NEXT:    mul w8, w8, w10
-; CHECK-GI-NEXT:    mul w11, w11, w12
-; CHECK-GI-NEXT:    mul w9, w9, w11
+; CHECK-GI-NEXT:    mul w9, w10, w11
+; CHECK-GI-NEXT:    mul w10, w12, w13
+; CHECK-GI-NEXT:    mul w11, w14, w15
+; CHECK-GI-NEXT:    mul w8, w8, w9
+; CHECK-GI-NEXT:    mul w9, w10, w11
 ; CHECK-GI-NEXT:    mul w0, w8, w9
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -289,13 +265,11 @@ define i16 @mulv_v3i16(<3 x i16> %a) {
 ; CHECK-GI-LABEL: mulv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
 ; CHECK-GI-NEXT:    mul w8, w8, w9
-; CHECK-GI-NEXT:    fmov w9, s2
-; CHECK-GI-NEXT:    mul w0, w8, w9
+; CHECK-GI-NEXT:    mul w0, w8, w10
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %a)
@@ -318,13 +292,10 @@ define i16 @mulv_v4i16(<4 x i16> %a) {
 ; CHECK-GI-LABEL: mulv_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    fmov w11, s3
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w11, v0.h[3]
 ; CHECK-GI-NEXT:    mul w8, w8, w9
 ; CHECK-GI-NEXT:    mul w9, w10, w11
 ; CHECK-GI-NEXT:    mul w0, w8, w9
@@ -352,13 +323,10 @@ define i16 @mulv_v8i16(<8 x i16> %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    fmov w11, s3
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w11, v0.h[3]
 ; CHECK-GI-NEXT:    mul w8, w8, w9
 ; CHECK-GI-NEXT:    mul w9, w10, w11
 ; CHECK-GI-NEXT:    mul w0, w8, w9
@@ -390,15 +358,12 @@ define i16 @mulv_v16i16(<16 x i16> %a) {
 ; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v2.4h
 ; CHECK-GI-NEXT:    mul v1.4h, v1.4h, v3.4h
 ; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w11, v0.h[3]
 ; CHECK-GI-NEXT:    mul w8, w8, w9
-; CHECK-GI-NEXT:    fmov w9, s3
-; CHECK-GI-NEXT:    mul w9, w10, w9
+; CHECK-GI-NEXT:    mul w9, w10, w11
 ; CHECK-GI-NEXT:    mul w0, w8, w9
 ; CHECK-GI-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 99aa28d859e1f8..dbc5417e23133d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -3,6 +3,19 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:       warning: Instruction selection used fallback path for smull_zext_v4i16_v4i32
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl2_v8i16_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl2_v8i16_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl2_v4i32_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl2_v4i32_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl_smlsl2_v8i16_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl_umlsl2_v8i16_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl_smlsl2_v4i32_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl_umlsl2_v4i32_uzp1
+; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for do_stuff
+
 define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: smull_v8i8_v8i16:
 ; CHECK:       // %bb.0:
@@ -226,11 +239,10 @@ define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind {
 ; CHECK-GI-NEXT:    movi d0, #0x00ffff0000ffff
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
 ; CHECK-GI-NEXT:    and v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
-; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    mov w8, v0.s[0]
+; CHECK-GI-NEXT:    mov w9, v0.s[1]
 ; CHECK-GI-NEXT:    ldr d0, [x1]
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    fmov d1, x8
 ; CHECK-GI-NEXT:    mov d3, v0.d[1]
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index f30dad966492c1..23ba85d54c7a4f 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -5846,11 +5846,9 @@ define <3 x i8> @fptos_v3f16_v3i8(<3 x half> %a) {
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fmov w0, s0
-; CHECK-GI-FP16-NEXT:    fmov w1, s1
-; CHECK-GI-FP16-NEXT:    fmov w2, s2
+; CHECK-GI-FP16-NEXT:    umov w0, v0.h[0]
+; CHECK-GI-FP16-NEXT:    umov w1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    umov w2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <3 x half> %a to <3 x i8>
@@ -5890,11 +5888,9 @@ define <3 x i8> @fptou_v3f16_v3i8(<3 x half> %a) {
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i8:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT:    fmov w0, s0
-; CHECK-GI-FP16-NEXT:    fmov w1, s1
-; CHECK-GI-FP16-NEXT:    fmov w2, s2
+; CHECK-GI-FP16-NEXT:    umov w0, v0.h[0]
+; CHECK-GI-FP16-NEXT:    umov w1, v0.h[1]
+; CHECK-GI-FP16-NEXT:    umov w2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <3 x half> %a to <3 x i8>
diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index a20a76c00418d1..8b7438a42b711e 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -53,13 +53,10 @@ define i1 @test_redand_v4i1(<4 x i1> %a) {
 ; GISEL-LABEL: test_redand_v4i1:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    and w8, w8, w9
 ; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w8, w8, w9
@@ -82,27 +79,20 @@ define i1 @test_redand_v8i1(<8 x i1> %a) {
 ; GISEL-LABEL: test_redand_v8i1:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    and w10, w10, w11
-; GISEL-NEXT:    and w11, w12, w13
-; GISEL-NEXT:    and w8, w8, w10
-; GISEL-NEXT:    and w9, w14, w9
-; GISEL-NEXT:    and w9, w11, w9
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    and w10, w12, w13
+; GISEL-NEXT:    and w11, w14, w15
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w8, w8, w9
 ; GISEL-NEXT:    and w0, w8, #0x1
 ; GISEL-NEXT:    ret
@@ -122,49 +112,34 @@ define i1 @test_redand_v16i1(<16 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_v16i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    mov b16, v0.b[8]
-; GISEL-NEXT:    mov b17, v0.b[9]
-; GISEL-NEXT:    mov b18, v0.b[10]
-; GISEL-NEXT:    mov b19, v0.b[11]
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s6
-; GISEL-NEXT:    mov b20, v0.b[12]
-; GISEL-NEXT:    mov b21, v0.b[13]
-; GISEL-NEXT:    fmov w13, s7
-; GISEL-NEXT:    mov b22, v0.b[14]
-; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    umov w16, v0.b[8]
+; GISEL-NEXT:    umov w17, v0.b[9]
+; GISEL-NEXT:    umov w18, v0.b[10]
+; GISEL-NEXT:    umov w0, v0.b[11]
 ; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    umov w1, v0.b[12]
+; GISEL-NEXT:    umov w2, v0.b[13]
 ; GISEL-NEXT:    and w9, w10, w11
-; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    and w10, w12, w13
+; GISEL-NEXT:    umov w3, v0.b[14]
+; GISEL-NEXT:    and w11, w14, w15
 ; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    fmov w11, s5
-; GISEL-NEXT:    fmov w14, s18
-; GISEL-NEXT:    fmov w15, s19
-; GISEL-NEXT:    fmov w16, s22
-; GISEL-NEXT:    fmov w17, s23
-; GISEL-NEXT:    and w10, w10, w11
-; GISEL-NEXT:    and w11, w12, w13
-; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    umov w4, v0.b[15]
+; GISEL-NEXT:    and w12, w16, w17
+; GISEL-NEXT:    and w13, w18, w0
 ; GISEL-NEXT:    and w9, w10, w11
-; GISEL-NEXT:    fmov w13, s17
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    and w12, w12, w13
-; GISEL-NEXT:    and w13, w14, w15
-; GISEL-NEXT:    fmov w14, s20
-; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    and w14, w1, w2
 ; GISEL-NEXT:    and w10, w12, w13
-; GISEL-NEXT:    and w14, w14, w15
-; GISEL-NEXT:    and w15, w16, w17
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w15, w3, w4
 ; GISEL-NEXT:    and w11, w14, w15
 ; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w8, w8, w9
@@ -184,49 +159,34 @@ define <16 x i1> @test_redand_ins_v16i1(<16 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_ins_v16i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    mov b16, v0.b[8]
-; GISEL-NEXT:    mov b17, v0.b[9]
-; GISEL-NEXT:    mov b18, v0.b[10]
-; GISEL-NEXT:    mov b19, v0.b[11]
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s6
-; GISEL-NEXT:    mov b20, v0.b[12]
-; GISEL-NEXT:    mov b21, v0.b[13]
-; GISEL-NEXT:    fmov w13, s7
-; GISEL-NEXT:    mov b22, v0.b[14]
-; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    umov w16, v0.b[8]
+; GISEL-NEXT:    umov w17, v0.b[9]
+; GISEL-NEXT:    umov w18, v0.b[10]
+; GISEL-NEXT:    umov w0, v0.b[11]
 ; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    umov w1, v0.b[12]
+; GISEL-NEXT:    umov w2, v0.b[13]
 ; GISEL-NEXT:    and w9, w10, w11
-; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    and w10, w12, w13
+; GISEL-NEXT:    umov w3, v0.b[14]
+; GISEL-NEXT:    and w11, w14, w15
 ; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    fmov w11, s5
-; GISEL-NEXT:    fmov w14, s18
-; GISEL-NEXT:    fmov w15, s19
-; GISEL-NEXT:    fmov w16, s22
-; GISEL-NEXT:    fmov w17, s23
-; GISEL-NEXT:    and w10, w10, w11
-; GISEL-NEXT:    and w11, w12, w13
-; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    umov w4, v0.b[15]
+; GISEL-NEXT:    and w12, w16, w17
+; GISEL-NEXT:    and w13, w18, w0
 ; GISEL-NEXT:    and w9, w10, w11
-; GISEL-NEXT:    fmov w13, s17
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    and w12, w12, w13
-; GISEL-NEXT:    and w13, w14, w15
-; GISEL-NEXT:    fmov w14, s20
-; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    and w14, w1, w2
 ; GISEL-NEXT:    and w10, w12, w13
-; GISEL-NEXT:    and w14, w14, w15
-; GISEL-NEXT:    and w15, w16, w17
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w15, w3, w4
 ; GISEL-NEXT:    and w11, w14, w15
 ; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w8, w8, w9
@@ -287,13 +247,10 @@ define i8 @test_redand_v4i8(<4 x i8> %a) {
 ; GISEL-LABEL: test_redand_v4i8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    and w8, w8, w9
 ; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w0, w8, w9
@@ -315,27 +272,20 @@ define i8 @test_redand_v8i8(<8 x i8> %a) {
 ; GISEL-LABEL: test_redand_v8i8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    and w10, w10, w11
-; GISEL-NEXT:    and w11, w12, w13
-; GISEL-NEXT:    and w8, w8, w10
-; GISEL-NEXT:    and w9, w14, w9
-; GISEL-NEXT:    and w9, w11, w9
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    and w10, w12, w13
+; GISEL-NEXT:    and w11, w14, w15
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w0, w8, w9
 ; GISEL-NEXT:    ret
   %and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
@@ -358,27 +308,20 @@ define i8 @test_redand_v16i8(<16 x i8> %a) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    and w10, w10, w11
-; GISEL-NEXT:    and w11, w12, w13
-; GISEL-NEXT:    and w8, w8, w10
-; GISEL-NEXT:    and w9, w14, w9
-; GISEL-NEXT:    and w9, w11, w9
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    and w10, w12, w13
+; GISEL-NEXT:    and w11, w14, w15
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w0, w8, w9
 ; GISEL-NEXT:    ret
   %and_result = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
@@ -403,27 +346,20 @@ define i8 @test_redand_v32i8(<32 x i8> %a) {
 ; GISEL-NEXT:    and v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    and w10, w12, w13
+; GISEL-NEXT:    and w11, w14, w15
 ; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    and w10, w10, w11
-; GISEL-NEXT:    and w11, w12, w13
-; GISEL-NEXT:    and w8, w8, w10
-; GISEL-NEXT:    and w9, w14, w9
-; GISEL-NEXT:    and w9, w11, w9
+; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w0, w8, w9
 ; GISEL-NEXT:    ret
   %and_result = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a)
@@ -442,13 +378,10 @@ define i16 @test_redand_v4i16(<4 x i16> %a) {
 ; GISEL-LABEL: test_redand_v4i16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    and w8, w8, w9
 ; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w0, w8, w9
@@ -472,13 +405,10 @@ define i16 @test_redand_v8i16(<8 x i16> %a) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    and w8, w8, w9
 ; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w0, w8, w9
@@ -504,13 +434,10 @@ define i16 @test_redand_v16i16(<16 x i16> %a) {
 ; GISEL-NEXT:    and v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    and w8, w8, w9
 ; GISEL-NEXT:    and w9, w10, w11
 ; GISEL-NEXT:    and w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
index 4c30a32934964b..c4ac01f32e3651 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -53,13 +53,10 @@ define i1 @test_redor_v4i1(<4 x i1> %a) {
 ; GISEL-LABEL: test_redor_v4i1:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w8, w8, w9
@@ -82,27 +79,20 @@ define i1 @test_redor_v8i1(<8 x i1> %a) {
 ; GISEL-LABEL: test_redor_v8i1:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    orr w11, w12, w13
-; GISEL-NEXT:    orr w8, w8, w10
-; GISEL-NEXT:    orr w9, w14, w9
-; GISEL-NEXT:    orr w9, w11, w9
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    orr w10, w12, w13
+; GISEL-NEXT:    orr w11, w14, w15
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    and w0, w8, #0x1
 ; GISEL-NEXT:    ret
@@ -122,49 +112,34 @@ define i1 @test_redor_v16i1(<16 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_v16i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    mov b16, v0.b[8]
-; GISEL-NEXT:    mov b17, v0.b[9]
-; GISEL-NEXT:    mov b18, v0.b[10]
-; GISEL-NEXT:    mov b19, v0.b[11]
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s6
-; GISEL-NEXT:    mov b20, v0.b[12]
-; GISEL-NEXT:    mov b21, v0.b[13]
-; GISEL-NEXT:    fmov w13, s7
-; GISEL-NEXT:    mov b22, v0.b[14]
-; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    umov w16, v0.b[8]
+; GISEL-NEXT:    umov w17, v0.b[9]
+; GISEL-NEXT:    umov w18, v0.b[10]
+; GISEL-NEXT:    umov w0, v0.b[11]
 ; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    umov w1, v0.b[12]
+; GISEL-NEXT:    umov w2, v0.b[13]
 ; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    orr w10, w12, w13
+; GISEL-NEXT:    umov w3, v0.b[14]
+; GISEL-NEXT:    orr w11, w14, w15
 ; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fmov w11, s5
-; GISEL-NEXT:    fmov w14, s18
-; GISEL-NEXT:    fmov w15, s19
-; GISEL-NEXT:    fmov w16, s22
-; GISEL-NEXT:    fmov w17, s23
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    orr w11, w12, w13
-; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    umov w4, v0.b[15]
+; GISEL-NEXT:    orr w12, w16, w17
+; GISEL-NEXT:    orr w13, w18, w0
 ; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    fmov w13, s17
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    orr w12, w12, w13
-; GISEL-NEXT:    orr w13, w14, w15
-; GISEL-NEXT:    fmov w14, s20
-; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    orr w14, w1, w2
 ; GISEL-NEXT:    orr w10, w12, w13
-; GISEL-NEXT:    orr w14, w14, w15
-; GISEL-NEXT:    orr w15, w16, w17
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w15, w3, w4
 ; GISEL-NEXT:    orr w11, w14, w15
 ; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w8, w8, w9
@@ -184,49 +159,34 @@ define <16 x i1> @test_redor_ins_v16i1(<16 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_ins_v16i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    mov b16, v0.b[8]
-; GISEL-NEXT:    mov b17, v0.b[9]
-; GISEL-NEXT:    mov b18, v0.b[10]
-; GISEL-NEXT:    mov b19, v0.b[11]
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s6
-; GISEL-NEXT:    mov b20, v0.b[12]
-; GISEL-NEXT:    mov b21, v0.b[13]
-; GISEL-NEXT:    fmov w13, s7
-; GISEL-NEXT:    mov b22, v0.b[14]
-; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    umov w16, v0.b[8]
+; GISEL-NEXT:    umov w17, v0.b[9]
+; GISEL-NEXT:    umov w18, v0.b[10]
+; GISEL-NEXT:    umov w0, v0.b[11]
 ; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    umov w1, v0.b[12]
+; GISEL-NEXT:    umov w2, v0.b[13]
 ; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    orr w10, w12, w13
+; GISEL-NEXT:    umov w3, v0.b[14]
+; GISEL-NEXT:    orr w11, w14, w15
 ; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fmov w11, s5
-; GISEL-NEXT:    fmov w14, s18
-; GISEL-NEXT:    fmov w15, s19
-; GISEL-NEXT:    fmov w16, s22
-; GISEL-NEXT:    fmov w17, s23
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    orr w11, w12, w13
-; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    umov w4, v0.b[15]
+; GISEL-NEXT:    orr w12, w16, w17
+; GISEL-NEXT:    orr w13, w18, w0
 ; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    fmov w13, s17
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    orr w12, w12, w13
-; GISEL-NEXT:    orr w13, w14, w15
-; GISEL-NEXT:    fmov w14, s20
-; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    orr w14, w1, w2
 ; GISEL-NEXT:    orr w10, w12, w13
-; GISEL-NEXT:    orr w14, w14, w15
-; GISEL-NEXT:    orr w15, w16, w17
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w15, w3, w4
 ; GISEL-NEXT:    orr w11, w14, w15
 ; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w8, w8, w9
@@ -289,13 +249,10 @@ define i8 @test_redor_v4i8(<4 x i8> %a) {
 ; GISEL-LABEL: test_redor_v4i8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w0, w8, w9
@@ -317,27 +274,20 @@ define i8 @test_redor_v8i8(<8 x i8> %a) {
 ; GISEL-LABEL: test_redor_v8i8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    orr w11, w12, w13
-; GISEL-NEXT:    orr w8, w8, w10
-; GISEL-NEXT:    orr w9, w14, w9
-; GISEL-NEXT:    orr w9, w11, w9
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    orr w10, w12, w13
+; GISEL-NEXT:    orr w11, w14, w15
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
@@ -360,27 +310,20 @@ define i8 @test_redor_v16i8(<16 x i8> %a) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    orr w11, w12, w13
-; GISEL-NEXT:    orr w8, w8, w10
-; GISEL-NEXT:    orr w9, w14, w9
-; GISEL-NEXT:    orr w9, w11, w9
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    orr w10, w12, w13
+; GISEL-NEXT:    orr w11, w14, w15
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %or_result = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
@@ -405,27 +348,20 @@ define i8 @test_redor_v32i8(<32 x i8> %a) {
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    orr w10, w12, w13
+; GISEL-NEXT:    orr w11, w14, w15
 ; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    orr w10, w10, w11
-; GISEL-NEXT:    orr w11, w12, w13
-; GISEL-NEXT:    orr w8, w8, w10
-; GISEL-NEXT:    orr w9, w14, w9
-; GISEL-NEXT:    orr w9, w11, w9
+; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w0, w8, w9
 ; GISEL-NEXT:    ret
   %or_result = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a)
@@ -444,13 +380,10 @@ define i16 @test_redor_v4i16(<4 x i16> %a) {
 ; GISEL-LABEL: test_redor_v4i16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w0, w8, w9
@@ -474,13 +407,10 @@ define i16 @test_redor_v8i16(<8 x i16> %a) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w0, w8, w9
@@ -506,13 +436,10 @@ define i16 @test_redor_v16i16(<16 x i16> %a) {
 ; GISEL-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    orr w9, w10, w11
 ; GISEL-NEXT:    orr w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll
index c74b3734a1b76c..5c2a808ef2e88c 100644
--- a/llvm/test/CodeGen/AArch64/reduce-xor.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll
@@ -48,13 +48,10 @@ define i1 @test_redxor_v4i1(<4 x i1> %a) {
 ; GISEL-LABEL: test_redxor_v4i1:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    eor w8, w8, w9
 ; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w8, w8, w9
@@ -75,27 +72,20 @@ define i1 @test_redxor_v8i1(<8 x i1> %a) {
 ; GISEL-LABEL: test_redxor_v8i1:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    eor w10, w10, w11
-; GISEL-NEXT:    eor w11, w12, w13
-; GISEL-NEXT:    eor w8, w8, w10
-; GISEL-NEXT:    eor w9, w14, w9
-; GISEL-NEXT:    eor w9, w11, w9
+; GISEL-NEXT:    eor w9, w10, w11
+; GISEL-NEXT:    eor w10, w12, w13
+; GISEL-NEXT:    eor w11, w14, w15
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w8, w8, w9
 ; GISEL-NEXT:    and w0, w8, #0x1
 ; GISEL-NEXT:    ret
@@ -113,49 +103,34 @@ define i1 @test_redxor_v16i1(<16 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v16i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    mov b16, v0.b[8]
-; GISEL-NEXT:    mov b17, v0.b[9]
-; GISEL-NEXT:    mov b18, v0.b[10]
-; GISEL-NEXT:    mov b19, v0.b[11]
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s6
-; GISEL-NEXT:    mov b20, v0.b[12]
-; GISEL-NEXT:    mov b21, v0.b[13]
-; GISEL-NEXT:    fmov w13, s7
-; GISEL-NEXT:    mov b22, v0.b[14]
-; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    umov w16, v0.b[8]
+; GISEL-NEXT:    umov w17, v0.b[9]
+; GISEL-NEXT:    umov w18, v0.b[10]
+; GISEL-NEXT:    umov w0, v0.b[11]
 ; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    umov w1, v0.b[12]
+; GISEL-NEXT:    umov w2, v0.b[13]
 ; GISEL-NEXT:    eor w9, w10, w11
-; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    eor w10, w12, w13
+; GISEL-NEXT:    umov w3, v0.b[14]
+; GISEL-NEXT:    eor w11, w14, w15
 ; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    fmov w11, s5
-; GISEL-NEXT:    fmov w14, s18
-; GISEL-NEXT:    fmov w15, s19
-; GISEL-NEXT:    fmov w16, s22
-; GISEL-NEXT:    fmov w17, s23
-; GISEL-NEXT:    eor w10, w10, w11
-; GISEL-NEXT:    eor w11, w12, w13
-; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    umov w4, v0.b[15]
+; GISEL-NEXT:    eor w12, w16, w17
+; GISEL-NEXT:    eor w13, w18, w0
 ; GISEL-NEXT:    eor w9, w10, w11
-; GISEL-NEXT:    fmov w13, s17
-; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    eor w12, w12, w13
-; GISEL-NEXT:    eor w13, w14, w15
-; GISEL-NEXT:    fmov w14, s20
-; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    eor w14, w1, w2
 ; GISEL-NEXT:    eor w10, w12, w13
-; GISEL-NEXT:    eor w14, w14, w15
-; GISEL-NEXT:    eor w15, w16, w17
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w15, w3, w4
 ; GISEL-NEXT:    eor w11, w14, w15
 ; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w8, w8, w9
@@ -173,49 +148,34 @@ define <16 x i1> @test_redxor_ins_v16i1(<16 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_ins_v16i1:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    mov b16, v0.b[8]
-; GISEL-NEXT:    mov b17, v0.b[9]
-; GISEL-NEXT:    mov b18, v0.b[10]
-; GISEL-NEXT:    mov b19, v0.b[11]
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s6
-; GISEL-NEXT:    mov b20, v0.b[12]
-; GISEL-NEXT:    mov b21, v0.b[13]
-; GISEL-NEXT:    fmov w13, s7
-; GISEL-NEXT:    mov b22, v0.b[14]
-; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    umov w16, v0.b[8]
+; GISEL-NEXT:    umov w17, v0.b[9]
+; GISEL-NEXT:    umov w18, v0.b[10]
+; GISEL-NEXT:    umov w0, v0.b[11]
 ; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    umov w1, v0.b[12]
+; GISEL-NEXT:    umov w2, v0.b[13]
 ; GISEL-NEXT:    eor w9, w10, w11
-; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    eor w10, w12, w13
+; GISEL-NEXT:    umov w3, v0.b[14]
+; GISEL-NEXT:    eor w11, w14, w15
 ; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    fmov w11, s5
-; GISEL-NEXT:    fmov w14, s18
-; GISEL-NEXT:    fmov w15, s19
-; GISEL-NEXT:    fmov w16, s22
-; GISEL-NEXT:    fmov w17, s23
-; GISEL-NEXT:    eor w10, w10, w11
-; GISEL-NEXT:    eor w11, w12, w13
-; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    umov w4, v0.b[15]
+; GISEL-NEXT:    eor w12, w16, w17
+; GISEL-NEXT:    eor w13, w18, w0
 ; GISEL-NEXT:    eor w9, w10, w11
-; GISEL-NEXT:    fmov w13, s17
-; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    eor w12, w12, w13
-; GISEL-NEXT:    eor w13, w14, w15
-; GISEL-NEXT:    fmov w14, s20
-; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    eor w14, w1, w2
 ; GISEL-NEXT:    eor w10, w12, w13
-; GISEL-NEXT:    eor w14, w14, w15
-; GISEL-NEXT:    eor w15, w16, w17
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w15, w3, w4
 ; GISEL-NEXT:    eor w11, w14, w15
 ; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w8, w8, w9
@@ -278,13 +238,10 @@ define i8 @test_redxor_v4i8(<4 x i8> %a) {
 ; GISEL-LABEL: test_redxor_v4i8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    eor w8, w8, w9
 ; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w0, w8, w9
@@ -306,27 +263,20 @@ define i8 @test_redxor_v8i8(<8 x i8> %a) {
 ; GISEL-LABEL: test_redxor_v8i8:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    eor w10, w10, w11
-; GISEL-NEXT:    eor w11, w12, w13
-; GISEL-NEXT:    eor w8, w8, w10
-; GISEL-NEXT:    eor w9, w14, w9
-; GISEL-NEXT:    eor w9, w11, w9
+; GISEL-NEXT:    eor w9, w10, w11
+; GISEL-NEXT:    eor w10, w12, w13
+; GISEL-NEXT:    eor w11, w14, w15
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w0, w8, w9
 ; GISEL-NEXT:    ret
   %xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
@@ -349,27 +299,20 @@ define i8 @test_redxor_v16i8(<16 x i8> %a) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
 ; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    eor w10, w10, w11
-; GISEL-NEXT:    eor w11, w12, w13
-; GISEL-NEXT:    eor w8, w8, w10
-; GISEL-NEXT:    eor w9, w14, w9
-; GISEL-NEXT:    eor w9, w11, w9
+; GISEL-NEXT:    eor w9, w10, w11
+; GISEL-NEXT:    eor w10, w12, w13
+; GISEL-NEXT:    eor w11, w14, w15
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w0, w8, w9
 ; GISEL-NEXT:    ret
   %xor_result = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
@@ -394,27 +337,20 @@ define i8 @test_redxor_v32i8(<32 x i8> %a) {
 ; GISEL-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov b1, v0.b[1]
-; GISEL-NEXT:    mov b2, v0.b[2]
-; GISEL-NEXT:    mov b3, v0.b[3]
-; GISEL-NEXT:    mov b4, v0.b[4]
-; GISEL-NEXT:    mov b5, v0.b[5]
-; GISEL-NEXT:    mov b6, v0.b[6]
-; GISEL-NEXT:    mov b7, v0.b[7]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
-; GISEL-NEXT:    fmov w12, s4
-; GISEL-NEXT:    fmov w13, s5
-; GISEL-NEXT:    fmov w14, s6
+; GISEL-NEXT:    umov w8, v0.b[0]
+; GISEL-NEXT:    umov w9, v0.b[1]
+; GISEL-NEXT:    umov w10, v0.b[2]
+; GISEL-NEXT:    umov w11, v0.b[3]
+; GISEL-NEXT:    umov w12, v0.b[4]
+; GISEL-NEXT:    umov w13, v0.b[5]
+; GISEL-NEXT:    umov w14, v0.b[6]
+; GISEL-NEXT:    umov w15, v0.b[7]
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w9, w10, w11
+; GISEL-NEXT:    eor w10, w12, w13
+; GISEL-NEXT:    eor w11, w14, w15
 ; GISEL-NEXT:    eor w8, w8, w9
-; GISEL-NEXT:    fmov w9, s7
-; GISEL-NEXT:    eor w10, w10, w11
-; GISEL-NEXT:    eor w11, w12, w13
-; GISEL-NEXT:    eor w8, w8, w10
-; GISEL-NEXT:    eor w9, w14, w9
-; GISEL-NEXT:    eor w9, w11, w9
+; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w0, w8, w9
 ; GISEL-NEXT:    ret
   %xor_result = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a)
@@ -433,13 +369,10 @@ define i16 @test_redxor_v4i16(<4 x i16> %a) {
 ; GISEL-LABEL: test_redxor_v4i16:
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    eor w8, w8, w9
 ; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w0, w8, w9
@@ -463,13 +396,10 @@ define i16 @test_redxor_v8i16(<8 x i16> %a) {
 ; GISEL:       // %bb.0:
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    eor w8, w8, w9
 ; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w0, w8, w9
@@ -495,13 +425,10 @@ define i16 @test_redxor_v16i16(<16 x i16> %a) {
 ; GISEL-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; GISEL-NEXT:    mov d1, v0.d[1]
 ; GISEL-NEXT:    eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT:    mov h1, v0.h[1]
-; GISEL-NEXT:    mov h2, v0.h[2]
-; GISEL-NEXT:    mov h3, v0.h[3]
-; GISEL-NEXT:    fmov w8, s0
-; GISEL-NEXT:    fmov w9, s1
-; GISEL-NEXT:    fmov w10, s2
-; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    umov w8, v0.h[0]
+; GISEL-NEXT:    umov w9, v0.h[1]
+; GISEL-NEXT:    umov w10, v0.h[2]
+; GISEL-NEXT:    umov w11, v0.h[3]
 ; GISEL-NEXT:    eor w8, w8, w9
 ; GISEL-NEXT:    eor w9, w10, w11
 ; GISEL-NEXT:    eor w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index 4d26228caf62e9..014e4071a4bf61 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -289,18 +289,14 @@ define <3 x i32> @sext_v3i16_v3i32(<3 x i16> %a) {
 ; CHECK-GI-LABEL: sext_v3i16_v3i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    sxth w8, w8
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    fmov w8, s2
-; CHECK-GI-NEXT:    sxth w9, w9
-; CHECK-GI-NEXT:    sxth w8, w8
-; CHECK-GI-NEXT:    mov v0.s[1], w9
-; CHECK-GI-NEXT:    mov v0.s[2], w8
-; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    smov w8, v0.h[0]
+; CHECK-GI-NEXT:    smov w9, v0.h[1]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    smov w8, v0.h[2]
+; CHECK-GI-NEXT:    mov v1.s[1], w9
+; CHECK-GI-NEXT:    mov v1.s[2], w8
+; CHECK-GI-NEXT:    mov v1.s[3], w8
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <3 x i16> %a to <3 x i32>
@@ -322,15 +318,10 @@ define <3 x i64> @sext_v3i16_v3i64(<3 x i16> %a) {
 ; CHECK-GI-LABEL: sext_v3i16_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    sxth x8, w8
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    smov x8, v0.h[0]
+; CHECK-GI-NEXT:    smov x9, v0.h[1]
+; CHECK-GI-NEXT:    smov x10, v0.h[2]
 ; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    sxth x9, w9
-; CHECK-GI-NEXT:    sxth x10, w10
 ; CHECK-GI-NEXT:    fmov d1, x9
 ; CHECK-GI-NEXT:    fmov d2, x10
 ; CHECK-GI-NEXT:    ret
@@ -352,15 +343,10 @@ define <3 x i64> @sext_v3i32_v3i64(<3 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v3i32_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
-; CHECK-GI-NEXT:    mov s2, v0.s[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    sxtw x8, w8
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    smov x8, v0.s[0]
+; CHECK-GI-NEXT:    smov x9, v0.s[1]
+; CHECK-GI-NEXT:    smov x10, v0.s[2]
 ; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    sxtw x9, w9
-; CHECK-GI-NEXT:    sxtw x10, w10
 ; CHECK-GI-NEXT:    fmov d1, x9
 ; CHECK-GI-NEXT:    fmov d2, x10
 ; CHECK-GI-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 53aefaf3d33600..7f804fe48fd854 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -168,53 +168,32 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
 ; CHECK-GI-LABEL: test_v9i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov b1, v0.b[1]
-; CHECK-GI-NEXT:    mov b2, v0.b[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov b3, v0.b[3]
-; CHECK-GI-NEXT:    mov b4, v0.b[4]
-; CHECK-GI-NEXT:    fmov w9, s0
-; CHECK-GI-NEXT:    uxtb w8, w8
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w11, s2
-; CHECK-GI-NEXT:    fmov w12, s1
-; CHECK-GI-NEXT:    mov b1, v0.b[5]
-; CHECK-GI-NEXT:    mov b2, v0.b[6]
-; CHECK-GI-NEXT:    cmp w8, w10, uxtb
-; CHECK-GI-NEXT:    fmov w10, s3
-; CHECK-GI-NEXT:    uxtb w8, w11
-; CHECK-GI-NEXT:    csel w9, w9, w12, hi
-; CHECK-GI-NEXT:    cmp w8, w9, uxtb
-; CHECK-GI-NEXT:    uxtb w8, w10
-; CHECK-GI-NEXT:    fmov w10, s4
-; CHECK-GI-NEXT:    csel w9, w9, w11, lo
-; CHECK-GI-NEXT:    fmov w11, s3
-; CHECK-GI-NEXT:    mov b3, v0.b[7]
-; CHECK-GI-NEXT:    mov b0, v0.b[8]
-; CHECK-GI-NEXT:    cmp w8, w9, uxtb
-; CHECK-GI-NEXT:    uxtb w8, w10
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    csel w9, w9, w11, lo
-; CHECK-GI-NEXT:    fmov w11, s4
-; CHECK-GI-NEXT:    cmp w8, w9, uxtb
-; CHECK-GI-NEXT:    uxtb w8, w10
-; CHECK-GI-NEXT:    fmov w10, s2
-; CHECK-GI-NEXT:    csel w9, w9, w11, lo
+; CHECK-GI-NEXT:    umov w8, v0.b[0]
+; CHECK-GI-NEXT:    umov w9, v0.b[1]
+; CHECK-GI-NEXT:    umov w10, v0.b[2]
 ; CHECK-GI-NEXT:    fmov w11, s1
-; CHECK-GI-NEXT:    cmp w8, w9, uxtb
-; CHECK-GI-NEXT:    uxtb w8, w10
-; CHECK-GI-NEXT:    fmov w10, s3
-; CHECK-GI-NEXT:    csel w9, w9, w11, lo
-; CHECK-GI-NEXT:    fmov w11, s2
-; CHECK-GI-NEXT:    cmp w8, w9, uxtb
-; CHECK-GI-NEXT:    uxtb w8, w10
-; CHECK-GI-NEXT:    fmov w10, s0
-; CHECK-GI-NEXT:    csel w9, w9, w11, lo
-; CHECK-GI-NEXT:    fmov w11, s3
-; CHECK-GI-NEXT:    cmp w8, w9, uxtb
-; CHECK-GI-NEXT:    uxtb w8, w10
-; CHECK-GI-NEXT:    csel w9, w9, w11, lo
-; CHECK-GI-NEXT:    cmp w8, w9, uxtb
-; CHECK-GI-NEXT:    csel w0, w9, w10, lo
+; CHECK-GI-NEXT:    cmp w8, w11, uxtb
+; CHECK-GI-NEXT:    umov w11, v0.b[3]
+; CHECK-GI-NEXT:    csel w8, w8, w9, hi
+; CHECK-GI-NEXT:    umov w9, v0.b[4]
+; CHECK-GI-NEXT:    cmp w10, w8, uxtb
+; CHECK-GI-NEXT:    csel w8, w8, w10, lo
+; CHECK-GI-NEXT:    umov w10, v0.b[5]
+; CHECK-GI-NEXT:    cmp w11, w8, uxtb
+; CHECK-GI-NEXT:    csel w8, w8, w11, lo
+; CHECK-GI-NEXT:    umov w11, v0.b[6]
+; CHECK-GI-NEXT:    cmp w9, w8, uxtb
+; CHECK-GI-NEXT:    csel w8, w8, w9, lo
+; CHECK-GI-NEXT:    umov w9, v0.b[7]
+; CHECK-GI-NEXT:    cmp w10, w8, uxtb
+; CHECK-GI-NEXT:    csel w8, w8, w10, lo
+; CHECK-GI-NEXT:    umov w10, v0.b[8]
+; CHECK-GI-NEXT:    cmp w11, w8, uxtb
+; CHECK-GI-NEXT:    csel w8, w8, w11, lo
+; CHECK-GI-NEXT:    cmp w9, w8, uxtb
+; CHECK-GI-NEXT:    csel w8, w8, w9, lo
+; CHECK-GI-NEXT:    cmp w10, w8, uxtb
+; CHECK-GI-NEXT:    csel w0, w8, w10, lo
 ; CHECK-GI-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a)
   ret i8 %b
@@ -259,21 +238,18 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-GI-LABEL: test_v4i1:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    fmov w10, s1
-; CHECK-GI-NEXT:    fmov w12, s2
-; CHECK-GI-NEXT:    fmov w13, s3
-; CHECK-GI-NEXT:    and w9, w8, #0x1
-; CHECK-GI-NEXT:    and w11, w10, #0x1
-; CHECK-GI-NEXT:    cmp w9, w11
-; CHECK-GI-NEXT:    and w9, w12, #0x1
-; CHECK-GI-NEXT:    and w11, w13, #0x1
-; CHECK-GI-NEXT:    csel w8, w8, w10, hi
-; CHECK-GI-NEXT:    cmp w9, w11
-; CHECK-GI-NEXT:    csel w9, w12, w13, hi
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
+; CHECK-GI-NEXT:    umov w11, v0.h[3]
+; CHECK-GI-NEXT:    and w12, w8, #0x1
+; CHECK-GI-NEXT:    and w13, w9, #0x1
+; CHECK-GI-NEXT:    cmp w12, w13
+; CHECK-GI-NEXT:    and w12, w10, #0x1
+; CHECK-GI-NEXT:    and w13, w11, #0x1
+; CHECK-GI-NEXT:    csel w8, w8, w9, hi
+; CHECK-GI-NEXT:    cmp w12, w13
+; CHECK-GI-NEXT:    csel w9, w10, w11, hi
 ; CHECK-GI-NEXT:    and w10, w8, #0x1
 ; CHECK-GI-NEXT:    and w11, w9, #0x1
 ; CHECK-GI-NEXT:    cmp w10, w11
diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
index 0dd4e3644b7835..21982fadbe8036 100644
--- a/llvm/test/CodeGen/AArch64/xtn.ll
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -224,23 +224,13 @@ entry:
 }
 
 define <3 x i8> @xtn_v3i16_v3i8(<3 x i16> %a) {
-; CHECK-SD-LABEL: xtn_v3i16_v3i8:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    umov w0, v0.h[0]
-; CHECK-SD-NEXT:    umov w1, v0.h[1]
-; CHECK-SD-NEXT:    umov w2, v0.h[2]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: xtn_v3i16_v3i8:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w0, s0
-; CHECK-GI-NEXT:    fmov w1, s1
-; CHECK-GI-NEXT:    fmov w2, s2
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: xtn_v3i16_v3i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.h[0]
+; CHECK-NEXT:    umov w1, v0.h[1]
+; CHECK-NEXT:    umov w2, v0.h[2]
+; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc <3 x i16> %a to <3 x i8>
   ret <3 x i8> %arg1
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 42c0bf79e77897..e513340f5b18ad 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:       warning: Instruction selection used fallback path for zext_v16i10_v16i16
+
 define i16 @zext_i8_to_i16(i8 %a) {
 ; CHECK-LABEL: zext_i8_to_i16:
 ; CHECK:       // %bb.0: // %entry
@@ -333,18 +335,14 @@ define <3 x i32> @zext_v3i16_v3i32(<3 x i16> %a) {
 ; CHECK-GI-LABEL: zext_v3i16_v3i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    uxth w8, w8
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    fmov w8, s2
-; CHECK-GI-NEXT:    uxth w9, w9
-; CHECK-GI-NEXT:    uxth w8, w8
-; CHECK-GI-NEXT:    mov v0.s[1], w9
-; CHECK-GI-NEXT:    mov v0.s[2], w8
-; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    umov w8, v0.h[2]
+; CHECK-GI-NEXT:    mov v1.s[1], w9
+; CHECK-GI-NEXT:    mov v1.s[2], w8
+; CHECK-GI-NEXT:    mov v1.s[3], w8
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <3 x i16> %a to <3 x i32>
@@ -366,15 +364,10 @@ define <3 x i64> @zext_v3i16_v3i64(<3 x i16> %a) {
 ; CHECK-GI-LABEL: zext_v3i16_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    ubfx x8, x8, #0, #16
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
+; CHECK-GI-NEXT:    umov w8, v0.h[0]
+; CHECK-GI-NEXT:    umov w9, v0.h[1]
+; CHECK-GI-NEXT:    umov w10, v0.h[2]
 ; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    ubfx x9, x9, #0, #16
-; CHECK-GI-NEXT:    ubfx x10, x10, #0, #16
 ; CHECK-GI-NEXT:    fmov d1, x9
 ; CHECK-GI-NEXT:    fmov d2, x10
 ; CHECK-GI-NEXT:    ret
@@ -396,12 +389,10 @@ define <3 x i64> @zext_v3i32_v3i64(<3 x i32> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v3i32_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov s1, v0.s[1]
-; CHECK-GI-NEXT:    mov s2, v0.s[2]
-; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    mov w8, v0.s[0]
+; CHECK-GI-NEXT:    mov w9, v0.s[1]
+; CHECK-GI-NEXT:    mov w10, v0.s[2]
 ; CHECK-GI-NEXT:    fmov d0, x8
-; CHECK-GI-NEXT:    fmov w9, s1
-; CHECK-GI-NEXT:    fmov w10, s2
 ; CHECK-GI-NEXT:    fmov d1, x9
 ; CHECK-GI-NEXT:    fmov d2, x10
 ; CHECK-GI-NEXT:    ret



More information about the llvm-commits mailing list