[llvm] c0931d4 - [AArch64][GlobalISel] Lower scalarizing G_UNMERGE_VALUES to G_EXTRACT_VECTOR_ELT
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 21 01:22:28 PST 2023
Author: David Green
Date: 2023-12-21T09:22:23Z
New Revision: c0931d4950a93526aa08ec3ab86f64ffb616b406
URL: https://github.com/llvm/llvm-project/commit/c0931d4950a93526aa08ec3ab86f64ffb616b406
DIFF: https://github.com/llvm/llvm-project/commit/c0931d4950a93526aa08ec3ab86f64ffb616b406.diff
LOG: [AArch64][GlobalISel] Lower scalarizing G_UNMERGE_VALUES to G_EXTRACT_VECTOR_ELT
This adds post-legalization lowering of G_UNMERGE_VALUES instructions that take a
vector and produce a scalar value for each lane. Each one is converted to a
G_EXTRACT_VECTOR_ELT per lane, allowing the existing tablegen patterns to apply to them.
A couple of tablegen patterns need to be altered to make sure the type of the
constant operand is known, so that the patterns are recognized under global
isel.
Closes #75662
Added:
Modified:
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
llvm/test/CodeGen/AArch64/aarch64-mulv.ll
llvm/test/CodeGen/AArch64/aarch64-smull.ll
llvm/test/CodeGen/AArch64/fptoi.ll
llvm/test/CodeGen/AArch64/reduce-and.ll
llvm/test/CodeGen/AArch64/reduce-or.ll
llvm/test/CodeGen/AArch64/reduce-xor.ll
llvm/test/CodeGen/AArch64/sext.ll
llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
llvm/test/CodeGen/AArch64/xtn.ll
llvm/test/CodeGen/AArch64/zext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d6c00be80bd9c2..99f256b887821b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -134,6 +134,14 @@ def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
form_duplane,
shuf_to_ins]>;
+// Lower a scalarizing G_UNMERGE_VALUES into one G_EXTRACT_VECTOR_ELT per lane.
+def vector_unmerge_lowering : GICombineRule <
+ (defs root:$root),
+ (match (wip_match_opcode G_UNMERGE_VALUES):$root,
+ [{ return matchScalarizeVectorUnmerge(*${root}, MRI); }]),
+ (apply [{ applyScalarizeVectorUnmerge(*${root}, MRI, B); }])
+>;
+
def adjust_icmp_imm_matchdata :
GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
def adjust_icmp_imm : GICombineRule <
@@ -251,7 +259,8 @@ def AArch64PostLegalizerLowering
icmp_lowering, build_vector_lowering,
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
- unmerge_ext_to_unmerge, lower_mull]> {
+ unmerge_ext_to_unmerge, lower_mull,
+ vector_unmerge_lowering]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bdb38f0c378952..4ccac40f99a0ae 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6482,23 +6482,23 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))),
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
(EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
-def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
+def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
(EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
-def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
+def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, dsub)>;
// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
+def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
+def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
-def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
-def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
+def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
(bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 687063873a16b2..830203b61c586b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -769,6 +769,27 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) { // True when MI unmerges a fixed-length vector into one result per element.
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1); // The last operand is read as the source register.
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ return SrcTy.isVector() && !SrcTy.isScalable() &&
+ Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1; // Operand count must equal the element count plus one (the source).
+}
+
+void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) { // Replaces MI with one constant-index vector element extract per source element.
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1); // The last operand is read as the source vector register.
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
+ "Expected a fixed length vector");
+
+ for (int I = 0; I < SrcTy.getNumElements(); ++I)
+ B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I); // Element I of the source is written to the unmerge's I-th register.
+ MI.eraseFromParent(); // Remove the unmerge after all of its results have been rebuilt.
+}
+
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
auto Splat = getAArch64VectorSplat(MI, MRI);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 5e477e8947d1b8..194fe5be40c2bd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -516,20 +516,17 @@ define i8 @sminv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: sminv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov h3, v0.h[3]
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w12, v0.h[3]
+; CHECK-GI-NEXT: sxtb w11, w8
+; CHECK-GI-NEXT: cmp w11, w9, sxtb
+; CHECK-GI-NEXT: sxtb w11, w10
+; CHECK-GI-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-NEXT: cmp w11, w12, sxtb
; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
-; CHECK-GI-NEXT: sxtb w9, w11
-; CHECK-GI-NEXT: csel w8, w8, w10, lt
-; CHECK-GI-NEXT: fmov w10, s3
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
-; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: csel w10, w11, w10, lt
+; CHECK-GI-NEXT: csel w10, w10, w12, lt
; CHECK-GI-NEXT: cmp w9, w10, sxtb
; CHECK-GI-NEXT: csel w0, w8, w10, lt
; CHECK-GI-NEXT: ret
@@ -611,19 +608,16 @@ define i16 @sminv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: sxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: smov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: smov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, sxth
-; CHECK-GI-NEXT: sxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, lt
-; CHECK-GI-NEXT: cmp w8, w9, sxth
-; CHECK-GI-NEXT: csel w0, w9, w10, gt
+; CHECK-GI-NEXT: cmp w8, w12, sxth
+; CHECK-GI-NEXT: csel w8, w9, w10, lt
+; CHECK-GI-NEXT: cmp w11, w8, sxth
+; CHECK-GI-NEXT: csel w0, w8, w13, gt
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
@@ -887,20 +881,17 @@ define i8 @smaxv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: smaxv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
-; CHECK-GI-NEXT: sxtb w9, w11
-; CHECK-GI-NEXT: csel w8, w8, w10, gt
-; CHECK-GI-NEXT: fmov w10, s3
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w12, v0.h[3]
+; CHECK-GI-NEXT: sxtb w11, w8
+; CHECK-GI-NEXT: cmp w11, w9, sxtb
+; CHECK-GI-NEXT: sxtb w11, w10
+; CHECK-GI-NEXT: csel w8, w8, w9, gt
+; CHECK-GI-NEXT: cmp w11, w12, sxtb
; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: csel w10, w11, w10, gt
+; CHECK-GI-NEXT: csel w10, w10, w12, gt
; CHECK-GI-NEXT: cmp w9, w10, sxtb
; CHECK-GI-NEXT: csel w0, w8, w10, gt
; CHECK-GI-NEXT: ret
@@ -982,19 +973,16 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: sxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: smov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: smov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, sxth
-; CHECK-GI-NEXT: sxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, gt
-; CHECK-GI-NEXT: cmp w8, w9, sxth
-; CHECK-GI-NEXT: csel w0, w9, w10, lt
+; CHECK-GI-NEXT: cmp w8, w12, sxth
+; CHECK-GI-NEXT: csel w8, w9, w10, gt
+; CHECK-GI-NEXT: cmp w11, w8, sxth
+; CHECK-GI-NEXT: csel w0, w8, w13, lt
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
@@ -1256,19 +1244,16 @@ define i8 @uminv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: uminv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: fmov w12, s3
-; CHECK-GI-NEXT: and w9, w8, #0xff
-; CHECK-GI-NEXT: cmp w9, w10, uxtb
-; CHECK-GI-NEXT: and w9, w11, #0xff
-; CHECK-GI-NEXT: csel w8, w8, w10, lo
-; CHECK-GI-NEXT: cmp w9, w12, uxtb
-; CHECK-GI-NEXT: csel w9, w11, w12, lo
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0xff
+; CHECK-GI-NEXT: cmp w12, w9, uxtb
+; CHECK-GI-NEXT: and w12, w10, #0xff
+; CHECK-GI-NEXT: csel w8, w8, w9, lo
+; CHECK-GI-NEXT: cmp w12, w11, uxtb
+; CHECK-GI-NEXT: csel w9, w10, w11, lo
; CHECK-GI-NEXT: and w10, w8, #0xff
; CHECK-GI-NEXT: cmp w10, w9, uxtb
; CHECK-GI-NEXT: csel w0, w8, w9, lo
@@ -1351,19 +1336,16 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: uxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: umov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, uxth
-; CHECK-GI-NEXT: uxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, lo
-; CHECK-GI-NEXT: cmp w8, w9, uxth
-; CHECK-GI-NEXT: csel w0, w9, w10, hi
+; CHECK-GI-NEXT: cmp w8, w12, uxth
+; CHECK-GI-NEXT: csel w8, w9, w10, lo
+; CHECK-GI-NEXT: cmp w11, w8, uxth
+; CHECK-GI-NEXT: csel w0, w8, w13, hi
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
@@ -1625,19 +1607,16 @@ define i8 @umaxv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: umaxv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: fmov w12, s3
-; CHECK-GI-NEXT: and w9, w8, #0xff
-; CHECK-GI-NEXT: cmp w9, w10, uxtb
-; CHECK-GI-NEXT: and w9, w11, #0xff
-; CHECK-GI-NEXT: csel w8, w8, w10, hi
-; CHECK-GI-NEXT: cmp w9, w12, uxtb
-; CHECK-GI-NEXT: csel w9, w11, w12, hi
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0xff
+; CHECK-GI-NEXT: cmp w12, w9, uxtb
+; CHECK-GI-NEXT: and w12, w10, #0xff
+; CHECK-GI-NEXT: csel w8, w8, w9, hi
+; CHECK-GI-NEXT: cmp w12, w11, uxtb
+; CHECK-GI-NEXT: csel w9, w10, w11, hi
; CHECK-GI-NEXT: and w10, w8, #0xff
; CHECK-GI-NEXT: cmp w10, w9, uxtb
; CHECK-GI-NEXT: csel w0, w8, w9, hi
@@ -1719,19 +1698,16 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: uxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: umov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, uxth
-; CHECK-GI-NEXT: uxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, hi
-; CHECK-GI-NEXT: cmp w8, w9, uxth
-; CHECK-GI-NEXT: csel w0, w9, w10, lo
+; CHECK-GI-NEXT: cmp w8, w12, uxth
+; CHECK-GI-NEXT: csel w8, w9, w10, hi
+; CHECK-GI-NEXT: cmp w11, w8, uxth
+; CHECK-GI-NEXT: csel w0, w8, w13, lo
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
index 90f09379e68fd2..7b7ca9d8ffc2db 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
@@ -73,13 +73,10 @@ define i8 @mulv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: mulv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
@@ -113,27 +110,20 @@ define i8 @mulv_v8i8(<8 x i8> %a) {
; CHECK-GI-LABEL: mulv_v8i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov b5, v0.b[5]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov b6, v0.b[6]
-; CHECK-GI-NEXT: mov b7, v0.b[7]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: fmov w12, s5
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: umov w10, v0.b[2]
+; CHECK-GI-NEXT: umov w11, v0.b[3]
+; CHECK-GI-NEXT: umov w12, v0.b[4]
+; CHECK-GI-NEXT: umov w13, v0.b[5]
+; CHECK-GI-NEXT: umov w14, v0.b[6]
+; CHECK-GI-NEXT: umov w15, v0.b[7]
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mul w10, w10, w11
-; CHECK-GI-NEXT: fmov w11, s6
-; CHECK-GI-NEXT: mul w9, w9, w12
-; CHECK-GI-NEXT: fmov w12, s7
-; CHECK-GI-NEXT: mul w8, w8, w10
-; CHECK-GI-NEXT: mul w11, w11, w12
-; CHECK-GI-NEXT: mul w9, w9, w11
+; CHECK-GI-NEXT: mul w9, w10, w11
+; CHECK-GI-NEXT: mul w10, w12, w13
+; CHECK-GI-NEXT: mul w11, w14, w15
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
@@ -167,27 +157,20 @@ define i8 @mulv_v16i8(<16 x i8> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov b5, v0.b[5]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov b6, v0.b[6]
-; CHECK-GI-NEXT: mov b7, v0.b[7]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: fmov w12, s5
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: umov w10, v0.b[2]
+; CHECK-GI-NEXT: umov w11, v0.b[3]
+; CHECK-GI-NEXT: umov w12, v0.b[4]
+; CHECK-GI-NEXT: umov w13, v0.b[5]
+; CHECK-GI-NEXT: umov w14, v0.b[6]
+; CHECK-GI-NEXT: umov w15, v0.b[7]
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: mul w9, w10, w11
+; CHECK-GI-NEXT: mul w10, w12, w13
+; CHECK-GI-NEXT: mul w11, w14, w15
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mul w10, w10, w11
-; CHECK-GI-NEXT: fmov w11, s6
-; CHECK-GI-NEXT: mul w9, w9, w12
-; CHECK-GI-NEXT: fmov w12, s7
-; CHECK-GI-NEXT: mul w8, w8, w10
-; CHECK-GI-NEXT: mul w11, w11, w12
-; CHECK-GI-NEXT: mul w9, w9, w11
+; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
@@ -225,27 +208,20 @@ define i8 @mulv_v32i8(<32 x i8> %a) {
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: mul v1.8b, v1.8b, v3.8b
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov b5, v0.b[5]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov b6, v0.b[6]
-; CHECK-GI-NEXT: mov b7, v0.b[7]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: fmov w12, s5
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: umov w10, v0.b[2]
+; CHECK-GI-NEXT: umov w11, v0.b[3]
+; CHECK-GI-NEXT: umov w12, v0.b[4]
+; CHECK-GI-NEXT: umov w13, v0.b[5]
+; CHECK-GI-NEXT: umov w14, v0.b[6]
+; CHECK-GI-NEXT: umov w15, v0.b[7]
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mul w10, w10, w11
-; CHECK-GI-NEXT: fmov w11, s6
-; CHECK-GI-NEXT: mul w9, w9, w12
-; CHECK-GI-NEXT: fmov w12, s7
-; CHECK-GI-NEXT: mul w8, w8, w10
-; CHECK-GI-NEXT: mul w11, w11, w12
-; CHECK-GI-NEXT: mul w9, w9, w11
+; CHECK-GI-NEXT: mul w9, w10, w11
+; CHECK-GI-NEXT: mul w10, w12, w13
+; CHECK-GI-NEXT: mul w11, w14, w15
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
@@ -289,13 +265,11 @@ define i16 @mulv_v3i16(<3 x i16> %a) {
; CHECK-GI-LABEL: mulv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mul w0, w8, w9
+; CHECK-GI-NEXT: mul w0, w8, w10
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.mul.v3i16(<3 x i16> %a)
@@ -318,13 +292,10 @@ define i16 @mulv_v4i16(<4 x i16> %a) {
; CHECK-GI-LABEL: mulv_v4i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
@@ -352,13 +323,10 @@ define i16 @mulv_v8i16(<8 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
@@ -390,15 +358,12 @@ define i16 @mulv_v16i16(<16 x i16> %a) {
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v2.4h
; CHECK-GI-NEXT: mul v1.4h, v1.4h, v3.4h
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mul w9, w10, w9
+; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 99aa28d859e1f8..dbc5417e23133d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -3,6 +3,19 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; CHECK-GI: warning: Instruction selection used fallback path for smull_zext_v4i16_v4i32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v8i16_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v8i16_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v4i32_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v4i32_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl_smlsl2_v8i16_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl_umlsl2_v8i16_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl_smlsl2_v4i32_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl_umlsl2_v4i32_uzp1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for do_stuff
+
define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smull_v8i8_v8i16:
; CHECK: // %bb.0:
@@ -226,11 +239,10 @@ define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind {
; CHECK-GI-NEXT: movi d0, #0x00ffff0000ffff
; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
; CHECK-GI-NEXT: and v0.8b, v1.8b, v0.8b
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: mov w8, v0.s[0]
+; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: ldr d0, [x1]
; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: fmov d1, x8
; CHECK-GI-NEXT: mov d3, v0.d[1]
; CHECK-GI-NEXT: mov v1.d[1], x9
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index f30dad966492c1..23ba85d54c7a4f 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -5846,11 +5846,9 @@ define <3 x i8> @fptos_v3f16_v3i8(<3 x half> %a) {
; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i8:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: fmov w0, s0
-; CHECK-GI-FP16-NEXT: fmov w1, s1
-; CHECK-GI-FP16-NEXT: fmov w2, s2
+; CHECK-GI-FP16-NEXT: umov w0, v0.h[0]
+; CHECK-GI-FP16-NEXT: umov w1, v0.h[1]
+; CHECK-GI-FP16-NEXT: umov w2, v0.h[2]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = fptosi <3 x half> %a to <3 x i8>
@@ -5890,11 +5888,9 @@ define <3 x i8> @fptou_v3f16_v3i8(<3 x half> %a) {
; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i8:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h
-; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT: fmov w0, s0
-; CHECK-GI-FP16-NEXT: fmov w1, s1
-; CHECK-GI-FP16-NEXT: fmov w2, s2
+; CHECK-GI-FP16-NEXT: umov w0, v0.h[0]
+; CHECK-GI-FP16-NEXT: umov w1, v0.h[1]
+; CHECK-GI-FP16-NEXT: umov w2, v0.h[2]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = fptoui <3 x half> %a to <3 x i8>
diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index a20a76c00418d1..8b7438a42b711e 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -53,13 +53,10 @@ define i1 @test_redand_v4i1(<4 x i1> %a) {
; GISEL-LABEL: test_redand_v4i1:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w8, w8, w9
@@ -82,27 +79,20 @@ define i1 @test_redand_v8i1(<8 x i1> %a) {
; GISEL-LABEL: test_redand_v8i1:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: and w10, w10, w11
-; GISEL-NEXT: and w11, w12, w13
-; GISEL-NEXT: and w8, w8, w10
-; GISEL-NEXT: and w9, w14, w9
-; GISEL-NEXT: and w9, w11, w9
+; GISEL-NEXT: and w9, w10, w11
+; GISEL-NEXT: and w10, w12, w13
+; GISEL-NEXT: and w11, w14, w15
+; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w0, w8, #0x1
; GISEL-NEXT: ret
@@ -122,49 +112,34 @@ define i1 @test_redand_v16i1(<16 x i1> %a) {
;
; GISEL-LABEL: test_redand_v16i1:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: mov b16, v0.b[8]
-; GISEL-NEXT: mov b17, v0.b[9]
-; GISEL-NEXT: mov b18, v0.b[10]
-; GISEL-NEXT: mov b19, v0.b[11]
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: mov b20, v0.b[12]
-; GISEL-NEXT: mov b21, v0.b[13]
-; GISEL-NEXT: fmov w13, s7
-; GISEL-NEXT: mov b22, v0.b[14]
-; GISEL-NEXT: mov b23, v0.b[15]
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: umov w16, v0.b[8]
+; GISEL-NEXT: umov w17, v0.b[9]
+; GISEL-NEXT: umov w18, v0.b[10]
+; GISEL-NEXT: umov w0, v0.b[11]
; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: umov w1, v0.b[12]
+; GISEL-NEXT: umov w2, v0.b[13]
; GISEL-NEXT: and w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: and w10, w12, w13
+; GISEL-NEXT: umov w3, v0.b[14]
+; GISEL-NEXT: and w11, w14, w15
; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: fmov w11, s5
-; GISEL-NEXT: fmov w14, s18
-; GISEL-NEXT: fmov w15, s19
-; GISEL-NEXT: fmov w16, s22
-; GISEL-NEXT: fmov w17, s23
-; GISEL-NEXT: and w10, w10, w11
-; GISEL-NEXT: and w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: umov w4, v0.b[15]
+; GISEL-NEXT: and w12, w16, w17
+; GISEL-NEXT: and w13, w18, w0
; GISEL-NEXT: and w9, w10, w11
-; GISEL-NEXT: fmov w13, s17
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: and w12, w12, w13
-; GISEL-NEXT: and w13, w14, w15
-; GISEL-NEXT: fmov w14, s20
-; GISEL-NEXT: fmov w15, s21
+; GISEL-NEXT: and w14, w1, w2
; GISEL-NEXT: and w10, w12, w13
-; GISEL-NEXT: and w14, w14, w15
-; GISEL-NEXT: and w15, w16, w17
+; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: and w15, w3, w4
; GISEL-NEXT: and w11, w14, w15
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w8, w8, w9
@@ -184,49 +159,34 @@ define <16 x i1> @test_redand_ins_v16i1(<16 x i1> %a) {
;
; GISEL-LABEL: test_redand_ins_v16i1:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: mov b16, v0.b[8]
-; GISEL-NEXT: mov b17, v0.b[9]
-; GISEL-NEXT: mov b18, v0.b[10]
-; GISEL-NEXT: mov b19, v0.b[11]
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: mov b20, v0.b[12]
-; GISEL-NEXT: mov b21, v0.b[13]
-; GISEL-NEXT: fmov w13, s7
-; GISEL-NEXT: mov b22, v0.b[14]
-; GISEL-NEXT: mov b23, v0.b[15]
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: umov w16, v0.b[8]
+; GISEL-NEXT: umov w17, v0.b[9]
+; GISEL-NEXT: umov w18, v0.b[10]
+; GISEL-NEXT: umov w0, v0.b[11]
; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: umov w1, v0.b[12]
+; GISEL-NEXT: umov w2, v0.b[13]
; GISEL-NEXT: and w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: and w10, w12, w13
+; GISEL-NEXT: umov w3, v0.b[14]
+; GISEL-NEXT: and w11, w14, w15
; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: fmov w11, s5
-; GISEL-NEXT: fmov w14, s18
-; GISEL-NEXT: fmov w15, s19
-; GISEL-NEXT: fmov w16, s22
-; GISEL-NEXT: fmov w17, s23
-; GISEL-NEXT: and w10, w10, w11
-; GISEL-NEXT: and w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: umov w4, v0.b[15]
+; GISEL-NEXT: and w12, w16, w17
+; GISEL-NEXT: and w13, w18, w0
; GISEL-NEXT: and w9, w10, w11
-; GISEL-NEXT: fmov w13, s17
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: and w12, w12, w13
-; GISEL-NEXT: and w13, w14, w15
-; GISEL-NEXT: fmov w14, s20
-; GISEL-NEXT: fmov w15, s21
+; GISEL-NEXT: and w14, w1, w2
; GISEL-NEXT: and w10, w12, w13
-; GISEL-NEXT: and w14, w14, w15
-; GISEL-NEXT: and w15, w16, w17
+; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: and w15, w3, w4
; GISEL-NEXT: and w11, w14, w15
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w8, w8, w9
@@ -287,13 +247,10 @@ define i8 @test_redand_v4i8(<4 x i8> %a) {
; GISEL-LABEL: test_redand_v4i8:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9
@@ -315,27 +272,20 @@ define i8 @test_redand_v8i8(<8 x i8> %a) {
; GISEL-LABEL: test_redand_v8i8:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: and w10, w10, w11
-; GISEL-NEXT: and w11, w12, w13
-; GISEL-NEXT: and w8, w8, w10
-; GISEL-NEXT: and w9, w14, w9
-; GISEL-NEXT: and w9, w11, w9
+; GISEL-NEXT: and w9, w10, w11
+; GISEL-NEXT: and w10, w12, w13
+; GISEL-NEXT: and w11, w14, w15
+; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
@@ -358,27 +308,20 @@ define i8 @test_redand_v16i8(<16 x i8> %a) {
; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: and w10, w10, w11
-; GISEL-NEXT: and w11, w12, w13
-; GISEL-NEXT: and w8, w8, w10
-; GISEL-NEXT: and w9, w14, w9
-; GISEL-NEXT: and w9, w11, w9
+; GISEL-NEXT: and w9, w10, w11
+; GISEL-NEXT: and w10, w12, w13
+; GISEL-NEXT: and w11, w14, w15
+; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
@@ -403,27 +346,20 @@ define i8 @test_redand_v32i8(<32 x i8> %a) {
; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: and w8, w8, w9
+; GISEL-NEXT: and w9, w10, w11
+; GISEL-NEXT: and w10, w12, w13
+; GISEL-NEXT: and w11, w14, w15
; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: and w10, w10, w11
-; GISEL-NEXT: and w11, w12, w13
-; GISEL-NEXT: and w8, w8, w10
-; GISEL-NEXT: and w9, w14, w9
-; GISEL-NEXT: and w9, w11, w9
+; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9
; GISEL-NEXT: ret
%and_result = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a)
@@ -442,13 +378,10 @@ define i16 @test_redand_v4i16(<4 x i16> %a) {
; GISEL-LABEL: test_redand_v4i16:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9
@@ -472,13 +405,10 @@ define i16 @test_redand_v8i16(<8 x i16> %a) {
; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9
@@ -504,13 +434,10 @@ define i16 @test_redand_v16i16(<16 x i16> %a) {
; GISEL-NEXT: and v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: and v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: and w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
index 4c30a32934964b..c4ac01f32e3651 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -53,13 +53,10 @@ define i1 @test_redor_v4i1(<4 x i1> %a) {
; GISEL-LABEL: test_redor_v4i1:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w8, w8, w9
@@ -82,27 +79,20 @@ define i1 @test_redor_v8i1(<8 x i1> %a) {
; GISEL-LABEL: test_redor_v8i1:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: orr w10, w10, w11
-; GISEL-NEXT: orr w11, w12, w13
-; GISEL-NEXT: orr w8, w8, w10
-; GISEL-NEXT: orr w9, w14, w9
-; GISEL-NEXT: orr w9, w11, w9
+; GISEL-NEXT: orr w9, w10, w11
+; GISEL-NEXT: orr w10, w12, w13
+; GISEL-NEXT: orr w11, w14, w15
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: and w0, w8, #0x1
; GISEL-NEXT: ret
@@ -122,49 +112,34 @@ define i1 @test_redor_v16i1(<16 x i1> %a) {
;
; GISEL-LABEL: test_redor_v16i1:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: mov b16, v0.b[8]
-; GISEL-NEXT: mov b17, v0.b[9]
-; GISEL-NEXT: mov b18, v0.b[10]
-; GISEL-NEXT: mov b19, v0.b[11]
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: mov b20, v0.b[12]
-; GISEL-NEXT: mov b21, v0.b[13]
-; GISEL-NEXT: fmov w13, s7
-; GISEL-NEXT: mov b22, v0.b[14]
-; GISEL-NEXT: mov b23, v0.b[15]
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: umov w16, v0.b[8]
+; GISEL-NEXT: umov w17, v0.b[9]
+; GISEL-NEXT: umov w18, v0.b[10]
+; GISEL-NEXT: umov w0, v0.b[11]
; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: umov w1, v0.b[12]
+; GISEL-NEXT: umov w2, v0.b[13]
; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: orr w10, w12, w13
+; GISEL-NEXT: umov w3, v0.b[14]
+; GISEL-NEXT: orr w11, w14, w15
; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: fmov w11, s5
-; GISEL-NEXT: fmov w14, s18
-; GISEL-NEXT: fmov w15, s19
-; GISEL-NEXT: fmov w16, s22
-; GISEL-NEXT: fmov w17, s23
-; GISEL-NEXT: orr w10, w10, w11
-; GISEL-NEXT: orr w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: umov w4, v0.b[15]
+; GISEL-NEXT: orr w12, w16, w17
+; GISEL-NEXT: orr w13, w18, w0
; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: fmov w13, s17
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: orr w12, w12, w13
-; GISEL-NEXT: orr w13, w14, w15
-; GISEL-NEXT: fmov w14, s20
-; GISEL-NEXT: fmov w15, s21
+; GISEL-NEXT: orr w14, w1, w2
; GISEL-NEXT: orr w10, w12, w13
-; GISEL-NEXT: orr w14, w14, w15
-; GISEL-NEXT: orr w15, w16, w17
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: orr w15, w3, w4
; GISEL-NEXT: orr w11, w14, w15
; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w8, w8, w9
@@ -184,49 +159,34 @@ define <16 x i1> @test_redor_ins_v16i1(<16 x i1> %a) {
;
; GISEL-LABEL: test_redor_ins_v16i1:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: mov b16, v0.b[8]
-; GISEL-NEXT: mov b17, v0.b[9]
-; GISEL-NEXT: mov b18, v0.b[10]
-; GISEL-NEXT: mov b19, v0.b[11]
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: mov b20, v0.b[12]
-; GISEL-NEXT: mov b21, v0.b[13]
-; GISEL-NEXT: fmov w13, s7
-; GISEL-NEXT: mov b22, v0.b[14]
-; GISEL-NEXT: mov b23, v0.b[15]
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: umov w16, v0.b[8]
+; GISEL-NEXT: umov w17, v0.b[9]
+; GISEL-NEXT: umov w18, v0.b[10]
+; GISEL-NEXT: umov w0, v0.b[11]
; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: umov w1, v0.b[12]
+; GISEL-NEXT: umov w2, v0.b[13]
; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: orr w10, w12, w13
+; GISEL-NEXT: umov w3, v0.b[14]
+; GISEL-NEXT: orr w11, w14, w15
; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: fmov w11, s5
-; GISEL-NEXT: fmov w14, s18
-; GISEL-NEXT: fmov w15, s19
-; GISEL-NEXT: fmov w16, s22
-; GISEL-NEXT: fmov w17, s23
-; GISEL-NEXT: orr w10, w10, w11
-; GISEL-NEXT: orr w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: umov w4, v0.b[15]
+; GISEL-NEXT: orr w12, w16, w17
+; GISEL-NEXT: orr w13, w18, w0
; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: fmov w13, s17
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: orr w12, w12, w13
-; GISEL-NEXT: orr w13, w14, w15
-; GISEL-NEXT: fmov w14, s20
-; GISEL-NEXT: fmov w15, s21
+; GISEL-NEXT: orr w14, w1, w2
; GISEL-NEXT: orr w10, w12, w13
-; GISEL-NEXT: orr w14, w14, w15
-; GISEL-NEXT: orr w15, w16, w17
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: orr w15, w3, w4
; GISEL-NEXT: orr w11, w14, w15
; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w8, w8, w9
@@ -289,13 +249,10 @@ define i8 @test_redor_v4i8(<4 x i8> %a) {
; GISEL-LABEL: test_redor_v4i8:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9
@@ -317,27 +274,20 @@ define i8 @test_redor_v8i8(<8 x i8> %a) {
; GISEL-LABEL: test_redor_v8i8:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: orr w10, w10, w11
-; GISEL-NEXT: orr w11, w12, w13
-; GISEL-NEXT: orr w8, w8, w10
-; GISEL-NEXT: orr w9, w14, w9
-; GISEL-NEXT: orr w9, w11, w9
+; GISEL-NEXT: orr w9, w10, w11
+; GISEL-NEXT: orr w10, w12, w13
+; GISEL-NEXT: orr w11, w14, w15
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
@@ -360,27 +310,20 @@ define i8 @test_redor_v16i8(<16 x i8> %a) {
; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: orr w10, w10, w11
-; GISEL-NEXT: orr w11, w12, w13
-; GISEL-NEXT: orr w8, w8, w10
-; GISEL-NEXT: orr w9, w14, w9
-; GISEL-NEXT: orr w9, w11, w9
+; GISEL-NEXT: orr w9, w10, w11
+; GISEL-NEXT: orr w10, w12, w13
+; GISEL-NEXT: orr w11, w14, w15
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
@@ -405,27 +348,20 @@ define i8 @test_redor_v32i8(<32 x i8> %a) {
; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: orr w8, w8, w9
+; GISEL-NEXT: orr w9, w10, w11
+; GISEL-NEXT: orr w10, w12, w13
+; GISEL-NEXT: orr w11, w14, w15
; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: orr w10, w10, w11
-; GISEL-NEXT: orr w11, w12, w13
-; GISEL-NEXT: orr w8, w8, w10
-; GISEL-NEXT: orr w9, w14, w9
-; GISEL-NEXT: orr w9, w11, w9
+; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9
; GISEL-NEXT: ret
%or_result = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a)
@@ -444,13 +380,10 @@ define i16 @test_redor_v4i16(<4 x i16> %a) {
; GISEL-LABEL: test_redor_v4i16:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9
@@ -474,13 +407,10 @@ define i16 @test_redor_v8i16(<8 x i16> %a) {
; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9
@@ -506,13 +436,10 @@ define i16 @test_redor_v16i16(<16 x i16> %a) {
; GISEL-NEXT: orr v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: orr v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: orr w9, w10, w11
; GISEL-NEXT: orr w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll
index c74b3734a1b76c..5c2a808ef2e88c 100644
--- a/llvm/test/CodeGen/AArch64/reduce-xor.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll
@@ -48,13 +48,10 @@ define i1 @test_redxor_v4i1(<4 x i1> %a) {
; GISEL-LABEL: test_redxor_v4i1:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w8, w8, w9
@@ -75,27 +72,20 @@ define i1 @test_redxor_v8i1(<8 x i1> %a) {
; GISEL-LABEL: test_redxor_v8i1:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: eor w8, w8, w10
-; GISEL-NEXT: eor w9, w14, w9
-; GISEL-NEXT: eor w9, w11, w9
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: eor w10, w12, w13
+; GISEL-NEXT: eor w11, w14, w15
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: and w0, w8, #0x1
; GISEL-NEXT: ret
@@ -113,49 +103,34 @@ define i1 @test_redxor_v16i1(<16 x i1> %a) {
;
; GISEL-LABEL: test_redxor_v16i1:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: mov b16, v0.b[8]
-; GISEL-NEXT: mov b17, v0.b[9]
-; GISEL-NEXT: mov b18, v0.b[10]
-; GISEL-NEXT: mov b19, v0.b[11]
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: mov b20, v0.b[12]
-; GISEL-NEXT: mov b21, v0.b[13]
-; GISEL-NEXT: fmov w13, s7
-; GISEL-NEXT: mov b22, v0.b[14]
-; GISEL-NEXT: mov b23, v0.b[15]
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: umov w16, v0.b[8]
+; GISEL-NEXT: umov w17, v0.b[9]
+; GISEL-NEXT: umov w18, v0.b[10]
+; GISEL-NEXT: umov w0, v0.b[11]
; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: umov w1, v0.b[12]
+; GISEL-NEXT: umov w2, v0.b[13]
; GISEL-NEXT: eor w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: eor w10, w12, w13
+; GISEL-NEXT: umov w3, v0.b[14]
+; GISEL-NEXT: eor w11, w14, w15
; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w11, s5
-; GISEL-NEXT: fmov w14, s18
-; GISEL-NEXT: fmov w15, s19
-; GISEL-NEXT: fmov w16, s22
-; GISEL-NEXT: fmov w17, s23
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: umov w4, v0.b[15]
+; GISEL-NEXT: eor w12, w16, w17
+; GISEL-NEXT: eor w13, w18, w0
; GISEL-NEXT: eor w9, w10, w11
-; GISEL-NEXT: fmov w13, s17
-; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: eor w12, w12, w13
-; GISEL-NEXT: eor w13, w14, w15
-; GISEL-NEXT: fmov w14, s20
-; GISEL-NEXT: fmov w15, s21
+; GISEL-NEXT: eor w14, w1, w2
; GISEL-NEXT: eor w10, w12, w13
-; GISEL-NEXT: eor w14, w14, w15
-; GISEL-NEXT: eor w15, w16, w17
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w15, w3, w4
; GISEL-NEXT: eor w11, w14, w15
; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w8, w8, w9
@@ -173,49 +148,34 @@ define <16 x i1> @test_redxor_ins_v16i1(<16 x i1> %a) {
;
; GISEL-LABEL: test_redxor_ins_v16i1:
; GISEL: // %bb.0:
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: mov b16, v0.b[8]
-; GISEL-NEXT: mov b17, v0.b[9]
-; GISEL-NEXT: mov b18, v0.b[10]
-; GISEL-NEXT: mov b19, v0.b[11]
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: mov b20, v0.b[12]
-; GISEL-NEXT: mov b21, v0.b[13]
-; GISEL-NEXT: fmov w13, s7
-; GISEL-NEXT: mov b22, v0.b[14]
-; GISEL-NEXT: mov b23, v0.b[15]
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: umov w16, v0.b[8]
+; GISEL-NEXT: umov w17, v0.b[9]
+; GISEL-NEXT: umov w18, v0.b[10]
+; GISEL-NEXT: umov w0, v0.b[11]
; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: umov w1, v0.b[12]
+; GISEL-NEXT: umov w2, v0.b[13]
; GISEL-NEXT: eor w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: eor w10, w12, w13
+; GISEL-NEXT: umov w3, v0.b[14]
+; GISEL-NEXT: eor w11, w14, w15
; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w11, s5
-; GISEL-NEXT: fmov w14, s18
-; GISEL-NEXT: fmov w15, s19
-; GISEL-NEXT: fmov w16, s22
-; GISEL-NEXT: fmov w17, s23
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: umov w4, v0.b[15]
+; GISEL-NEXT: eor w12, w16, w17
+; GISEL-NEXT: eor w13, w18, w0
; GISEL-NEXT: eor w9, w10, w11
-; GISEL-NEXT: fmov w13, s17
-; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: eor w12, w12, w13
-; GISEL-NEXT: eor w13, w14, w15
-; GISEL-NEXT: fmov w14, s20
-; GISEL-NEXT: fmov w15, s21
+; GISEL-NEXT: eor w14, w1, w2
; GISEL-NEXT: eor w10, w12, w13
-; GISEL-NEXT: eor w14, w14, w15
-; GISEL-NEXT: eor w15, w16, w17
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w15, w3, w4
; GISEL-NEXT: eor w11, w14, w15
; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w8, w8, w9
@@ -278,13 +238,10 @@ define i8 @test_redxor_v4i8(<4 x i8> %a) {
; GISEL-LABEL: test_redxor_v4i8:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9
@@ -306,27 +263,20 @@ define i8 @test_redxor_v8i8(<8 x i8> %a) {
; GISEL-LABEL: test_redxor_v8i8:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: eor w8, w8, w10
-; GISEL-NEXT: eor w9, w14, w9
-; GISEL-NEXT: eor w9, w11, w9
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: eor w10, w12, w13
+; GISEL-NEXT: eor w11, w14, w15
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
@@ -349,27 +299,20 @@ define i8 @test_redxor_v16i8(<16 x i8> %a) {
; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: eor w8, w8, w10
-; GISEL-NEXT: eor w9, w14, w9
-; GISEL-NEXT: eor w9, w11, w9
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: eor w10, w12, w13
+; GISEL-NEXT: eor w11, w14, w15
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
@@ -394,27 +337,20 @@ define i8 @test_redxor_v32i8(<32 x i8> %a) {
; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov b1, v0.b[1]
-; GISEL-NEXT: mov b2, v0.b[2]
-; GISEL-NEXT: mov b3, v0.b[3]
-; GISEL-NEXT: mov b4, v0.b[4]
-; GISEL-NEXT: mov b5, v0.b[5]
-; GISEL-NEXT: mov b6, v0.b[6]
-; GISEL-NEXT: mov b7, v0.b[7]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
-; GISEL-NEXT: fmov w12, s4
-; GISEL-NEXT: fmov w13, s5
-; GISEL-NEXT: fmov w14, s6
+; GISEL-NEXT: umov w8, v0.b[0]
+; GISEL-NEXT: umov w9, v0.b[1]
+; GISEL-NEXT: umov w10, v0.b[2]
+; GISEL-NEXT: umov w11, v0.b[3]
+; GISEL-NEXT: umov w12, v0.b[4]
+; GISEL-NEXT: umov w13, v0.b[5]
+; GISEL-NEXT: umov w14, v0.b[6]
+; GISEL-NEXT: umov w15, v0.b[7]
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: eor w10, w12, w13
+; GISEL-NEXT: eor w11, w14, w15
; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w9, s7
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: eor w8, w8, w10
-; GISEL-NEXT: eor w9, w14, w9
-; GISEL-NEXT: eor w9, w11, w9
+; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9
; GISEL-NEXT: ret
%xor_result = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a)
@@ -433,13 +369,10 @@ define i16 @test_redxor_v4i16(<4 x i16> %a) {
; GISEL-LABEL: test_redxor_v4i16:
; GISEL: // %bb.0:
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9
@@ -463,13 +396,10 @@ define i16 @test_redxor_v8i16(<8 x i16> %a) {
; GISEL: // %bb.0:
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9
@@ -495,13 +425,10 @@ define i16 @test_redxor_v16i16(<16 x i16> %a) {
; GISEL-NEXT: eor v0.16b, v0.16b, v1.16b
; GISEL-NEXT: mov d1, v0.d[1]
; GISEL-NEXT: eor v0.8b, v0.8b, v1.8b
-; GISEL-NEXT: mov h1, v0.h[1]
-; GISEL-NEXT: mov h2, v0.h[2]
-; GISEL-NEXT: mov h3, v0.h[3]
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
+; GISEL-NEXT: umov w8, v0.h[0]
+; GISEL-NEXT: umov w9, v0.h[1]
+; GISEL-NEXT: umov w10, v0.h[2]
+; GISEL-NEXT: umov w11, v0.h[3]
; GISEL-NEXT: eor w8, w8, w9
; GISEL-NEXT: eor w9, w10, w11
; GISEL-NEXT: eor w0, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index 4d26228caf62e9..014e4071a4bf61 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -289,18 +289,14 @@ define <3 x i32> @sext_v3i16_v3i32(<3 x i16> %a) {
; CHECK-GI-LABEL: sext_v3i16_v3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: sxth w8, w8
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: sxth w9, w9
-; CHECK-GI-NEXT: sxth w8, w8
-; CHECK-GI-NEXT: mov v0.s[1], w9
-; CHECK-GI-NEXT: mov v0.s[2], w8
-; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: smov w8, v0.h[0]
+; CHECK-GI-NEXT: smov w9, v0.h[1]
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: smov w8, v0.h[2]
+; CHECK-GI-NEXT: mov v1.s[1], w9
+; CHECK-GI-NEXT: mov v1.s[2], w8
+; CHECK-GI-NEXT: mov v1.s[3], w8
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%c = sext <3 x i16> %a to <3 x i32>
@@ -322,15 +318,10 @@ define <3 x i64> @sext_v3i16_v3i64(<3 x i16> %a) {
; CHECK-GI-LABEL: sext_v3i16_v3i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: sxth x8, w8
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: smov x8, v0.h[0]
+; CHECK-GI-NEXT: smov x9, v0.h[1]
+; CHECK-GI-NEXT: smov x10, v0.h[2]
; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: sxth x9, w9
-; CHECK-GI-NEXT: sxth x10, w10
; CHECK-GI-NEXT: fmov d1, x9
; CHECK-GI-NEXT: fmov d2, x10
; CHECK-GI-NEXT: ret
@@ -352,15 +343,10 @@ define <3 x i64> @sext_v3i32_v3i64(<3 x i32> %a) {
;
; CHECK-GI-LABEL: sext_v3i32_v3i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: sxtw x8, w8
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: smov x8, v0.s[0]
+; CHECK-GI-NEXT: smov x9, v0.s[1]
+; CHECK-GI-NEXT: smov x10, v0.s[2]
; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: sxtw x9, w9
-; CHECK-GI-NEXT: sxtw x10, w10
; CHECK-GI-NEXT: fmov d1, x9
; CHECK-GI-NEXT: fmov d2, x10
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 53aefaf3d33600..7f804fe48fd854 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -168,53 +168,32 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
; CHECK-GI-LABEL: test_v9i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: uxtb w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: mov b1, v0.b[5]
-; CHECK-GI-NEXT: mov b2, v0.b[6]
-; CHECK-GI-NEXT: cmp w8, w10, uxtb
-; CHECK-GI-NEXT: fmov w10, s3
-; CHECK-GI-NEXT: uxtb w8, w11
-; CHECK-GI-NEXT: csel w9, w9, w12, hi
-; CHECK-GI-NEXT: cmp w8, w9, uxtb
-; CHECK-GI-NEXT: uxtb w8, w10
-; CHECK-GI-NEXT: fmov w10, s4
-; CHECK-GI-NEXT: csel w9, w9, w11, lo
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: mov b3, v0.b[7]
-; CHECK-GI-NEXT: mov b0, v0.b[8]
-; CHECK-GI-NEXT: cmp w8, w9, uxtb
-; CHECK-GI-NEXT: uxtb w8, w10
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: csel w9, w9, w11, lo
-; CHECK-GI-NEXT: fmov w11, s4
-; CHECK-GI-NEXT: cmp w8, w9, uxtb
-; CHECK-GI-NEXT: uxtb w8, w10
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w11, lo
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: umov w10, v0.b[2]
; CHECK-GI-NEXT: fmov w11, s1
-; CHECK-GI-NEXT: cmp w8, w9, uxtb
-; CHECK-GI-NEXT: uxtb w8, w10
-; CHECK-GI-NEXT: fmov w10, s3
-; CHECK-GI-NEXT: csel w9, w9, w11, lo
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: cmp w8, w9, uxtb
-; CHECK-GI-NEXT: uxtb w8, w10
-; CHECK-GI-NEXT: fmov w10, s0
-; CHECK-GI-NEXT: csel w9, w9, w11, lo
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: cmp w8, w9, uxtb
-; CHECK-GI-NEXT: uxtb w8, w10
-; CHECK-GI-NEXT: csel w9, w9, w11, lo
-; CHECK-GI-NEXT: cmp w8, w9, uxtb
-; CHECK-GI-NEXT: csel w0, w9, w10, lo
+; CHECK-GI-NEXT: cmp w8, w11, uxtb
+; CHECK-GI-NEXT: umov w11, v0.b[3]
+; CHECK-GI-NEXT: csel w8, w8, w9, hi
+; CHECK-GI-NEXT: umov w9, v0.b[4]
+; CHECK-GI-NEXT: cmp w10, w8, uxtb
+; CHECK-GI-NEXT: csel w8, w8, w10, lo
+; CHECK-GI-NEXT: umov w10, v0.b[5]
+; CHECK-GI-NEXT: cmp w11, w8, uxtb
+; CHECK-GI-NEXT: csel w8, w8, w11, lo
+; CHECK-GI-NEXT: umov w11, v0.b[6]
+; CHECK-GI-NEXT: cmp w9, w8, uxtb
+; CHECK-GI-NEXT: csel w8, w8, w9, lo
+; CHECK-GI-NEXT: umov w9, v0.b[7]
+; CHECK-GI-NEXT: cmp w10, w8, uxtb
+; CHECK-GI-NEXT: csel w8, w8, w10, lo
+; CHECK-GI-NEXT: umov w10, v0.b[8]
+; CHECK-GI-NEXT: cmp w11, w8, uxtb
+; CHECK-GI-NEXT: csel w8, w8, w11, lo
+; CHECK-GI-NEXT: cmp w9, w8, uxtb
+; CHECK-GI-NEXT: csel w8, w8, w9, lo
+; CHECK-GI-NEXT: cmp w10, w8, uxtb
+; CHECK-GI-NEXT: csel w0, w8, w10, lo
; CHECK-GI-NEXT: ret
%b = call i8 @llvm.vector.reduce.umax.v9i8(<9 x i8> %a)
ret i8 %b
@@ -259,21 +238,18 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-GI-LABEL: test_v4i1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w12, s2
-; CHECK-GI-NEXT: fmov w13, s3
-; CHECK-GI-NEXT: and w9, w8, #0x1
-; CHECK-GI-NEXT: and w11, w10, #0x1
-; CHECK-GI-NEXT: cmp w9, w11
-; CHECK-GI-NEXT: and w9, w12, #0x1
-; CHECK-GI-NEXT: and w11, w13, #0x1
-; CHECK-GI-NEXT: csel w8, w8, w10, hi
-; CHECK-GI-NEXT: cmp w9, w11
-; CHECK-GI-NEXT: csel w9, w12, w13, hi
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0x1
+; CHECK-GI-NEXT: and w13, w9, #0x1
+; CHECK-GI-NEXT: cmp w12, w13
+; CHECK-GI-NEXT: and w12, w10, #0x1
+; CHECK-GI-NEXT: and w13, w11, #0x1
+; CHECK-GI-NEXT: csel w8, w8, w9, hi
+; CHECK-GI-NEXT: cmp w12, w13
+; CHECK-GI-NEXT: csel w9, w10, w11, hi
; CHECK-GI-NEXT: and w10, w8, #0x1
; CHECK-GI-NEXT: and w11, w9, #0x1
; CHECK-GI-NEXT: cmp w10, w11
diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
index 0dd4e3644b7835..21982fadbe8036 100644
--- a/llvm/test/CodeGen/AArch64/xtn.ll
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -224,23 +224,13 @@ entry:
}
define <3 x i8> @xtn_v3i16_v3i8(<3 x i16> %a) {
-; CHECK-SD-LABEL: xtn_v3i16_v3i8:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: umov w0, v0.h[0]
-; CHECK-SD-NEXT: umov w1, v0.h[1]
-; CHECK-SD-NEXT: umov w2, v0.h[2]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: xtn_v3i16_v3i8:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: fmov w1, s1
-; CHECK-GI-NEXT: fmov w2, s2
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: xtn_v3i16_v3i8:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w0, v0.h[0]
+; CHECK-NEXT: umov w1, v0.h[1]
+; CHECK-NEXT: umov w2, v0.h[2]
+; CHECK-NEXT: ret
entry:
%arg1 = trunc <3 x i16> %a to <3 x i8>
ret <3 x i8> %arg1
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 42c0bf79e77897..e513340f5b18ad 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -2,6 +2,8 @@
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; CHECK-GI: warning: Instruction selection used fallback path for zext_v16i10_v16i16
+
define i16 @zext_i8_to_i16(i8 %a) {
; CHECK-LABEL: zext_i8_to_i16:
; CHECK: // %bb.0: // %entry
@@ -333,18 +335,14 @@ define <3 x i32> @zext_v3i16_v3i32(<3 x i16> %a) {
; CHECK-GI-LABEL: zext_v3i16_v3i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: uxth w8, w8
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: uxth w9, w9
-; CHECK-GI-NEXT: uxth w8, w8
-; CHECK-GI-NEXT: mov v0.s[1], w9
-; CHECK-GI-NEXT: mov v0.s[2], w8
-; CHECK-GI-NEXT: mov v0.s[3], w8
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: umov w8, v0.h[2]
+; CHECK-GI-NEXT: mov v1.s[1], w9
+; CHECK-GI-NEXT: mov v1.s[2], w8
+; CHECK-GI-NEXT: mov v1.s[3], w8
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <3 x i16> %a to <3 x i32>
@@ -366,15 +364,10 @@ define <3 x i64> @zext_v3i16_v3i64(<3 x i16> %a) {
; CHECK-GI-LABEL: zext_v3i16_v3i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: ubfx x8, x8, #0, #16
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: ubfx x9, x9, #0, #16
-; CHECK-GI-NEXT: ubfx x10, x10, #0, #16
; CHECK-GI-NEXT: fmov d1, x9
; CHECK-GI-NEXT: fmov d2, x10
; CHECK-GI-NEXT: ret
@@ -396,12 +389,10 @@ define <3 x i64> @zext_v3i32_v3i64(<3 x i32> %a) {
;
; CHECK-GI-LABEL: zext_v3i32_v3i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov s1, v0.s[1]
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov w8, s0
+; CHECK-GI-NEXT: mov w8, v0.s[0]
+; CHECK-GI-NEXT: mov w9, v0.s[1]
+; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: fmov d0, x8
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
; CHECK-GI-NEXT: fmov d1, x9
; CHECK-GI-NEXT: fmov d2, x10
; CHECK-GI-NEXT: ret
More information about the llvm-commits mailing list