[llvm] 59fbb9e - [AArch64] Add tablegen patterns for i8 and i16 vector insert/extract pairs (#136091)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 28 00:17:49 PDT 2025
Author: David Green
Date: 2025-04-28T08:17:45+01:00
New Revision: 59fbb9e7751b36bbca1064abb6c21ce59c038a70
URL: https://github.com/llvm/llvm-project/commit/59fbb9e7751b36bbca1064abb6c21ce59c038a70
DIFF: https://github.com/llvm/llvm-project/commit/59fbb9e7751b36bbca1064abb6c21ce59c038a70.diff
LOG: [AArch64] Add tablegen patterns for i8 and i16 vector insert/extract pairs (#136091)
An i8 or i16 vector extract/insert has to go via an i32 to make sure the
types are legal. This patch adds patterns for an extract from an i8/i16
vector that is inserted into an i16/i32 vector. This avoids the round trip
via a GPR, which can limit performance.
Added:
Modified:
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
llvm/test/CodeGen/AArch64/bitcast-extend.ll
llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
llvm/test/CodeGen/AArch64/itofp.ll
llvm/test/CodeGen/AArch64/neon-bitcast.ll
llvm/test/CodeGen/AArch64/shuffle-extend.ll
llvm/test/CodeGen/AArch64/vector-fcvt.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index a53527442719a..3515a7da71075 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -827,8 +827,11 @@ def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1,
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
[]>;
-// vector_extract/vector_insert are deprecated. extractelt/insertelt
-// are preferred.
+// vector_extract/vector_insert are similar to extractelt/insertelt but allow
+// types that require promotion (for example, a v16i8 extract that uses i32
+// because i8 is not a legal type). extractelt/insertelt are preferred where
+// the element type and the extracted type match, due to the extra type
+// checking they perform.
def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>;
def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index f1c95fdfc8974..f7b13092821d6 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7347,6 +7347,41 @@ def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))),
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
(INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
+// Patterns for lane moves from i8/i16 vector elements into v4i16/v2i32 lanes, where the extract/insert pair goes via i32.
+multiclass Neon_INS_elt_ext_pattern<ValueType VT128, ValueType VT64, ValueType OutVT,
+ Instruction INS, SDNodeXForm VecIndexMult> {
+ // VT64->OutVT
+ def : Pat<(OutVT (vector_insert (OutVT V64:$src),
+ (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
+ (EXTRACT_SUBREG
+ (INS (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$src, dsub), (VecIndexMult imm:$Immd),
+ (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+ dsub)>;
+ def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))))),
+ (EXTRACT_SUBREG
+ (INS (IMPLICIT_DEF), 0,
+ (INSERT_SUBREG (VT128 (IMPLICIT_DEF)), V64:$Rn, dsub), imm:$Immn),
+ dsub)>;
+
+ // VT128->OutVT
+ def : Pat<(OutVT (vector_insert (OutVT V64:$src),
+ (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
+ (i64 imm:$Immd))),
+ (EXTRACT_SUBREG
+ (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), (VecIndexMult imm:$Immd),
+ V128:$Rn, imm:$Immn),
+ dsub)>;
+ def : Pat<(OutVT (scalar_to_vector (i32 (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))))),
+ (EXTRACT_SUBREG
+ (INS (IMPLICIT_DEF), 0, V128:$Rn, imm:$Immn),
+ dsub)>;
+}
+
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v4i16, INSvi8lane, VecIndex_x2>;
+defm : Neon_INS_elt_ext_pattern<v16i8, v8i8, v2i32, INSvi8lane, VecIndex_x4>;
+defm : Neon_INS_elt_ext_pattern<v8i16, v4i16, v2i32, INSvi16lane, VecIndex_x2>;
+
// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
index 7a4cdd52db904..fccb1fb675768 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
@@ -11,14 +11,11 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
-; CHECK-SDAG-NEXT: umov w9, v0.b[1]
; CHECK-SDAG-NEXT: bfxil x8, x0, #0, #3
; CHECK-SDAG-NEXT: ld1 { v1.b }[0], [x8]
-; CHECK-SDAG-NEXT: umov w8, v0.b[2]
-; CHECK-SDAG-NEXT: mov v1.h[1], w9
-; CHECK-SDAG-NEXT: umov w9, v0.b[3]
-; CHECK-SDAG-NEXT: mov v1.h[2], w8
-; CHECK-SDAG-NEXT: mov v1.h[3], w9
+; CHECK-SDAG-NEXT: mov v1.b[2], v0.b[1]
+; CHECK-SDAG-NEXT: mov v1.b[4], v0.b[2]
+; CHECK-SDAG-NEXT: mov v1.b[6], v0.b[3]
; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
@@ -168,11 +165,10 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
; CHECK-SDAG-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SDAG-NEXT: str d0, [sp, #8]
-; CHECK-SDAG-NEXT: umov w9, v0.h[1]
; CHECK-SDAG-NEXT: bfi x8, x0, #1, #2
-; CHECK-SDAG-NEXT: ld1 { v0.h }[0], [x8]
-; CHECK-SDAG-NEXT: mov v0.s[1], w9
-; CHECK-SDAG-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SDAG-NEXT: ld1 { v1.h }[0], [x8]
+; CHECK-SDAG-NEXT: mov v1.h[2], v0.h[1]
+; CHECK-SDAG-NEXT: fmov d0, d1
; CHECK-SDAG-NEXT: add sp, sp, #16
; CHECK-SDAG-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 85daa3ca6623e..33238ccf86a39 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -70,16 +70,12 @@ define <4 x i64> @z_i32_v4i64(i32 %x) {
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
; CHECK-SD-NEXT: movi v1.2d, #0x000000000000ff
-; CHECK-SD-NEXT: umov w8, v0.b[2]
-; CHECK-SD-NEXT: umov w9, v0.b[0]
-; CHECK-SD-NEXT: umov w10, v0.b[3]
-; CHECK-SD-NEXT: umov w11, v0.b[1]
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: fmov s2, w8
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: mov v2.s[1], w10
-; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v3.2s, #0
; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT: and v1.16b, v2.16b, v1.16b
; CHECK-SD-NEXT: ret
@@ -176,16 +172,12 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
; CHECK-SD-LABEL: s_i32_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: umov w8, v0.b[2]
-; CHECK-SD-NEXT: umov w9, v0.b[0]
-; CHECK-SD-NEXT: umov w10, v0.b[3]
-; CHECK-SD-NEXT: umov w11, v0.b[1]
-; CHECK-SD-NEXT: fmov s0, w9
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: mov v1.s[1], w10
-; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
+; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v1.2d, v2.2s, #0
; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56
; CHECK-SD-NEXT: shl v1.2d, v1.2d, #56
; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56
diff --git a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
index 8b74de1c127dd..e90b6cb7f809b 100644
--- a/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
+++ b/llvm/test/CodeGen/AArch64/fix-shuffle-vector-be-rev.ll
@@ -5,16 +5,12 @@
define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECKLE-LABEL: test_reconstructshuffle:
; CHECKLE: // %bb.0:
-; CHECKLE-NEXT: umov w8, v0.b[3]
-; CHECKLE-NEXT: umov w9, v0.b[2]
-; CHECKLE-NEXT: fmov s2, w8
-; CHECKLE-NEXT: umov w8, v0.b[1]
-; CHECKLE-NEXT: mov v2.h[1], w9
-; CHECKLE-NEXT: mov v2.h[2], w8
-; CHECKLE-NEXT: umov w8, v0.b[0]
-; CHECKLE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
-; CHECKLE-NEXT: mov v2.h[3], w8
-; CHECKLE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; CHECKLE-NEXT: mov v2.b[0], v0.b[3]
+; CHECKLE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKLE-NEXT: mov v2.b[2], v0.b[2]
+; CHECKLE-NEXT: mov v2.b[4], v0.b[1]
+; CHECKLE-NEXT: mov v2.b[6], v0.b[0]
+; CHECKLE-NEXT: zip2 v0.8b, v1.8b, v0.8b
; CHECKLE-NEXT: add v0.4h, v2.4h, v0.4h
; CHECKLE-NEXT: bic v0.4h, #255, lsl #8
; CHECKLE-NEXT: ret
@@ -25,16 +21,12 @@ define <4 x i16> @test_reconstructshuffle(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECKBE-NEXT: rev64 v1.16b, v1.16b
; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: umov w8, v0.b[3]
-; CHECKBE-NEXT: umov w9, v0.b[2]
-; CHECKBE-NEXT: fmov s2, w8
-; CHECKBE-NEXT: umov w8, v0.b[1]
-; CHECKBE-NEXT: mov v2.h[1], w9
-; CHECKBE-NEXT: mov v2.h[2], w8
-; CHECKBE-NEXT: umov w8, v0.b[0]
-; CHECKBE-NEXT: ext v0.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT: mov v2.h[3], w8
-; CHECKBE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; CHECKBE-NEXT: mov v2.b[0], v0.b[3]
+; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT: mov v2.b[2], v0.b[2]
+; CHECKBE-NEXT: mov v2.b[4], v0.b[1]
+; CHECKBE-NEXT: mov v2.b[6], v0.b[0]
+; CHECKBE-NEXT: zip2 v0.8b, v1.8b, v0.8b
; CHECKBE-NEXT: add v0.4h, v2.4h, v0.4h
; CHECKBE-NEXT: bic v0.4h, #255, lsl #8
; CHECKBE-NEXT: rev64 v0.4h, v0.4h
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index 07957c117868d..fb2bdb4d63f47 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -3443,26 +3443,18 @@ define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) {
; CHECK-SD-LABEL: stofp_v8i8_v8f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: umov w8, v0.b[0]
-; CHECK-SD-NEXT: umov w9, v0.b[2]
-; CHECK-SD-NEXT: umov w11, v0.b[4]
-; CHECK-SD-NEXT: umov w12, v0.b[6]
-; CHECK-SD-NEXT: umov w10, v0.b[1]
-; CHECK-SD-NEXT: umov w13, v0.b[3]
-; CHECK-SD-NEXT: umov w14, v0.b[5]
-; CHECK-SD-NEXT: umov w15, v0.b[7]
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: fmov s1, w9
-; CHECK-SD-NEXT: fmov s2, w11
-; CHECK-SD-NEXT: fmov s3, w12
-; CHECK-SD-NEXT: mov v0.s[1], w10
-; CHECK-SD-NEXT: mov v1.s[1], w13
-; CHECK-SD-NEXT: mov v2.s[1], w14
-; CHECK-SD-NEXT: mov v3.s[1], w15
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-SD-NEXT: shl v2.2s, v2.2s, #24
-; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24
+; CHECK-SD-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov v1.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[7]
+; CHECK-SD-NEXT: shl v0.2s, v1.2s, #24
+; CHECK-SD-NEXT: shl v1.2s, v2.2s, #24
+; CHECK-SD-NEXT: shl v2.2s, v3.2s, #24
+; CHECK-SD-NEXT: shl v3.2s, v4.2s, #24
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
@@ -3500,27 +3492,19 @@ define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) {
; CHECK-SD-LABEL: utofp_v8i8_v8f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: umov w8, v0.b[0]
-; CHECK-SD-NEXT: umov w9, v0.b[2]
-; CHECK-SD-NEXT: umov w11, v0.b[4]
-; CHECK-SD-NEXT: umov w12, v0.b[6]
-; CHECK-SD-NEXT: umov w10, v0.b[1]
-; CHECK-SD-NEXT: umov w13, v0.b[3]
-; CHECK-SD-NEXT: umov w14, v0.b[5]
-; CHECK-SD-NEXT: umov w15, v0.b[7]
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT: fmov s0, w8
-; CHECK-SD-NEXT: fmov s2, w9
-; CHECK-SD-NEXT: fmov s3, w11
-; CHECK-SD-NEXT: fmov s4, w12
-; CHECK-SD-NEXT: mov v0.s[1], w10
-; CHECK-SD-NEXT: mov v2.s[1], w13
-; CHECK-SD-NEXT: mov v3.s[1], w14
-; CHECK-SD-NEXT: mov v4.s[1], w15
-; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: and v2.8b, v2.8b, v1.8b
-; CHECK-SD-NEXT: and v3.8b, v3.8b, v1.8b
-; CHECK-SD-NEXT: and v1.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v5.b[4], v0.b[7]
+; CHECK-SD-NEXT: and v0.8b, v2.8b, v1.8b
+; CHECK-SD-NEXT: and v2.8b, v3.8b, v1.8b
+; CHECK-SD-NEXT: and v3.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT: and v1.8b, v5.8b, v1.8b
; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
@@ -3553,68 +3537,52 @@ entry:
define <16 x double> @stofp_v16i8_v16f64(<16 x i8> %a) {
; CHECK-SD-LABEL: stofp_v16i8_v16f64:
; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: umov w8, v0.b[0]
; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: umov w9, v0.b[1]
-; CHECK-SD-NEXT: umov w10, v0.b[2]
-; CHECK-SD-NEXT: umov w12, v0.b[4]
-; CHECK-SD-NEXT: umov w14, v0.b[6]
-; CHECK-SD-NEXT: umov w11, v0.b[3]
-; CHECK-SD-NEXT: umov w13, v0.b[5]
-; CHECK-SD-NEXT: fmov s2, w8
-; CHECK-SD-NEXT: umov w15, v1.b[0]
-; CHECK-SD-NEXT: umov w17, v1.b[2]
-; CHECK-SD-NEXT: umov w0, v1.b[4]
-; CHECK-SD-NEXT: umov w16, v1.b[1]
-; CHECK-SD-NEXT: umov w18, v1.b[3]
-; CHECK-SD-NEXT: umov w8, v0.b[7]
-; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: umov w10, v1.b[5]
-; CHECK-SD-NEXT: mov v2.s[1], w9
-; CHECK-SD-NEXT: umov w9, v1.b[6]
-; CHECK-SD-NEXT: fmov s3, w12
-; CHECK-SD-NEXT: umov w12, v1.b[7]
-; CHECK-SD-NEXT: fmov s1, w14
-; CHECK-SD-NEXT: fmov s4, w15
-; CHECK-SD-NEXT: fmov s5, w17
-; CHECK-SD-NEXT: fmov s6, w0
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: mov v3.s[1], w13
-; CHECK-SD-NEXT: fmov s7, w9
-; CHECK-SD-NEXT: mov v1.s[1], w8
-; CHECK-SD-NEXT: mov v4.s[1], w16
-; CHECK-SD-NEXT: mov v5.s[1], w18
-; CHECK-SD-NEXT: mov v6.s[1], w10
-; CHECK-SD-NEXT: shl v2.2s, v2.2s, #24
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-SD-NEXT: mov v7.s[1], w12
-; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24
-; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-SD-NEXT: shl v4.2s, v4.2s, #24
-; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
-; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24
-; CHECK-SD-NEXT: shl v6.2s, v6.2s, #24
+; CHECK-SD-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v5.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov v6.b[0], v1.b[0]
+; CHECK-SD-NEXT: mov v7.b[0], v1.b[2]
+; CHECK-SD-NEXT: mov v16.b[0], v1.b[4]
+; CHECK-SD-NEXT: mov v17.b[0], v1.b[6]
+; CHECK-SD-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v5.b[4], v0.b[7]
+; CHECK-SD-NEXT: mov v6.b[4], v1.b[1]
+; CHECK-SD-NEXT: mov v7.b[4], v1.b[3]
+; CHECK-SD-NEXT: mov v16.b[4], v1.b[5]
+; CHECK-SD-NEXT: mov v17.b[4], v1.b[7]
+; CHECK-SD-NEXT: shl v0.2s, v2.2s, #24
+; CHECK-SD-NEXT: shl v1.2s, v3.2s, #24
+; CHECK-SD-NEXT: shl v2.2s, v4.2s, #24
+; CHECK-SD-NEXT: shl v3.2s, v5.2s, #24
+; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24
+; CHECK-SD-NEXT: shl v5.2s, v7.2s, #24
+; CHECK-SD-NEXT: shl v6.2s, v16.2s, #24
+; CHECK-SD-NEXT: shl v7.2s, v17.2s, #24
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
+; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
; CHECK-SD-NEXT: sshr v3.2s, v3.2s, #24
-; CHECK-SD-NEXT: shl v7.2s, v7.2s, #24
; CHECK-SD-NEXT: sshr v4.2s, v4.2s, #24
-; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-SD-NEXT: sshr v5.2s, v5.2s, #24
; CHECK-SD-NEXT: sshr v6.2s, v6.2s, #24
+; CHECK-SD-NEXT: sshr v7.2s, v7.2s, #24
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-SD-NEXT: sshll v16.2d, v0.2s, #0
; CHECK-SD-NEXT: sshll v3.2d, v3.2s, #0
-; CHECK-SD-NEXT: sshr v7.2s, v7.2s, #24
; CHECK-SD-NEXT: sshll v4.2d, v4.2s, #0
-; CHECK-SD-NEXT: sshll v17.2d, v1.2s, #0
; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
-; CHECK-SD-NEXT: scvtf v0.2d, v2.2d
-; CHECK-SD-NEXT: scvtf v1.2d, v16.2d
-; CHECK-SD-NEXT: scvtf v2.2d, v3.2d
; CHECK-SD-NEXT: sshll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: scvtf v1.2d, v1.2d
+; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: scvtf v3.2d, v3.2d
; CHECK-SD-NEXT: scvtf v4.2d, v4.2d
-; CHECK-SD-NEXT: scvtf v3.2d, v17.2d
; CHECK-SD-NEXT: scvtf v5.2d, v5.2d
; CHECK-SD-NEXT: scvtf v6.2d, v6.2d
; CHECK-SD-NEXT: scvtf v7.2d, v7.2d
@@ -3654,63 +3622,47 @@ define <16 x double> @utofp_v16i8_v16f64(<16 x i8> %a) {
; CHECK-SD-LABEL: utofp_v16i8_v16f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: umov w8, v0.b[0]
-; CHECK-SD-NEXT: umov w10, v0.b[2]
-; CHECK-SD-NEXT: umov w9, v0.b[1]
-; CHECK-SD-NEXT: umov w12, v0.b[4]
-; CHECK-SD-NEXT: umov w11, v0.b[3]
-; CHECK-SD-NEXT: umov w13, v0.b[5]
-; CHECK-SD-NEXT: umov w18, v0.b[6]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v4.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v5.b[0], v0.b[4]
+; CHECK-SD-NEXT: mov v6.b[0], v0.b[6]
; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-SD-NEXT: umov w14, v2.b[0]
-; CHECK-SD-NEXT: umov w16, v2.b[2]
-; CHECK-SD-NEXT: umov w0, v2.b[4]
-; CHECK-SD-NEXT: fmov s3, w8
-; CHECK-SD-NEXT: umov w8, v0.b[7]
-; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: umov w10, v2.b[6]
-; CHECK-SD-NEXT: umov w15, v2.b[1]
-; CHECK-SD-NEXT: umov w17, v2.b[3]
-; CHECK-SD-NEXT: fmov s4, w12
-; CHECK-SD-NEXT: umov w12, v2.b[5]
-; CHECK-SD-NEXT: fmov s7, w18
-; CHECK-SD-NEXT: mov v3.s[1], w9
-; CHECK-SD-NEXT: umov w9, v2.b[7]
-; CHECK-SD-NEXT: fmov s2, w14
-; CHECK-SD-NEXT: fmov s5, w16
-; CHECK-SD-NEXT: fmov s6, w0
-; CHECK-SD-NEXT: mov v0.s[1], w11
-; CHECK-SD-NEXT: fmov s16, w10
-; CHECK-SD-NEXT: mov v4.s[1], w13
-; CHECK-SD-NEXT: mov v7.s[1], w8
-; CHECK-SD-NEXT: mov v2.s[1], w15
-; CHECK-SD-NEXT: mov v5.s[1], w17
-; CHECK-SD-NEXT: mov v6.s[1], w12
-; CHECK-SD-NEXT: and v3.8b, v3.8b, v1.8b
-; CHECK-SD-NEXT: mov v16.s[1], w9
-; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-SD-NEXT: and v4.8b, v4.8b, v1.8b
-; CHECK-SD-NEXT: and v7.8b, v7.8b, v1.8b
-; CHECK-SD-NEXT: and v2.8b, v2.8b, v1.8b
-; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-SD-NEXT: and v5.8b, v5.8b, v1.8b
-; CHECK-SD-NEXT: and v6.8b, v6.8b, v1.8b
-; CHECK-SD-NEXT: and v1.8b, v16.8b, v1.8b
-; CHECK-SD-NEXT: ushll v16.2d, v0.2s, #0
-; CHECK-SD-NEXT: ushll v17.2d, v4.2s, #0
+; CHECK-SD-NEXT: mov v7.b[0], v2.b[0]
+; CHECK-SD-NEXT: mov v16.b[0], v2.b[2]
+; CHECK-SD-NEXT: mov v17.b[0], v2.b[4]
+; CHECK-SD-NEXT: mov v18.b[0], v2.b[6]
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v4.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v5.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v6.b[4], v0.b[7]
+; CHECK-SD-NEXT: mov v7.b[4], v2.b[1]
+; CHECK-SD-NEXT: mov v16.b[4], v2.b[3]
+; CHECK-SD-NEXT: mov v17.b[4], v2.b[5]
+; CHECK-SD-NEXT: mov v18.b[4], v2.b[7]
+; CHECK-SD-NEXT: and v0.8b, v3.8b, v1.8b
+; CHECK-SD-NEXT: and v2.8b, v4.8b, v1.8b
+; CHECK-SD-NEXT: and v3.8b, v5.8b, v1.8b
+; CHECK-SD-NEXT: and v4.8b, v6.8b, v1.8b
+; CHECK-SD-NEXT: and v5.8b, v7.8b, v1.8b
+; CHECK-SD-NEXT: and v6.8b, v16.8b, v1.8b
+; CHECK-SD-NEXT: and v7.8b, v17.8b, v1.8b
+; CHECK-SD-NEXT: and v1.8b, v18.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
-; CHECK-SD-NEXT: ucvtf v0.2d, v3.2d
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0
; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
-; CHECK-SD-NEXT: ushll v18.2d, v1.2s, #0
-; CHECK-SD-NEXT: ucvtf v1.2d, v16.2d
-; CHECK-SD-NEXT: ucvtf v4.2d, v2.2d
-; CHECK-SD-NEXT: ucvtf v2.2d, v17.2d
-; CHECK-SD-NEXT: ucvtf v3.2d, v7.2d
-; CHECK-SD-NEXT: ucvtf v5.2d, v5.2d
-; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d
-; CHECK-SD-NEXT: ucvtf v7.2d, v18.2d
+; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT: ushll v16.2d, v1.2s, #0
+; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: ucvtf v1.2d, v2.2d
+; CHECK-SD-NEXT: ucvtf v2.2d, v3.2d
+; CHECK-SD-NEXT: ucvtf v3.2d, v4.2d
+; CHECK-SD-NEXT: ucvtf v4.2d, v5.2d
+; CHECK-SD-NEXT: ucvtf v5.2d, v6.2d
+; CHECK-SD-NEXT: ucvtf v6.2d, v7.2d
+; CHECK-SD-NEXT: ucvtf v7.2d, v16.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: utofp_v16i8_v16f64:
@@ -3747,143 +3699,111 @@ define <32 x double> @stofp_v32i8_v32f64(<32 x i8> %a) {
; CHECK-SD-LABEL: stofp_v32i8_v32f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: umov w9, v3.b[0]
-; CHECK-SD-NEXT: umov w11, v3.b[4]
-; CHECK-SD-NEXT: umov w13, v3.b[6]
-; CHECK-SD-NEXT: umov w18, v2.b[2]
-; CHECK-SD-NEXT: umov w10, v3.b[2]
-; CHECK-SD-NEXT: umov w12, v3.b[1]
-; CHECK-SD-NEXT: umov w16, v2.b[0]
-; CHECK-SD-NEXT: umov w14, v3.b[3]
-; CHECK-SD-NEXT: umov w15, v3.b[5]
-; CHECK-SD-NEXT: umov w17, v3.b[7]
-; CHECK-SD-NEXT: fmov s6, w9
-; CHECK-SD-NEXT: fmov s5, w11
-; CHECK-SD-NEXT: fmov s7, w13
-; CHECK-SD-NEXT: umov w13, v2.b[4]
-; CHECK-SD-NEXT: umov w11, v2.b[3]
-; CHECK-SD-NEXT: umov w9, v2.b[6]
-; CHECK-SD-NEXT: fmov s17, w18
-; CHECK-SD-NEXT: fmov s4, w10
-; CHECK-SD-NEXT: umov w10, v2.b[1]
-; CHECK-SD-NEXT: mov v6.s[1], w12
-; CHECK-SD-NEXT: fmov s3, w16
-; CHECK-SD-NEXT: umov w12, v2.b[5]
-; CHECK-SD-NEXT: mov v5.s[1], w15
-; CHECK-SD-NEXT: umov w15, v1.b[0]
-; CHECK-SD-NEXT: umov w16, v0.b[6]
-; CHECK-SD-NEXT: fmov s16, w13
-; CHECK-SD-NEXT: umov w13, v1.b[2]
-; CHECK-SD-NEXT: mov v17.s[1], w11
-; CHECK-SD-NEXT: umov w11, v1.b[6]
-; CHECK-SD-NEXT: fmov s18, w9
-; CHECK-SD-NEXT: umov w9, v1.b[4]
-; CHECK-SD-NEXT: mov v3.s[1], w10
-; CHECK-SD-NEXT: umov w10, v0.b[0]
-; CHECK-SD-NEXT: mov v4.s[1], w14
-; CHECK-SD-NEXT: mov v16.s[1], w12
-; CHECK-SD-NEXT: umov w12, v1.b[7]
-; CHECK-SD-NEXT: umov w14, v1.b[5]
-; CHECK-SD-NEXT: fmov s20, w13
-; CHECK-SD-NEXT: umov w13, v1.b[3]
-; CHECK-SD-NEXT: fmov s22, w15
-; CHECK-SD-NEXT: fmov s21, w11
-; CHECK-SD-NEXT: umov w11, v1.b[1]
-; CHECK-SD-NEXT: fmov s1, w9
-; CHECK-SD-NEXT: fmov s19, w10
-; CHECK-SD-NEXT: umov w10, v0.b[4]
-; CHECK-SD-NEXT: umov w9, v0.b[7]
-; CHECK-SD-NEXT: fmov s23, w16
-; CHECK-SD-NEXT: mov v7.s[1], w17
+; CHECK-SD-NEXT: mov v5.b[0], v1.b[6]
+; CHECK-SD-NEXT: mov v17.b[0], v1.b[4]
+; CHECK-SD-NEXT: mov v20.b[0], v1.b[2]
+; CHECK-SD-NEXT: mov v21.b[0], v1.b[0]
+; CHECK-SD-NEXT: mov v18.b[0], v0.b[0]
+; CHECK-SD-NEXT: mov v19.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov v22.b[0], v0.b[4]
+; CHECK-SD-NEXT: ext v16.16b, v1.16b, v1.16b, #8
+; CHECK-SD-NEXT: mov v2.b[0], v3.b[0]
+; CHECK-SD-NEXT: mov v4.b[0], v3.b[2]
+; CHECK-SD-NEXT: mov v6.b[0], v3.b[4]
+; CHECK-SD-NEXT: mov v7.b[0], v3.b[6]
+; CHECK-SD-NEXT: mov v5.b[4], v1.b[7]
+; CHECK-SD-NEXT: mov v17.b[4], v1.b[5]
+; CHECK-SD-NEXT: mov v20.b[4], v1.b[3]
+; CHECK-SD-NEXT: mov v21.b[4], v1.b[1]
+; CHECK-SD-NEXT: mov v19.b[4], v0.b[7]
+; CHECK-SD-NEXT: mov v22.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v18.b[4], v0.b[1]
+; CHECK-SD-NEXT: mov v23.b[0], v16.b[0]
+; CHECK-SD-NEXT: mov v2.b[4], v3.b[1]
+; CHECK-SD-NEXT: mov v4.b[4], v3.b[3]
+; CHECK-SD-NEXT: mov v6.b[4], v3.b[5]
+; CHECK-SD-NEXT: mov v7.b[4], v3.b[7]
+; CHECK-SD-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24
; CHECK-SD-NEXT: shl v17.2s, v17.2s, #24
-; CHECK-SD-NEXT: mov v21.s[1], w12
-; CHECK-SD-NEXT: mov v1.s[1], w14
-; CHECK-SD-NEXT: umov w14, v0.b[5]
-; CHECK-SD-NEXT: umov w12, v0.b[1]
-; CHECK-SD-NEXT: mov v20.s[1], w13
-; CHECK-SD-NEXT: umov w13, v0.b[2]
-; CHECK-SD-NEXT: mov v22.s[1], w11
-; CHECK-SD-NEXT: umov w11, v0.b[3]
-; CHECK-SD-NEXT: fmov s0, w10
-; CHECK-SD-NEXT: mov v23.s[1], w9
-; CHECK-SD-NEXT: umov w9, v2.b[7]
-; CHECK-SD-NEXT: shl v16.2s, v16.2s, #24
-; CHECK-SD-NEXT: shl v21.2s, v21.2s, #24
-; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-SD-NEXT: sshr v17.2s, v17.2s, #24
-; CHECK-SD-NEXT: mov v0.s[1], w14
-; CHECK-SD-NEXT: fmov s24, w13
-; CHECK-SD-NEXT: mov v19.s[1], w12
-; CHECK-SD-NEXT: sshr v16.2s, v16.2s, #24
-; CHECK-SD-NEXT: shl v6.2s, v6.2s, #24
; CHECK-SD-NEXT: shl v20.2s, v20.2s, #24
-; CHECK-SD-NEXT: sshr v21.2s, v21.2s, #24
-; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
-; CHECK-SD-NEXT: shl v2.2s, v23.2s, #24
-; CHECK-SD-NEXT: mov v18.s[1], w9
-; CHECK-SD-NEXT: mov v24.s[1], w11
-; CHECK-SD-NEXT: shl v22.2s, v22.2s, #24
-; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-SD-NEXT: shl v4.2s, v4.2s, #24
-; CHECK-SD-NEXT: shl v5.2s, v5.2s, #24
-; CHECK-SD-NEXT: sshll v21.2d, v21.2s, #0
-; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
-; CHECK-SD-NEXT: shl v7.2s, v7.2s, #24
+; CHECK-SD-NEXT: mov v24.b[0], v16.b[4]
+; CHECK-SD-NEXT: mov v23.b[4], v16.b[1]
+; CHECK-SD-NEXT: shl v18.2s, v18.2s, #24
; CHECK-SD-NEXT: shl v19.2s, v19.2s, #24
+; CHECK-SD-NEXT: sshr v5.2s, v5.2s, #24
+; CHECK-SD-NEXT: shl v1.2s, v2.2s, #24
+; CHECK-SD-NEXT: shl v2.2s, v4.2s, #24
+; CHECK-SD-NEXT: sshr v17.2s, v17.2s, #24
+; CHECK-SD-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-SD-NEXT: shl v0.2s, v21.2s, #24
+; CHECK-SD-NEXT: shl v4.2s, v6.2s, #24
+; CHECK-SD-NEXT: shl v6.2s, v7.2s, #24
+; CHECK-SD-NEXT: mov v7.b[0], v16.b[2]
+; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: sshr v20.2s, v20.2s, #24
+; CHECK-SD-NEXT: mov v21.b[0], v16.b[6]
+; CHECK-SD-NEXT: sshll v17.2d, v17.2s, #0
; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
-; CHECK-SD-NEXT: shl v18.2s, v18.2s, #24
-; CHECK-SD-NEXT: shl v23.2s, v24.2s, #24
-; CHECK-SD-NEXT: scvtf v21.2d, v21.2d
-; CHECK-SD-NEXT: scvtf v1.2d, v1.2d
-; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-SD-NEXT: sshr v22.2s, v22.2s, #24
+; CHECK-SD-NEXT: shl v22.2s, v22.2s, #24
+; CHECK-SD-NEXT: shl v3.2s, v3.2s, #24
+; CHECK-SD-NEXT: mov v24.b[4], v16.b[5]
; CHECK-SD-NEXT: sshr v19.2s, v19.2s, #24
-; CHECK-SD-NEXT: sshr v5.2s, v5.2s, #24
+; CHECK-SD-NEXT: scvtf v5.2d, v5.2d
+; CHECK-SD-NEXT: mov v7.b[4], v16.b[3]
+; CHECK-SD-NEXT: sshll v20.2d, v20.2s, #0
+; CHECK-SD-NEXT: scvtf v17.2d, v17.2d
; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: sshr v23.2s, v23.2s, #24
-; CHECK-SD-NEXT: sshr v4.2s, v4.2s, #24
-; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: mov v21.b[4], v16.b[7]
+; CHECK-SD-NEXT: sshr v3.2s, v3.2s, #24
+; CHECK-SD-NEXT: sshr v16.2s, v22.2s, #24
+; CHECK-SD-NEXT: sshll v19.2d, v19.2s, #0
+; CHECK-SD-NEXT: scvtf v20.2d, v20.2d
+; CHECK-SD-NEXT: shl v22.2s, v24.2s, #24
; CHECK-SD-NEXT: sshr v6.2s, v6.2s, #24
-; CHECK-SD-NEXT: sshll v20.2d, v20.2s, #0
-; CHECK-SD-NEXT: stp q1, q21, [x8, #160]
-; CHECK-SD-NEXT: shl v1.2s, v3.2s, #24
-; CHECK-SD-NEXT: sshr v3.2s, v18.2s, #24
; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: sshll v22.2d, v22.2s, #0
-; CHECK-SD-NEXT: sshll v18.2d, v23.2s, #0
-; CHECK-SD-NEXT: sshll v19.2d, v19.2s, #0
-; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0
-; CHECK-SD-NEXT: sshll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT: shl v7.2s, v7.2s, #24
+; CHECK-SD-NEXT: sshr v4.2s, v4.2s, #24
+; CHECK-SD-NEXT: stp q17, q5, [x8, #160]
+; CHECK-SD-NEXT: sshr v5.2s, v18.2s, #24
+; CHECK-SD-NEXT: shl v17.2s, v23.2s, #24
+; CHECK-SD-NEXT: sshll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: sshll v16.2d, v16.2s, #0
+; CHECK-SD-NEXT: shl v21.2s, v21.2s, #24
+; CHECK-SD-NEXT: scvtf v19.2d, v19.2d
+; CHECK-SD-NEXT: sshr v2.2s, v2.2s, #24
; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
-; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
-; CHECK-SD-NEXT: scvtf v20.2d, v20.2d
-; CHECK-SD-NEXT: scvtf v22.2d, v22.2d
-; CHECK-SD-NEXT: stp q0, q2, [x8, #32]
-; CHECK-SD-NEXT: sshll v2.2d, v3.2s, #0
-; CHECK-SD-NEXT: sshll v3.2d, v16.2s, #0
-; CHECK-SD-NEXT: sshll v16.2d, v17.2s, #0
-; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: stp q0, q20, [x8, #128]
+; CHECK-SD-NEXT: sshll v5.2d, v5.2s, #0
; CHECK-SD-NEXT: sshr v0.2s, v7.2s, #24
-; CHECK-SD-NEXT: scvtf v7.2d, v18.2d
-; CHECK-SD-NEXT: scvtf v17.2d, v19.2d
-; CHECK-SD-NEXT: stp q22, q20, [x8, #128]
-; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: sshr v7.2s, v17.2s, #24
; CHECK-SD-NEXT: scvtf v3.2d, v3.2d
; CHECK-SD-NEXT: scvtf v16.2d, v16.2d
-; CHECK-SD-NEXT: scvtf v1.2d, v1.2d
+; CHECK-SD-NEXT: sshr v18.2s, v21.2s, #24
+; CHECK-SD-NEXT: sshr v20.2s, v22.2s, #24
+; CHECK-SD-NEXT: sshll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT: scvtf v5.2d, v5.2d
; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: stp q17, q7, [x8]
-; CHECK-SD-NEXT: stp q3, q2, [x8, #224]
+; CHECK-SD-NEXT: sshll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT: sshll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: stp q16, q19, [x8, #32]
+; CHECK-SD-NEXT: sshll v17.2d, v18.2s, #0
+; CHECK-SD-NEXT: sshll v16.2d, v20.2s, #0
; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: scvtf v2.2d, v5.2d
-; CHECK-SD-NEXT: stp q1, q16, [x8, #192]
-; CHECK-SD-NEXT: scvtf v3.2d, v4.2d
-; CHECK-SD-NEXT: scvtf v1.2d, v6.2d
-; CHECK-SD-NEXT: stp q2, q0, [x8, #96]
-; CHECK-SD-NEXT: stp q1, q3, [x8, #64]
+; CHECK-SD-NEXT: scvtf v4.2d, v4.2d
+; CHECK-SD-NEXT: stp q5, q3, [x8]
+; CHECK-SD-NEXT: scvtf v3.2d, v7.2d
+; CHECK-SD-NEXT: scvtf v5.2d, v6.2d
+; CHECK-SD-NEXT: scvtf v17.2d, v17.2d
+; CHECK-SD-NEXT: scvtf v16.2d, v16.2d
+; CHECK-SD-NEXT: scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: stp q4, q5, [x8, #96]
+; CHECK-SD-NEXT: stp q3, q0, [x8, #192]
+; CHECK-SD-NEXT: scvtf v0.2d, v1.2d
+; CHECK-SD-NEXT: stp q16, q17, [x8, #224]
+; CHECK-SD-NEXT: stp q0, q2, [x8, #64]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: stofp_v32i8_v32f64:
@@ -3949,129 +3869,97 @@ entry:
define <32 x double> @utofp_v32i8_v32f64(<32 x i8> %a) {
; CHECK-SD-LABEL: utofp_v32i8_v32f64:
; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov v6.b[0], v1.b[6]
+; CHECK-SD-NEXT: mov v7.b[0], v1.b[4]
+; CHECK-SD-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-SD-NEXT: mov v16.b[0], v1.b[2]
+; CHECK-SD-NEXT: mov v17.b[0], v1.b[0]
+; CHECK-SD-NEXT: mov v19.b[0], v0.b[6]
+; CHECK-SD-NEXT: mov v20.b[0], v0.b[4]
+; CHECK-SD-NEXT: movi d5, #0x0000ff000000ff
+; CHECK-SD-NEXT: mov v24.b[0], v0.b[2]
+; CHECK-SD-NEXT: mov v25.b[0], v0.b[0]
; CHECK-SD-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT: ext v16.16b, v1.16b, v1.16b, #8
-; CHECK-SD-NEXT: movi d3, #0x0000ff000000ff
-; CHECK-SD-NEXT: umov w11, v2.b[0]
-; CHECK-SD-NEXT: umov w14, v2.b[4]
-; CHECK-SD-NEXT: umov w12, v2.b[2]
-; CHECK-SD-NEXT: umov w15, v2.b[6]
-; CHECK-SD-NEXT: umov w16, v2.b[1]
-; CHECK-SD-NEXT: umov w13, v16.b[2]
-; CHECK-SD-NEXT: umov w17, v16.b[0]
-; CHECK-SD-NEXT: umov w9, v2.b[3]
-; CHECK-SD-NEXT: umov w10, v2.b[5]
-; CHECK-SD-NEXT: fmov s4, w11
-; CHECK-SD-NEXT: fmov s6, w14
-; CHECK-SD-NEXT: umov w14, v16.b[6]
-; CHECK-SD-NEXT: fmov s5, w12
-; CHECK-SD-NEXT: umov w12, v16.b[4]
-; CHECK-SD-NEXT: umov w11, v16.b[1]
-; CHECK-SD-NEXT: fmov s7, w15
-; CHECK-SD-NEXT: umov w15, v16.b[7]
-; CHECK-SD-NEXT: fmov s18, w13
-; CHECK-SD-NEXT: mov v4.s[1], w16
-; CHECK-SD-NEXT: umov w16, v1.b[4]
-; CHECK-SD-NEXT: umov w13, v1.b[6]
-; CHECK-SD-NEXT: fmov s17, w17
-; CHECK-SD-NEXT: fmov s20, w14
-; CHECK-SD-NEXT: mov v5.s[1], w9
-; CHECK-SD-NEXT: umov w9, v1.b[7]
-; CHECK-SD-NEXT: fmov s19, w12
-; CHECK-SD-NEXT: mov v6.s[1], w10
-; CHECK-SD-NEXT: umov w10, v1.b[2]
-; CHECK-SD-NEXT: umov w12, v0.b[6]
-; CHECK-SD-NEXT: umov w14, v1.b[0]
-; CHECK-SD-NEXT: mov v17.s[1], w11
-; CHECK-SD-NEXT: mov v20.s[1], w15
-; CHECK-SD-NEXT: umov w11, v1.b[5]
-; CHECK-SD-NEXT: umov w15, v1.b[3]
-; CHECK-SD-NEXT: fmov s21, w16
-; CHECK-SD-NEXT: umov w16, v1.b[1]
-; CHECK-SD-NEXT: fmov s1, w13
-; CHECK-SD-NEXT: umov w13, v0.b[4]
-; CHECK-SD-NEXT: and v6.8b, v6.8b, v3.8b
-; CHECK-SD-NEXT: fmov s22, w10
-; CHECK-SD-NEXT: fmov s23, w12
-; CHECK-SD-NEXT: fmov s24, w14
-; CHECK-SD-NEXT: mov v21.s[1], w11
-; CHECK-SD-NEXT: umov w10, v0.b[5]
-; CHECK-SD-NEXT: umov w12, v0.b[0]
-; CHECK-SD-NEXT: mov v1.s[1], w9
-; CHECK-SD-NEXT: umov w9, v0.b[7]
-; CHECK-SD-NEXT: umov w11, v16.b[3]
-; CHECK-SD-NEXT: mov v22.s[1], w15
-; CHECK-SD-NEXT: mov v24.s[1], w16
-; CHECK-SD-NEXT: fmov s25, w13
-; CHECK-SD-NEXT: umov w13, v0.b[3]
-; CHECK-SD-NEXT: and v20.8b, v20.8b, v3.8b
-; CHECK-SD-NEXT: and v5.8b, v5.8b, v3.8b
-; CHECK-SD-NEXT: and v21.8b, v21.8b, v3.8b
-; CHECK-SD-NEXT: mov v23.s[1], w9
-; CHECK-SD-NEXT: umov w9, v0.b[2]
-; CHECK-SD-NEXT: and v1.8b, v1.8b, v3.8b
-; CHECK-SD-NEXT: mov v25.s[1], w10
-; CHECK-SD-NEXT: umov w10, v0.b[1]
-; CHECK-SD-NEXT: and v0.8b, v22.8b, v3.8b
-; CHECK-SD-NEXT: fmov s22, w12
-; CHECK-SD-NEXT: and v24.8b, v24.8b, v3.8b
-; CHECK-SD-NEXT: umov w12, v16.b[5]
-; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-SD-NEXT: ushll v21.2d, v21.2s, #0
-; CHECK-SD-NEXT: mov v18.s[1], w11
-; CHECK-SD-NEXT: and v16.8b, v23.8b, v3.8b
-; CHECK-SD-NEXT: fmov s23, w9
-; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: and v25.8b, v25.8b, v3.8b
-; CHECK-SD-NEXT: ushll v24.2d, v24.2s, #0
-; CHECK-SD-NEXT: mov v22.s[1], w10
-; CHECK-SD-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-SD-NEXT: ucvtf v21.2d, v21.2d
-; CHECK-SD-NEXT: umov w9, v2.b[7]
-; CHECK-SD-NEXT: mov v23.s[1], w13
+; CHECK-SD-NEXT: mov v6.b[4], v1.b[7]
+; CHECK-SD-NEXT: mov v7.b[4], v1.b[5]
+; CHECK-SD-NEXT: mov v18.b[0], v3.b[0]
+; CHECK-SD-NEXT: mov v21.b[0], v3.b[2]
+; CHECK-SD-NEXT: mov v23.b[0], v3.b[4]
+; CHECK-SD-NEXT: mov v16.b[4], v1.b[3]
+; CHECK-SD-NEXT: mov v17.b[4], v1.b[1]
+; CHECK-SD-NEXT: mov v1.b[0], v3.b[6]
+; CHECK-SD-NEXT: mov v19.b[4], v0.b[7]
+; CHECK-SD-NEXT: mov v20.b[4], v0.b[5]
+; CHECK-SD-NEXT: mov v24.b[4], v0.b[3]
+; CHECK-SD-NEXT: mov v25.b[4], v0.b[1]
+; CHECK-SD-NEXT: and v6.8b, v6.8b, v5.8b
+; CHECK-SD-NEXT: and v7.8b, v7.8b, v5.8b
+; CHECK-SD-NEXT: mov v18.b[4], v3.b[1]
+; CHECK-SD-NEXT: mov v21.b[4], v3.b[3]
+; CHECK-SD-NEXT: mov v23.b[4], v3.b[5]
+; CHECK-SD-NEXT: and v16.8b, v16.8b, v5.8b
+; CHECK-SD-NEXT: and v17.8b, v17.8b, v5.8b
+; CHECK-SD-NEXT: mov v1.b[4], v3.b[7]
+; CHECK-SD-NEXT: and v3.8b, v19.8b, v5.8b
+; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-SD-NEXT: and v20.8b, v20.8b, v5.8b
; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0
-; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-SD-NEXT: ushll v2.2d, v25.2s, #0
-; CHECK-SD-NEXT: mov v19.s[1], w12
-; CHECK-SD-NEXT: ucvtf v24.2d, v24.2d
-; CHECK-SD-NEXT: and v18.8b, v18.8b, v3.8b
-; CHECK-SD-NEXT: stp q21, q1, [x8, #160]
-; CHECK-SD-NEXT: and v1.8b, v22.8b, v3.8b
+; CHECK-SD-NEXT: mov v4.b[0], v2.b[0]
+; CHECK-SD-NEXT: mov v22.b[0], v2.b[2]
+; CHECK-SD-NEXT: ushll v17.2d, v17.2s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0
+; CHECK-SD-NEXT: mov v19.b[0], v2.b[4]
+; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d
+; CHECK-SD-NEXT: ucvtf v3.2d, v7.2d
+; CHECK-SD-NEXT: ushll v20.2d, v20.2s, #0
+; CHECK-SD-NEXT: mov v7.b[0], v2.b[6]
; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d
-; CHECK-SD-NEXT: and v23.8b, v23.8b, v3.8b
-; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-SD-NEXT: mov v7.s[1], w9
-; CHECK-SD-NEXT: stp q24, q0, [x8, #128]
-; CHECK-SD-NEXT: and v0.8b, v19.8b, v3.8b
-; CHECK-SD-NEXT: ushll v18.2d, v18.2s, #0
-; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-SD-NEXT: ushll v19.2d, v23.2s, #0
-; CHECK-SD-NEXT: stp q2, q16, [x8, #32]
-; CHECK-SD-NEXT: and v16.8b, v17.8b, v3.8b
-; CHECK-SD-NEXT: ushll v17.2d, v20.2s, #0
-; CHECK-SD-NEXT: and v2.8b, v7.8b, v3.8b
-; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-SD-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-SD-NEXT: ucvtf v7.2d, v19.2d
-; CHECK-SD-NEXT: and v3.8b, v4.8b, v3.8b
-; CHECK-SD-NEXT: ushll v4.2d, v6.2s, #0
+; CHECK-SD-NEXT: and v24.8b, v24.8b, v5.8b
; CHECK-SD-NEXT: ucvtf v17.2d, v17.2d
-; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0
-; CHECK-SD-NEXT: ucvtf v6.2d, v18.2d
; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: mov v4.b[4], v2.b[1]
+; CHECK-SD-NEXT: ucvtf v20.2d, v20.2d
+; CHECK-SD-NEXT: mov v22.b[4], v2.b[3]
+; CHECK-SD-NEXT: mov v19.b[4], v2.b[5]
+; CHECK-SD-NEXT: stp q3, q6, [x8, #160]
+; CHECK-SD-NEXT: and v6.8b, v25.8b, v5.8b
+; CHECK-SD-NEXT: and v1.8b, v1.8b, v5.8b
+; CHECK-SD-NEXT: mov v7.b[4], v2.b[7]
+; CHECK-SD-NEXT: and v2.8b, v23.8b, v5.8b
+; CHECK-SD-NEXT: ushll v3.2d, v24.2s, #0
+; CHECK-SD-NEXT: stp q17, q16, [x8, #128]
+; CHECK-SD-NEXT: and v16.8b, v21.8b, v5.8b
+; CHECK-SD-NEXT: and v4.8b, v4.8b, v5.8b
+; CHECK-SD-NEXT: stp q20, q0, [x8, #32]
+; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT: and v0.8b, v18.8b, v5.8b
+; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-SD-NEXT: stp q1, q7, [x8]
-; CHECK-SD-NEXT: ushll v1.2d, v5.2s, #0
-; CHECK-SD-NEXT: ucvtf v5.2d, v16.2d
-; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-SD-NEXT: stp q0, q17, [x8, #224]
-; CHECK-SD-NEXT: ucvtf v0.2d, v4.2d
; CHECK-SD-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0
+; CHECK-SD-NEXT: and v7.8b, v7.8b, v5.8b
+; CHECK-SD-NEXT: and v17.8b, v19.8b, v5.8b
+; CHECK-SD-NEXT: ucvtf v6.2d, v6.2d
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: and v18.8b, v22.8b, v5.8b
; CHECK-SD-NEXT: ucvtf v1.2d, v1.2d
-; CHECK-SD-NEXT: stp q5, q6, [x8, #192]
-; CHECK-SD-NEXT: stp q0, q2, [x8, #96]
-; CHECK-SD-NEXT: stp q3, q1, [x8, #64]
+; CHECK-SD-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT: ucvtf v16.2d, v16.2d
+; CHECK-SD-NEXT: ushll v5.2d, v7.2s, #0
+; CHECK-SD-NEXT: ushll v7.2d, v17.2s, #0
+; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: stp q6, q3, [x8]
+; CHECK-SD-NEXT: ushll v3.2d, v18.2s, #0
+; CHECK-SD-NEXT: stp q2, q1, [x8, #224]
+; CHECK-SD-NEXT: ucvtf v5.2d, v5.2d
+; CHECK-SD-NEXT: ucvtf v1.2d, v7.2d
+; CHECK-SD-NEXT: stp q0, q16, [x8, #192]
+; CHECK-SD-NEXT: ucvtf v2.2d, v3.2d
+; CHECK-SD-NEXT: ucvtf v0.2d, v4.2d
+; CHECK-SD-NEXT: stp q1, q5, [x8, #96]
+; CHECK-SD-NEXT: stp q0, q2, [x8, #64]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: utofp_v32i8_v32f64:
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index 07772b716ec58..c039da26b7c15 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -554,11 +554,9 @@ define <4 x i8> @bitcast_i32_to_v4i8(i32 %word) {
define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
; CHECK-LE-LABEL: bitcast_i16_to_v2i8:
; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: fmov s0, w0
-; CHECK-LE-NEXT: umov w8, v0.b[0]
-; CHECK-LE-NEXT: umov w9, v0.b[1]
-; CHECK-LE-NEXT: fmov s0, w8
-; CHECK-LE-NEXT: mov v0.s[1], w9
+; CHECK-LE-NEXT: fmov s1, w0
+; CHECK-LE-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-LE-NEXT: mov v0.b[4], v1.b[1]
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT: ret
;
@@ -566,11 +564,9 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: fmov s0, w0
; CHECK-BE-NEXT: rev16 v0.16b, v0.16b
-; CHECK-BE-NEXT: umov w8, v0.b[0]
-; CHECK-BE-NEXT: umov w9, v0.b[1]
-; CHECK-BE-NEXT: fmov s0, w8
-; CHECK-BE-NEXT: mov v0.s[1], w9
-; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
+; CHECK-BE-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-BE-NEXT: mov v1.b[4], v0.b[1]
+; CHECK-BE-NEXT: rev64 v0.2s, v1.2s
; CHECK-BE-NEXT: ret
%ret = bitcast i16 %word to <2 x i8>
ret <2 x i8> %ret
diff --git a/llvm/test/CodeGen/AArch64/shuffle-extend.ll b/llvm/test/CodeGen/AArch64/shuffle-extend.ll
index bb31380cc3ade..7658e5ab6936b 100644
--- a/llvm/test/CodeGen/AArch64/shuffle-extend.ll
+++ b/llvm/test/CodeGen/AArch64/shuffle-extend.ll
@@ -4,10 +4,8 @@
define <2 x i8> @test_v16i8_v2i32_824(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_v16i8_v2i32_824:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.b[8]
-; CHECK-NEXT: umov w9, v1.b[8]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: mov v0.b[0], v0.b[8]
+; CHECK-NEXT: mov v0.b[4], v1.b[8]
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
; CHECK-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <2 x i32> <i32 8, i32 24>
@@ -18,10 +16,8 @@ define <2 x i8> @test_v16i8_v2i32_824(<16 x i8> %a, <16 x i8> %b) {
define <2 x i8> @test_v16i8_v2i32_016(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_v16i8_v2i32_016:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.b[0]
-; CHECK-NEXT: umov w9, v1.b[0]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: mov v0.b[0], v0.b[0]
+; CHECK-NEXT: mov v0.b[4], v1.b[0]
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
; CHECK-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <2 x i32> <i32 0, i32 16>
@@ -33,11 +29,9 @@ define <2 x i8> @test_v8i8_v2i32_08(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_v8i8_v2i32_08:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[0]
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: umov w9, v1.b[0]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: mov v0.b[0], v0.b[0]
+; CHECK-NEXT: mov v0.b[4], v1.b[0]
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
; CHECK-NEXT: ret
%c = shufflevector <8 x i8> %a, <8 x i8> %b, <2 x i32> <i32 0, i32 8>
@@ -48,10 +42,8 @@ define <2 x i8> @test_v8i8_v2i32_08(<8 x i8> %a, <8 x i8> %b) {
define <2 x i16> @test_v8i16_v2i32_08(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_v8i16_v2i32_08:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.h[0]
-; CHECK-NEXT: umov w9, v1.h[0]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: mov v0.h[0], v0.h[0]
+; CHECK-NEXT: mov v0.h[2], v1.h[0]
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
; CHECK-NEXT: ret
%c = shufflevector <8 x i16> %a, <8 x i16> %b, <2 x i32> <i32 0, i32 8>
@@ -63,11 +55,9 @@ define <2 x i16> @test_v4i16_v2i32_04(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_v4i16_v2i32_04:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[0]
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: umov w9, v1.h[0]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: mov v0.h[0], v0.h[0]
+; CHECK-NEXT: mov v0.h[2], v1.h[0]
; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
; CHECK-NEXT: ret
%c = shufflevector <4 x i16> %a, <4 x i16> %b, <2 x i32> <i32 0, i32 4>
@@ -79,14 +69,10 @@ define <2 x i16> @test_v4i16_v2i32_04(<4 x i16> %a, <4 x i16> %b) {
define <4 x i8> @test_v16i8_v4i16_824(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_v16i8_v4i16_824:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.b[8]
-; CHECK-NEXT: umov w9, v1.b[8]
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w8, v0.b[0]
-; CHECK-NEXT: mov v2.h[1], w9
-; CHECK-NEXT: mov v2.h[2], w8
-; CHECK-NEXT: umov w8, v1.b[0]
-; CHECK-NEXT: mov v2.h[3], w8
+; CHECK-NEXT: mov v2.b[0], v0.b[8]
+; CHECK-NEXT: mov v2.b[2], v1.b[8]
+; CHECK-NEXT: mov v2.b[4], v0.b[0]
+; CHECK-NEXT: mov v2.b[6], v1.b[0]
; CHECK-NEXT: add v0.4h, v2.4h, v2.4h
; CHECK-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <4 x i32> <i32 8, i32 24, i32 0, i32 16>
@@ -97,14 +83,10 @@ define <4 x i8> @test_v16i8_v4i16_824(<16 x i8> %a, <16 x i8> %b) {
define <4 x i8> @test_v16i8_v4i16_016(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_v16i8_v4i16_016:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.b[0]
-; CHECK-NEXT: umov w9, v1.b[0]
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w8, v0.b[4]
-; CHECK-NEXT: mov v2.h[1], w9
-; CHECK-NEXT: mov v2.h[2], w8
-; CHECK-NEXT: umov w8, v1.b[4]
-; CHECK-NEXT: mov v2.h[3], w8
+; CHECK-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-NEXT: mov v2.b[2], v1.b[0]
+; CHECK-NEXT: mov v2.b[4], v0.b[4]
+; CHECK-NEXT: mov v2.b[6], v1.b[4]
; CHECK-NEXT: add v0.4h, v2.4h, v2.4h
; CHECK-NEXT: ret
%c = shufflevector <16 x i8> %a, <16 x i8> %b, <4 x i32> <i32 0, i32 16, i32 4, i32 20>
@@ -116,15 +98,11 @@ define <4 x i8> @test_v8i8_v4i16_08(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_v8i8_v4i16_08:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[0]
+; CHECK-NEXT: mov v2.b[0], v0.b[0]
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: umov w9, v1.b[0]
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w8, v0.b[4]
-; CHECK-NEXT: mov v2.h[1], w9
-; CHECK-NEXT: mov v2.h[2], w8
-; CHECK-NEXT: umov w8, v1.b[4]
-; CHECK-NEXT: mov v2.h[3], w8
+; CHECK-NEXT: mov v2.b[2], v1.b[0]
+; CHECK-NEXT: mov v2.b[4], v0.b[4]
+; CHECK-NEXT: mov v2.b[6], v1.b[4]
; CHECK-NEXT: add v0.4h, v2.4h, v2.4h
; CHECK-NEXT: ret
%c = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 8, i32 4, i32 12>
@@ -215,23 +193,19 @@ define i1 @test2(ptr %add.ptr, ptr %result, <2 x i64> %hi, <2 x i64> %lo) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q2, [x0]
; CHECK-NEXT: movi v3.16b, #1
+; CHECK-NEXT: mov w9, #1 // =0x1
; CHECK-NEXT: cmgt v0.2d, v2.2d, v0.2d
; CHECK-NEXT: cmgt v4.2d, v1.2d, v2.2d
; CHECK-NEXT: sub v1.2d, v2.2d, v1.2d
+; CHECK-NEXT: dup v2.2d, x9
; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
; CHECK-NEXT: and v3.16b, v4.16b, v3.16b
-; CHECK-NEXT: umov w8, v0.b[8]
-; CHECK-NEXT: umov w9, v3.b[8]
-; CHECK-NEXT: umov w10, v0.b[0]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: umov w8, v3.b[0]
-; CHECK-NEXT: fmov s3, w10
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: mov w9, #1 // =0x1
-; CHECK-NEXT: mov v3.s[1], w8
-; CHECK-NEXT: dup v2.2d, x9
-; CHECK-NEXT: add v0.2s, v0.2s, v0.2s
-; CHECK-NEXT: orr v0.8b, v0.8b, v3.8b
+; CHECK-NEXT: mov v5.b[0], v0.b[8]
+; CHECK-NEXT: mov v0.b[0], v0.b[0]
+; CHECK-NEXT: mov v5.b[4], v3.b[8]
+; CHECK-NEXT: mov v0.b[4], v3.b[0]
+; CHECK-NEXT: add v3.2s, v5.2s, v5.2s
+; CHECK-NEXT: orr v0.8b, v3.8b, v0.8b
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: add v0.2d, v1.2d, v2.2d
diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
index a6b43d514594e..d31659c30f21d 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -243,26 +243,18 @@ define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) {
; CHECK-LABEL: sitofp_v8i8_double:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[0]
-; CHECK-NEXT: umov w9, v0.b[2]
-; CHECK-NEXT: umov w11, v0.b[4]
-; CHECK-NEXT: umov w12, v0.b[6]
-; CHECK-NEXT: umov w10, v0.b[1]
-; CHECK-NEXT: umov w13, v0.b[3]
-; CHECK-NEXT: umov w14, v0.b[5]
-; CHECK-NEXT: umov w15, v0.b[7]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: fmov s2, w11
-; CHECK-NEXT: fmov s3, w12
-; CHECK-NEXT: mov v0.s[1], w10
-; CHECK-NEXT: mov v1.s[1], w13
-; CHECK-NEXT: mov v2.s[1], w14
-; CHECK-NEXT: mov v3.s[1], w15
-; CHECK-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-NEXT: shl v2.2s, v2.2s, #24
-; CHECK-NEXT: shl v3.2s, v3.2s, #24
+; CHECK-NEXT: mov v1.b[0], v0.b[0]
+; CHECK-NEXT: mov v2.b[0], v0.b[2]
+; CHECK-NEXT: mov v3.b[0], v0.b[4]
+; CHECK-NEXT: mov v4.b[0], v0.b[6]
+; CHECK-NEXT: mov v1.b[4], v0.b[1]
+; CHECK-NEXT: mov v2.b[4], v0.b[3]
+; CHECK-NEXT: mov v3.b[4], v0.b[5]
+; CHECK-NEXT: mov v4.b[4], v0.b[7]
+; CHECK-NEXT: shl v0.2s, v1.2s, #24
+; CHECK-NEXT: shl v1.2s, v2.2s, #24
+; CHECK-NEXT: shl v2.2s, v3.2s, #24
+; CHECK-NEXT: shl v3.2s, v4.2s, #24
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
; CHECK-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-NEXT: sshr v2.2s, v2.2s, #24
@@ -283,68 +275,52 @@ define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) {
define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) {
; CHECK-LABEL: sitofp_v16i8_double:
; CHECK: // %bb.0:
-; CHECK-NEXT: umov w8, v0.b[0]
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: umov w9, v0.b[1]
-; CHECK-NEXT: umov w10, v0.b[2]
-; CHECK-NEXT: umov w12, v0.b[4]
-; CHECK-NEXT: umov w14, v0.b[6]
-; CHECK-NEXT: umov w11, v0.b[3]
-; CHECK-NEXT: umov w13, v0.b[5]
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w15, v1.b[0]
-; CHECK-NEXT: umov w17, v1.b[2]
-; CHECK-NEXT: umov w0, v1.b[4]
-; CHECK-NEXT: umov w16, v1.b[1]
-; CHECK-NEXT: umov w18, v1.b[3]
-; CHECK-NEXT: umov w8, v0.b[7]
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: umov w10, v1.b[5]
-; CHECK-NEXT: mov v2.s[1], w9
-; CHECK-NEXT: umov w9, v1.b[6]
-; CHECK-NEXT: fmov s3, w12
-; CHECK-NEXT: umov w12, v1.b[7]
-; CHECK-NEXT: fmov s1, w14
-; CHECK-NEXT: fmov s4, w15
-; CHECK-NEXT: fmov s5, w17
-; CHECK-NEXT: fmov s6, w0
-; CHECK-NEXT: mov v0.s[1], w11
-; CHECK-NEXT: mov v3.s[1], w13
-; CHECK-NEXT: fmov s7, w9
-; CHECK-NEXT: mov v1.s[1], w8
-; CHECK-NEXT: mov v4.s[1], w16
-; CHECK-NEXT: mov v5.s[1], w18
-; CHECK-NEXT: mov v6.s[1], w10
-; CHECK-NEXT: shl v2.2s, v2.2s, #24
-; CHECK-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-NEXT: mov v7.s[1], w12
-; CHECK-NEXT: shl v3.2s, v3.2s, #24
-; CHECK-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-NEXT: shl v4.2s, v4.2s, #24
-; CHECK-NEXT: sshr v2.2s, v2.2s, #24
-; CHECK-NEXT: shl v5.2s, v5.2s, #24
-; CHECK-NEXT: shl v6.2s, v6.2s, #24
+; CHECK-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-NEXT: mov v4.b[0], v0.b[4]
+; CHECK-NEXT: mov v5.b[0], v0.b[6]
+; CHECK-NEXT: mov v6.b[0], v1.b[0]
+; CHECK-NEXT: mov v7.b[0], v1.b[2]
+; CHECK-NEXT: mov v16.b[0], v1.b[4]
+; CHECK-NEXT: mov v17.b[0], v1.b[6]
+; CHECK-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-NEXT: mov v4.b[4], v0.b[5]
+; CHECK-NEXT: mov v5.b[4], v0.b[7]
+; CHECK-NEXT: mov v6.b[4], v1.b[1]
+; CHECK-NEXT: mov v7.b[4], v1.b[3]
+; CHECK-NEXT: mov v16.b[4], v1.b[5]
+; CHECK-NEXT: mov v17.b[4], v1.b[7]
+; CHECK-NEXT: shl v0.2s, v2.2s, #24
+; CHECK-NEXT: shl v1.2s, v3.2s, #24
+; CHECK-NEXT: shl v2.2s, v4.2s, #24
+; CHECK-NEXT: shl v3.2s, v5.2s, #24
+; CHECK-NEXT: shl v4.2s, v6.2s, #24
+; CHECK-NEXT: shl v5.2s, v7.2s, #24
+; CHECK-NEXT: shl v6.2s, v16.2s, #24
+; CHECK-NEXT: shl v7.2s, v17.2s, #24
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-NEXT: sshr v1.2s, v1.2s, #24
+; CHECK-NEXT: sshr v2.2s, v2.2s, #24
; CHECK-NEXT: sshr v3.2s, v3.2s, #24
-; CHECK-NEXT: shl v7.2s, v7.2s, #24
; CHECK-NEXT: sshr v4.2s, v4.2s, #24
-; CHECK-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-NEXT: sshr v5.2s, v5.2s, #24
; CHECK-NEXT: sshr v6.2s, v6.2s, #24
+; CHECK-NEXT: sshr v7.2s, v7.2s, #24
+; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: sshll v1.2d, v1.2s, #0
; CHECK-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-NEXT: sshll v16.2d, v0.2s, #0
; CHECK-NEXT: sshll v3.2d, v3.2s, #0
-; CHECK-NEXT: sshr v7.2s, v7.2s, #24
; CHECK-NEXT: sshll v4.2d, v4.2s, #0
-; CHECK-NEXT: sshll v17.2d, v1.2s, #0
; CHECK-NEXT: sshll v5.2d, v5.2s, #0
; CHECK-NEXT: sshll v6.2d, v6.2s, #0
-; CHECK-NEXT: scvtf v0.2d, v2.2d
-; CHECK-NEXT: scvtf v1.2d, v16.2d
-; CHECK-NEXT: scvtf v2.2d, v3.2d
; CHECK-NEXT: sshll v7.2d, v7.2s, #0
+; CHECK-NEXT: scvtf v0.2d, v0.2d
+; CHECK-NEXT: scvtf v1.2d, v1.2d
+; CHECK-NEXT: scvtf v2.2d, v2.2d
+; CHECK-NEXT: scvtf v3.2d, v3.2d
; CHECK-NEXT: scvtf v4.2d, v4.2d
-; CHECK-NEXT: scvtf v3.2d, v17.2d
; CHECK-NEXT: scvtf v5.2d, v5.2d
; CHECK-NEXT: scvtf v6.2d, v6.2d
; CHECK-NEXT: scvtf v7.2d, v7.2d
@@ -420,27 +396,19 @@ define <8 x double> @uitofp_v8i8_double(<8 x i8> %a) {
; CHECK-LABEL: uitofp_v8i8_double:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.b[0]
-; CHECK-NEXT: umov w9, v0.b[2]
-; CHECK-NEXT: umov w11, v0.b[4]
-; CHECK-NEXT: umov w12, v0.b[6]
-; CHECK-NEXT: umov w10, v0.b[1]
-; CHECK-NEXT: umov w13, v0.b[3]
-; CHECK-NEXT: umov w14, v0.b[5]
-; CHECK-NEXT: umov w15, v0.b[7]
+; CHECK-NEXT: mov v2.b[0], v0.b[0]
+; CHECK-NEXT: mov v3.b[0], v0.b[2]
+; CHECK-NEXT: mov v4.b[0], v0.b[4]
+; CHECK-NEXT: mov v5.b[0], v0.b[6]
; CHECK-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: fmov s3, w11
-; CHECK-NEXT: fmov s4, w12
-; CHECK-NEXT: mov v0.s[1], w10
-; CHECK-NEXT: mov v2.s[1], w13
-; CHECK-NEXT: mov v3.s[1], w14
-; CHECK-NEXT: mov v4.s[1], w15
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
-; CHECK-NEXT: and v3.8b, v3.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v4.8b, v1.8b
+; CHECK-NEXT: mov v2.b[4], v0.b[1]
+; CHECK-NEXT: mov v3.b[4], v0.b[3]
+; CHECK-NEXT: mov v4.b[4], v0.b[5]
+; CHECK-NEXT: mov v5.b[4], v0.b[7]
+; CHECK-NEXT: and v0.8b, v2.8b, v1.8b
+; CHECK-NEXT: and v2.8b, v3.8b, v1.8b
+; CHECK-NEXT: and v3.8b, v4.8b, v1.8b
+; CHECK-NEXT: and v1.8b, v5.8b, v1.8b
; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
; CHECK-NEXT: ushll v3.2d, v3.2s, #0
@@ -458,63 +426,47 @@ define <16 x double> @uitofp_v16i8_double(<16 x i8> %a) {
; CHECK-LABEL: uitofp_v16i8_double:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: umov w8, v0.b[0]
-; CHECK-NEXT: umov w10, v0.b[2]
-; CHECK-NEXT: umov w9, v0.b[1]
-; CHECK-NEXT: umov w12, v0.b[4]
-; CHECK-NEXT: umov w11, v0.b[3]
-; CHECK-NEXT: umov w13, v0.b[5]
-; CHECK-NEXT: umov w18, v0.b[6]
+; CHECK-NEXT: mov v3.b[0], v0.b[0]
+; CHECK-NEXT: mov v4.b[0], v0.b[2]
+; CHECK-NEXT: mov v5.b[0], v0.b[4]
+; CHECK-NEXT: mov v6.b[0], v0.b[6]
; CHECK-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-NEXT: umov w14, v2.b[0]
-; CHECK-NEXT: umov w16, v2.b[2]
-; CHECK-NEXT: umov w0, v2.b[4]
-; CHECK-NEXT: fmov s3, w8
-; CHECK-NEXT: umov w8, v0.b[7]
-; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: umov w10, v2.b[6]
-; CHECK-NEXT: umov w15, v2.b[1]
-; CHECK-NEXT: umov w17, v2.b[3]
-; CHECK-NEXT: fmov s4, w12
-; CHECK-NEXT: umov w12, v2.b[5]
-; CHECK-NEXT: fmov s7, w18
-; CHECK-NEXT: mov v3.s[1], w9
-; CHECK-NEXT: umov w9, v2.b[7]
-; CHECK-NEXT: fmov s2, w14
-; CHECK-NEXT: fmov s5, w16
-; CHECK-NEXT: fmov s6, w0
-; CHECK-NEXT: mov v0.s[1], w11
-; CHECK-NEXT: fmov s16, w10
-; CHECK-NEXT: mov v4.s[1], w13
-; CHECK-NEXT: mov v7.s[1], w8
-; CHECK-NEXT: mov v2.s[1], w15
-; CHECK-NEXT: mov v5.s[1], w17
-; CHECK-NEXT: mov v6.s[1], w12
-; CHECK-NEXT: and v3.8b, v3.8b, v1.8b
-; CHECK-NEXT: mov v16.s[1], w9
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v4.8b, v4.8b, v1.8b
-; CHECK-NEXT: and v7.8b, v7.8b, v1.8b
-; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
-; CHECK-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-NEXT: and v5.8b, v5.8b, v1.8b
-; CHECK-NEXT: and v6.8b, v6.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v16.8b, v1.8b
-; CHECK-NEXT: ushll v16.2d, v0.2s, #0
-; CHECK-NEXT: ushll v17.2d, v4.2s, #0
+; CHECK-NEXT: mov v7.b[0], v2.b[0]
+; CHECK-NEXT: mov v16.b[0], v2.b[2]
+; CHECK-NEXT: mov v17.b[0], v2.b[4]
+; CHECK-NEXT: mov v18.b[0], v2.b[6]
+; CHECK-NEXT: mov v3.b[4], v0.b[1]
+; CHECK-NEXT: mov v4.b[4], v0.b[3]
+; CHECK-NEXT: mov v5.b[4], v0.b[5]
+; CHECK-NEXT: mov v6.b[4], v0.b[7]
+; CHECK-NEXT: mov v7.b[4], v2.b[1]
+; CHECK-NEXT: mov v16.b[4], v2.b[3]
+; CHECK-NEXT: mov v17.b[4], v2.b[5]
+; CHECK-NEXT: mov v18.b[4], v2.b[7]
+; CHECK-NEXT: and v0.8b, v3.8b, v1.8b
+; CHECK-NEXT: and v2.8b, v4.8b, v1.8b
+; CHECK-NEXT: and v3.8b, v5.8b, v1.8b
+; CHECK-NEXT: and v4.8b, v6.8b, v1.8b
+; CHECK-NEXT: and v5.8b, v7.8b, v1.8b
+; CHECK-NEXT: and v6.8b, v16.8b, v1.8b
+; CHECK-NEXT: and v7.8b, v17.8b, v1.8b
+; CHECK-NEXT: and v1.8b, v18.8b, v1.8b
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
; CHECK-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-NEXT: ushll v7.2d, v7.2s, #0
-; CHECK-NEXT: ucvtf v0.2d, v3.2d
+; CHECK-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-NEXT: ushll v4.2d, v4.2s, #0
; CHECK-NEXT: ushll v5.2d, v5.2s, #0
; CHECK-NEXT: ushll v6.2d, v6.2s, #0
-; CHECK-NEXT: ushll v18.2d, v1.2s, #0
-; CHECK-NEXT: ucvtf v1.2d, v16.2d
-; CHECK-NEXT: ucvtf v4.2d, v2.2d
-; CHECK-NEXT: ucvtf v2.2d, v17.2d
-; CHECK-NEXT: ucvtf v3.2d, v7.2d
-; CHECK-NEXT: ucvtf v5.2d, v5.2d
-; CHECK-NEXT: ucvtf v6.2d, v6.2d
-; CHECK-NEXT: ucvtf v7.2d, v18.2d
+; CHECK-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-NEXT: ushll v16.2d, v1.2s, #0
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v1.2d, v2.2d
+; CHECK-NEXT: ucvtf v2.2d, v3.2d
+; CHECK-NEXT: ucvtf v3.2d, v4.2d
+; CHECK-NEXT: ucvtf v4.2d, v5.2d
+; CHECK-NEXT: ucvtf v5.2d, v6.2d
+; CHECK-NEXT: ucvtf v6.2d, v7.2d
+; CHECK-NEXT: ucvtf v7.2d, v16.2d
; CHECK-NEXT: ret
%1 = uitofp <16 x i8> %a to <16 x double>
ret <16 x double> %1
More information about the llvm-commits mailing list