[llvm] 5686f06 - [AArch64][GlobalISel] Select USHLL2 Instruction
Tuan Chuong Goh via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 23 05:55:37 PDT 2023
Author: Tuan Chuong Goh
Date: 2023-08-23T13:41:47+01:00
New Revision: 5686f06d7fc02b7e2ab1eceb56f3830b6fdf7301
URL: https://github.com/llvm/llvm-project/commit/5686f06d7fc02b7e2ab1eceb56f3830b6fdf7301
DIFF: https://github.com/llvm/llvm-project/commit/5686f06d7fc02b7e2ab1eceb56f3830b6fdf7301.diff
LOG: [AArch64][GlobalISel] Select USHLL2 Instruction
Select the ushll2 (and sshll2) instructions when extending the high half of a 128-bit vector, instead of using a mov followed by ushll/sshll.
Differential Revision: https://reviews.llvm.org/D158420
Added:
Modified:
llvm/include/llvm/Target/GlobalISel/Target.td
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/aarch64-addv.ll
llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
llvm/test/CodeGen/AArch64/arm64-vabs.ll
llvm/test/CodeGen/AArch64/sext.ll
llvm/test/CodeGen/AArch64/zext.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Target/GlobalISel/Target.td b/llvm/include/llvm/Target/GlobalISel/Target.td
index 4cb3fd1bf79c14..dd21efe0774165 100644
--- a/llvm/include/llvm/Target/GlobalISel/Target.td
+++ b/llvm/include/llvm/Target/GlobalISel/Target.td
@@ -24,6 +24,7 @@ def s32 : LLT;
def s64 : LLT;
def v2s32 : LLT;
def v4s16 : LLT;
+def v8s8 : LLT;
// Defines a matcher for complex operands. This is analogous to ComplexPattern
// from SelectionDAG.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 1ff52f52009678..885b70a50121f3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -133,6 +133,16 @@ def extract_high_v4i32 :
def extract_high_v2i64 :
ComplexPattern<v1i64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def gi_extract_high_v16i8 :
+ GIComplexOperandMatcher<v8s8, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v16i8>;
+def gi_extract_high_v8i16 :
+ GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v8i16>;
+def gi_extract_high_v4i32 :
+ GIComplexOperandMatcher<v2s32, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v4i32>;
+
def extract_high_v8f16 :
ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
def extract_high_v4f32 :
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index bd9c2f655aed3d..5bdb1d9ffc6d9b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7285,23 +7285,23 @@ def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>
def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128 bit source register.
-def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
(USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (zext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
(USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+def : Pat<(v8i16 (sext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
(SSHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
(USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (zext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
(USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+def : Pat<(v4i32 (sext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
(SSHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
(USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (zext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
(USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+def : Pat<(v2i64 (sext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
(SSHLLv4i32_shift V128:$Rn, (i32 0))>;
// Vector shift sxtl aliases
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index e3bf87c7bb79c3..6cab309d7c094c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -85,21 +85,19 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
; GISEL-NEXT: movi v0.2d, #0000000000000000
; GISEL-NEXT: ushll v1.8h, v1.8b, #0
; GISEL-NEXT: ushll v2.8h, v2.8b, #0
-; GISEL-NEXT: mov d3, v1.d[1]
-; GISEL-NEXT: mov d4, v2.d[1]
-; GISEL-NEXT: usubl v1.4s, v1.4h, v2.4h
-; GISEL-NEXT: usubl v2.4s, v3.4h, v4.4h
-; GISEL-NEXT: cmgt v3.4s, v0.4s, v1.4s
-; GISEL-NEXT: neg v4.4s, v1.4s
-; GISEL-NEXT: shl v3.4s, v3.4s, #31
-; GISEL-NEXT: cmgt v0.4s, v0.4s, v2.4s
-; GISEL-NEXT: neg v5.4s, v2.4s
-; GISEL-NEXT: sshr v3.4s, v3.4s, #31
+; GISEL-NEXT: usubl v3.4s, v1.4h, v2.4h
+; GISEL-NEXT: usubl2 v1.4s, v1.8h, v2.8h
+; GISEL-NEXT: cmgt v2.4s, v0.4s, v3.4s
+; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s
+; GISEL-NEXT: neg v4.4s, v3.4s
+; GISEL-NEXT: neg v5.4s, v1.4s
+; GISEL-NEXT: shl v2.4s, v2.4s, #31
; GISEL-NEXT: shl v0.4s, v0.4s, #31
-; GISEL-NEXT: bit v1.16b, v4.16b, v3.16b
+; GISEL-NEXT: sshr v2.4s, v2.4s, #31
; GISEL-NEXT: sshr v0.4s, v0.4s, #31
-; GISEL-NEXT: bsl v0.16b, v5.16b, v2.16b
-; GISEL-NEXT: add v0.4s, v1.4s, v0.4s
+; GISEL-NEXT: bsl v2.16b, v4.16b, v3.16b
+; GISEL-NEXT: bsl v0.16b, v5.16b, v1.16b
+; GISEL-NEXT: add v0.4s, v2.4s, v0.4s
; GISEL-NEXT: addv s0, v0.4s
; GISEL-NEXT: fmov w0, s0
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 43d9eb7f368979..37c6202cda4a1f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -41,9 +41,9 @@ define <16 x i16> @func3(<16 x i8> %v0) nounwind {
;
; CHECK-GI-LABEL: func3:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.8h v0, v0, #0
-; CHECK-GI-NEXT: ushll.8h v1, v1, #0
+; CHECK-GI-NEXT: ushll.8h v2, v0, #0
+; CHECK-GI-NEXT: ushll2.8h v1, v0, #0
+; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = zext <16 x i8> %v0 to <16 x i16>
ret <16 x i16> %r
@@ -58,9 +58,9 @@ define <16 x i16> @func4(<16 x i8> %v0) nounwind {
;
; CHECK-GI-LABEL: func4:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: sshll.8h v0, v0, #0
-; CHECK-GI-NEXT: sshll.8h v1, v1, #0
+; CHECK-GI-NEXT: sshll.8h v2, v0, #0
+; CHECK-GI-NEXT: sshll2.8h v1, v0, #0
+; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = sext <16 x i8> %v0 to <16 x i16>
ret <16 x i16> %r
@@ -97,9 +97,9 @@ define <8 x i32> @afunc3(<8 x i16> %v0) nounwind {
;
; CHECK-GI-LABEL: afunc3:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.4s v0, v0, #0
-; CHECK-GI-NEXT: ushll.4s v1, v1, #0
+; CHECK-GI-NEXT: ushll.4s v2, v0, #0
+; CHECK-GI-NEXT: ushll2.4s v1, v0, #0
+; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = zext <8 x i16> %v0 to <8 x i32>
ret <8 x i32> %r
@@ -114,9 +114,9 @@ define <8 x i32> @afunc4(<8 x i16> %v0) nounwind {
;
; CHECK-GI-LABEL: afunc4:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: sshll.4s v0, v0, #0
-; CHECK-GI-NEXT: sshll.4s v1, v1, #0
+; CHECK-GI-NEXT: sshll.4s v2, v0, #0
+; CHECK-GI-NEXT: sshll2.4s v1, v0, #0
+; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = sext <8 x i16> %v0 to <8 x i32>
ret <8 x i32> %r
@@ -132,10 +132,9 @@ define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind {
;
; CHECK-GI-LABEL: bfunc1:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ushll.8h v0, v0, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.4s v0, v0, #0
-; CHECK-GI-NEXT: ushll.4s v1, v1, #0
+; CHECK-GI-NEXT: ushll.8h v1, v0, #0
+; CHECK-GI-NEXT: ushll.4s v0, v1, #0
+; CHECK-GI-NEXT: ushll2.4s v1, v1, #0
; CHECK-GI-NEXT: ret
%r = zext <8 x i8> %v0 to <8 x i32>
ret <8 x i32> %r
@@ -151,10 +150,9 @@ define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind {
;
; CHECK-GI-LABEL: bfunc2:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshll.8h v0, v0, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: sshll.4s v0, v0, #0
-; CHECK-GI-NEXT: sshll.4s v1, v1, #0
+; CHECK-GI-NEXT: sshll.8h v1, v0, #0
+; CHECK-GI-NEXT: sshll.4s v0, v1, #0
+; CHECK-GI-NEXT: sshll2.4s v1, v1, #0
; CHECK-GI-NEXT: ret
%r = sext <8 x i8> %v0 to <8 x i32>
ret <8 x i32> %r
@@ -173,9 +171,9 @@ define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind {
;
; CHECK-GI-LABEL: zfunc1:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.2d v0, v0, #0
-; CHECK-GI-NEXT: ushll.2d v1, v1, #0
+; CHECK-GI-NEXT: ushll.2d v2, v0, #0
+; CHECK-GI-NEXT: ushll2.2d v1, v0, #0
+; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = zext <4 x i32> %v0 to <4 x i64>
ret <4 x i64> %r
@@ -190,9 +188,9 @@ define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind {
;
; CHECK-GI-LABEL: zfunc2:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: sshll.2d v0, v0, #0
-; CHECK-GI-NEXT: sshll.2d v1, v1, #0
+; CHECK-GI-NEXT: sshll.2d v2, v0, #0
+; CHECK-GI-NEXT: sshll2.2d v1, v0, #0
+; CHECK-GI-NEXT: mov.16b v0, v2
; CHECK-GI-NEXT: ret
%r = sext <4 x i32> %v0 to <4 x i64>
ret <4 x i64> %r
@@ -208,10 +206,9 @@ define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind {
;
; CHECK-GI-LABEL: bfunc3:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ushll.4s v0, v0, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.2d v0, v0, #0
-; CHECK-GI-NEXT: ushll.2d v1, v1, #0
+; CHECK-GI-NEXT: ushll.4s v1, v0, #0
+; CHECK-GI-NEXT: ushll.2d v0, v1, #0
+; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
; CHECK-GI-NEXT: ret
%r = zext <4 x i16> %v0 to <4 x i64>
ret <4 x i64> %r
@@ -227,10 +224,9 @@ define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind {
;
; CHECK-GI-LABEL: cfunc4:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshll.4s v0, v0, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: sshll.2d v0, v0, #0
-; CHECK-GI-NEXT: sshll.2d v1, v1, #0
+; CHECK-GI-NEXT: sshll.4s v1, v0, #0
+; CHECK-GI-NEXT: sshll.2d v0, v1, #0
+; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
; CHECK-GI-NEXT: ret
%r = sext <4 x i16> %v0 to <4 x i64>
ret <4 x i64> %r
@@ -249,12 +245,11 @@ define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
; CHECK-GI-NEXT: adrp x8, .LCPI14_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.2d v0, v0, #0
-; CHECK-GI-NEXT: and.16b v0, v0, v2
-; CHECK-GI-NEXT: ushll.2d v1, v1, #0
-; CHECK-GI-NEXT: and.16b v1, v1, v2
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI14_0]
+; CHECK-GI-NEXT: ushll.2d v1, v0, #0
+; CHECK-GI-NEXT: ushll2.2d v2, v0, #0
+; CHECK-GI-NEXT: and.16b v0, v1, v3
+; CHECK-GI-NEXT: and.16b v1, v2, v3
; CHECK-GI-NEXT: ret
%r = zext <4 x i8> %v0 to <4 x i64>
ret <4 x i64> %r
@@ -275,13 +270,12 @@ define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind {
; CHECK-GI-LABEL: sext_v4i8_to_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.4s v0, v0, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.2d v0, v0, #0
-; CHECK-GI-NEXT: shl.2d v0, v0, #56
-; CHECK-GI-NEXT: ushll.2d v1, v1, #0
-; CHECK-GI-NEXT: sshr.2d v0, v0, #56
+; CHECK-GI-NEXT: ushll.2d v1, v0, #0
+; CHECK-GI-NEXT: ushll2.2d v0, v0, #0
; CHECK-GI-NEXT: shl.2d v1, v1, #56
-; CHECK-GI-NEXT: sshr.2d v1, v1, #56
+; CHECK-GI-NEXT: shl.2d v2, v0, #56
+; CHECK-GI-NEXT: sshr.2d v0, v1, #56
+; CHECK-GI-NEXT: sshr.2d v1, v2, #56
; CHECK-GI-NEXT: ret
%r = sext <4 x i8> %v0 to <4 x i64>
ret <4 x i64> %r
@@ -302,15 +296,12 @@ define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
; CHECK-GI-LABEL: zext_v8i8_to_v8i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ushll.8h v0, v0, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.4s v0, v0, #0
-; CHECK-GI-NEXT: ushll.4s v2, v1, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: ushll.2d v0, v0, #0
-; CHECK-GI-NEXT: mov d3, v2[1]
-; CHECK-GI-NEXT: ushll.2d v2, v2, #0
-; CHECK-GI-NEXT: ushll.2d v1, v1, #0
-; CHECK-GI-NEXT: ushll.2d v3, v3, #0
+; CHECK-GI-NEXT: ushll.4s v1, v0, #0
+; CHECK-GI-NEXT: ushll2.4s v3, v0, #0
+; CHECK-GI-NEXT: ushll.2d v0, v1, #0
+; CHECK-GI-NEXT: ushll2.2d v1, v1, #0
+; CHECK-GI-NEXT: ushll.2d v2, v3, #0
+; CHECK-GI-NEXT: ushll2.2d v3, v3, #0
; CHECK-GI-NEXT: ret
%r = zext <8 x i8> %v0 to <8 x i64>
ret <8 x i64> %r
@@ -331,15 +322,12 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
; CHECK-GI-LABEL: sext_v8i8_to_v8i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sshll.8h v0, v0, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: sshll.4s v0, v0, #0
-; CHECK-GI-NEXT: sshll.4s v2, v1, #0
-; CHECK-GI-NEXT: mov d1, v0[1]
-; CHECK-GI-NEXT: sshll.2d v0, v0, #0
-; CHECK-GI-NEXT: mov d3, v2[1]
-; CHECK-GI-NEXT: sshll.2d v2, v2, #0
-; CHECK-GI-NEXT: sshll.2d v1, v1, #0
-; CHECK-GI-NEXT: sshll.2d v3, v3, #0
+; CHECK-GI-NEXT: sshll.4s v1, v0, #0
+; CHECK-GI-NEXT: sshll2.4s v3, v0, #0
+; CHECK-GI-NEXT: sshll.2d v0, v1, #0
+; CHECK-GI-NEXT: sshll2.2d v1, v1, #0
+; CHECK-GI-NEXT: sshll.2d v2, v3, #0
+; CHECK-GI-NEXT: sshll2.2d v3, v3, #0
; CHECK-GI-NEXT: ret
%r = sext <8 x i8> %v0 to <8 x i64>
ret <8 x i64> %r
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 1cc9040b5a2dc3..18efdc86e16aa8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -287,44 +287,38 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-LABEL: uabd16b_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d3, v0[1]
-; CHECK-GI-NEXT: mov d4, v1[1]
-; CHECK-GI-NEXT: ushll.8h v0, v0, #0
-; CHECK-GI-NEXT: ushll.8h v1, v1, #0
+; CHECK-GI-NEXT: ushll.8h v3, v0, #0
+; CHECK-GI-NEXT: ushll.8h v4, v1, #0
+; CHECK-GI-NEXT: ushll2.8h v0, v0, #0
+; CHECK-GI-NEXT: ushll2.8h v1, v1, #0
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: mov d5, v0[1]
-; CHECK-GI-NEXT: ushll.8h v3, v3, #0
-; CHECK-GI-NEXT: ushll.8h v4, v4, #0
-; CHECK-GI-NEXT: mov d7, v1[1]
-; CHECK-GI-NEXT: usubl.4s v0, v0, v1
-; CHECK-GI-NEXT: mov d6, v3[1]
-; CHECK-GI-NEXT: mov d16, v4[1]
-; CHECK-GI-NEXT: usubl.4s v3, v3, v4
-; CHECK-GI-NEXT: usubl.4s v1, v5, v7
-; CHECK-GI-NEXT: cmgt.4s v5, v2, v0
-; CHECK-GI-NEXT: usubl.4s v4, v6, v16
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v0
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v1
-; CHECK-GI-NEXT: shl.4s v5, v5, #31
-; CHECK-GI-NEXT: neg.4s v17, v1
-; CHECK-GI-NEXT: neg.4s v18, v3
-; CHECK-GI-NEXT: shl.4s v7, v7, #31
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v4
+; CHECK-GI-NEXT: usubl.4s v5, v3, v4
+; CHECK-GI-NEXT: usubl2.4s v3, v3, v4
+; CHECK-GI-NEXT: usubl.4s v4, v0, v1
+; CHECK-GI-NEXT: usubl2.4s v0, v0, v1
+; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
+; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
+; CHECK-GI-NEXT: neg.4s v16, v5
+; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
+; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
+; CHECK-GI-NEXT: neg.4s v17, v3
+; CHECK-GI-NEXT: neg.4s v18, v4
+; CHECK-GI-NEXT: neg.4s v19, v0
+; CHECK-GI-NEXT: shl.4s v1, v1, #31
; CHECK-GI-NEXT: shl.4s v6, v6, #31
-; CHECK-GI-NEXT: neg.4s v19, v4
-; CHECK-GI-NEXT: sshr.4s v5, v5, #31
-; CHECK-GI-NEXT: sshr.4s v7, v7, #31
+; CHECK-GI-NEXT: shl.4s v7, v7, #31
; CHECK-GI-NEXT: shl.4s v2, v2, #31
+; CHECK-GI-NEXT: sshr.4s v1, v1, #31
; CHECK-GI-NEXT: sshr.4s v6, v6, #31
-; CHECK-GI-NEXT: bit.16b v0, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v18, v7
+; CHECK-GI-NEXT: sshr.4s v7, v7, #31
; CHECK-GI-NEXT: sshr.4s v2, v2, #31
-; CHECK-GI-NEXT: bit.16b v1, v17, v6
-; CHECK-GI-NEXT: bsl.16b v2, v19, v4
-; CHECK-GI-NEXT: add.4s v0, v0, v1
-; CHECK-GI-NEXT: add.4s v1, v3, v2
-; CHECK-GI-NEXT: add.4s v0, v0, v1
+; CHECK-GI-NEXT: bsl.16b v1, v16, v5
+; CHECK-GI-NEXT: bit.16b v3, v17, v6
+; CHECK-GI-NEXT: bit.16b v4, v18, v7
+; CHECK-GI-NEXT: bit.16b v0, v19, v2
+; CHECK-GI-NEXT: add.4s v1, v1, v3
+; CHECK-GI-NEXT: add.4s v0, v4, v0
+; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -349,44 +343,38 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-LABEL: sabd16b_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d3, v0[1]
-; CHECK-GI-NEXT: mov d4, v1[1]
-; CHECK-GI-NEXT: sshll.8h v0, v0, #0
-; CHECK-GI-NEXT: sshll.8h v1, v1, #0
+; CHECK-GI-NEXT: sshll.8h v3, v0, #0
+; CHECK-GI-NEXT: sshll.8h v4, v1, #0
+; CHECK-GI-NEXT: sshll2.8h v0, v0, #0
+; CHECK-GI-NEXT: sshll2.8h v1, v1, #0
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: mov d5, v0[1]
-; CHECK-GI-NEXT: sshll.8h v3, v3, #0
-; CHECK-GI-NEXT: sshll.8h v4, v4, #0
-; CHECK-GI-NEXT: mov d7, v1[1]
-; CHECK-GI-NEXT: ssubl.4s v0, v0, v1
-; CHECK-GI-NEXT: mov d6, v3[1]
-; CHECK-GI-NEXT: mov d16, v4[1]
-; CHECK-GI-NEXT: ssubl.4s v3, v3, v4
-; CHECK-GI-NEXT: ssubl.4s v1, v5, v7
-; CHECK-GI-NEXT: cmgt.4s v5, v2, v0
-; CHECK-GI-NEXT: ssubl.4s v4, v6, v16
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v0
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v1
-; CHECK-GI-NEXT: shl.4s v5, v5, #31
-; CHECK-GI-NEXT: neg.4s v17, v1
-; CHECK-GI-NEXT: neg.4s v18, v3
-; CHECK-GI-NEXT: shl.4s v7, v7, #31
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v4
+; CHECK-GI-NEXT: ssubl.4s v5, v3, v4
+; CHECK-GI-NEXT: ssubl2.4s v3, v3, v4
+; CHECK-GI-NEXT: ssubl.4s v4, v0, v1
+; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
+; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
+; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
+; CHECK-GI-NEXT: neg.4s v16, v5
+; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
+; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
+; CHECK-GI-NEXT: neg.4s v17, v3
+; CHECK-GI-NEXT: neg.4s v18, v4
+; CHECK-GI-NEXT: neg.4s v19, v0
+; CHECK-GI-NEXT: shl.4s v1, v1, #31
; CHECK-GI-NEXT: shl.4s v6, v6, #31
-; CHECK-GI-NEXT: neg.4s v19, v4
-; CHECK-GI-NEXT: sshr.4s v5, v5, #31
-; CHECK-GI-NEXT: sshr.4s v7, v7, #31
+; CHECK-GI-NEXT: shl.4s v7, v7, #31
; CHECK-GI-NEXT: shl.4s v2, v2, #31
+; CHECK-GI-NEXT: sshr.4s v1, v1, #31
; CHECK-GI-NEXT: sshr.4s v6, v6, #31
-; CHECK-GI-NEXT: bit.16b v0, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v18, v7
+; CHECK-GI-NEXT: sshr.4s v7, v7, #31
; CHECK-GI-NEXT: sshr.4s v2, v2, #31
-; CHECK-GI-NEXT: bit.16b v1, v17, v6
-; CHECK-GI-NEXT: bsl.16b v2, v19, v4
-; CHECK-GI-NEXT: add.4s v0, v0, v1
-; CHECK-GI-NEXT: add.4s v1, v3, v2
-; CHECK-GI-NEXT: add.4s v0, v0, v1
+; CHECK-GI-NEXT: bsl.16b v1, v16, v5
+; CHECK-GI-NEXT: bit.16b v3, v17, v6
+; CHECK-GI-NEXT: bit.16b v4, v18, v7
+; CHECK-GI-NEXT: bit.16b v0, v19, v2
+; CHECK-GI-NEXT: add.4s v1, v1, v3
+; CHECK-GI-NEXT: add.4s v0, v4, v0
+; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -419,21 +407,19 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
; CHECK-GI-NEXT: ldr q1, [x0]
; CHECK-GI-NEXT: ldr q2, [x1]
; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT: mov d3, v1[1]
-; CHECK-GI-NEXT: mov d4, v2[1]
-; CHECK-GI-NEXT: usubl.4s v1, v1, v2
-; CHECK-GI-NEXT: usubl.4s v2, v3, v4
-; CHECK-GI-NEXT: cmgt.4s v3, v0, v1
-; CHECK-GI-NEXT: neg.4s v4, v1
-; CHECK-GI-NEXT: shl.4s v3, v3, #31
-; CHECK-GI-NEXT: cmgt.4s v0, v0, v2
-; CHECK-GI-NEXT: neg.4s v5, v2
-; CHECK-GI-NEXT: sshr.4s v3, v3, #31
+; CHECK-GI-NEXT: usubl.4s v3, v1, v2
+; CHECK-GI-NEXT: usubl2.4s v1, v1, v2
+; CHECK-GI-NEXT: cmgt.4s v2, v0, v3
+; CHECK-GI-NEXT: cmgt.4s v0, v0, v1
+; CHECK-GI-NEXT: neg.4s v4, v3
+; CHECK-GI-NEXT: neg.4s v5, v1
+; CHECK-GI-NEXT: shl.4s v2, v2, #31
; CHECK-GI-NEXT: shl.4s v0, v0, #31
-; CHECK-GI-NEXT: bit.16b v1, v4, v3
+; CHECK-GI-NEXT: sshr.4s v2, v2, #31
; CHECK-GI-NEXT: sshr.4s v0, v0, #31
-; CHECK-GI-NEXT: bsl.16b v0, v5, v2
-; CHECK-GI-NEXT: add.4s v0, v1, v0
+; CHECK-GI-NEXT: bsl.16b v2, v4, v3
+; CHECK-GI-NEXT: bsl.16b v0, v5, v1
+; CHECK-GI-NEXT: add.4s v0, v2, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -459,22 +445,20 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-GI-LABEL: sabd8h_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov d3, v0[1]
-; CHECK-GI-NEXT: mov d4, v1[1]
; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: ssubl.4s v0, v0, v1
-; CHECK-GI-NEXT: ssubl.4s v1, v3, v4
-; CHECK-GI-NEXT: cmgt.4s v3, v2, v0
-; CHECK-GI-NEXT: neg.4s v4, v0
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v1
-; CHECK-GI-NEXT: shl.4s v3, v3, #31
-; CHECK-GI-NEXT: neg.4s v5, v1
+; CHECK-GI-NEXT: ssubl.4s v3, v0, v1
+; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
+; CHECK-GI-NEXT: cmgt.4s v1, v2, v3
+; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
+; CHECK-GI-NEXT: neg.4s v4, v3
+; CHECK-GI-NEXT: neg.4s v5, v0
+; CHECK-GI-NEXT: shl.4s v1, v1, #31
; CHECK-GI-NEXT: shl.4s v2, v2, #31
-; CHECK-GI-NEXT: sshr.4s v3, v3, #31
+; CHECK-GI-NEXT: sshr.4s v1, v1, #31
; CHECK-GI-NEXT: sshr.4s v2, v2, #31
-; CHECK-GI-NEXT: bit.16b v0, v4, v3
-; CHECK-GI-NEXT: bit.16b v1, v5, v2
-; CHECK-GI-NEXT: add.4s v0, v0, v1
+; CHECK-GI-NEXT: bsl.16b v1, v4, v3
+; CHECK-GI-NEXT: bit.16b v0, v5, v2
+; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index fd991104e43465..4d26228caf62e9 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -474,13 +474,12 @@ define <4 x i64> @sext_v4i8_v4i64(<4 x i8> %a) {
; CHECK-GI-LABEL: sext_v4i8_v4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56
+; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #56
+; CHECK-GI-NEXT: shl v2.2d, v0.2d, #56
+; CHECK-GI-NEXT: sshr v0.2d, v1.2d, #56
+; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #56
; CHECK-GI-NEXT: ret
entry:
%c = sext <4 x i8> %a to <4 x i64>
@@ -507,10 +506,9 @@ define <4 x i64> @sext_v4i16_v4i64(<4 x i16> %a) {
;
; CHECK-GI-LABEL: sext_v4i16_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = sext <4 x i16> %a to <4 x i64>
@@ -526,9 +524,9 @@ define <4 x i64> @sext_v4i32_v4i64(<4 x i32> %a) {
;
; CHECK-GI-LABEL: sext_v4i32_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll2 v1.2d, v0.4s, #0
+; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = sext <4 x i32> %a to <4 x i64>
@@ -573,13 +571,12 @@ define <4 x i64> @sext_v4i10_v4i64(<4 x i10> %a) {
; CHECK-GI-LABEL: sext_v4i10_v4i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #54
+; CHECK-GI-NEXT: shl v2.2d, v0.2d, #54
+; CHECK-GI-NEXT: sshr v0.2d, v1.2d, #54
+; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #54
; CHECK-GI-NEXT: ret
entry:
%c = sext <4 x i10> %a to <4 x i64>
@@ -606,10 +603,9 @@ define <8 x i32> @sext_v8i8_v8i32(<8 x i8> %a) {
;
; CHECK-GI-LABEL: sext_v8i8_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: ret
entry:
%c = sext <8 x i8> %a to <8 x i32>
@@ -631,15 +627,12 @@ define <8 x i64> @sext_v8i8_v8i64(<8 x i8> %a) {
; CHECK-GI-LABEL: sext_v8i8_v8i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = sext <8 x i8> %a to <8 x i64>
@@ -655,9 +648,9 @@ define <8 x i32> @sext_v8i16_v8i32(<8 x i16> %a) {
;
; CHECK-GI-LABEL: sext_v8i16_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v1.4s, v0.8h, #0
+; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = sext <8 x i16> %a to <8 x i32>
@@ -677,15 +670,12 @@ define <8 x i64> @sext_v8i16_v8i64(<8 x i16> %a) {
;
; CHECK-GI-LABEL: sext_v8i16_v8i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT: sshll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = sext <8 x i16> %a to <8 x i64>
@@ -705,13 +695,12 @@ define <8 x i64> @sext_v8i32_v8i64(<8 x i32> %a) {
;
; CHECK-GI-LABEL: sext_v8i32_v8i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT: sshll v4.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll2 v5.2d, v0.4s, #0
; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT: mov v1.16b, v4.16b
+; CHECK-GI-NEXT: sshll2 v3.2d, v1.4s, #0
+; CHECK-GI-NEXT: mov v0.16b, v4.16b
+; CHECK-GI-NEXT: mov v1.16b, v5.16b
; CHECK-GI-NEXT: ret
entry:
%c = sext <8 x i32> %a to <8 x i64>
@@ -742,13 +731,12 @@ define <8 x i32> @sext_v8i10_v8i32(<8 x i10> %a) {
;
; CHECK-GI-LABEL: sext_v8i10_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: shl v0.4s, v0.4s, #22
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #22
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #22
-; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #22
+; CHECK-GI-NEXT: shl v2.4s, v0.4s, #22
+; CHECK-GI-NEXT: sshr v0.4s, v1.4s, #22
+; CHECK-GI-NEXT: sshr v1.4s, v2.4s, #22
; CHECK-GI-NEXT: ret
entry:
%c = sext <8 x i10> %a to <8 x i32>
@@ -776,23 +764,20 @@ define <8 x i64> @sext_v8i10_v8i64(<8 x i10> %a) {
;
; CHECK-GI-LABEL: sext_v8i10_v8i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-GI-NEXT: shl v2.2d, v2.2d, #54
-; CHECK-GI-NEXT: shl v4.2d, v1.2d, #54
-; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #54
-; CHECK-GI-NEXT: sshr v2.2d, v4.2d, #54
+; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54
; CHECK-GI-NEXT: shl v3.2d, v3.2d, #54
-; CHECK-GI-NEXT: sshr v3.2d, v3.2d, #54
+; CHECK-GI-NEXT: shl v4.2d, v0.2d, #54
+; CHECK-GI-NEXT: sshr v0.2d, v2.2d, #54
+; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #54
+; CHECK-GI-NEXT: sshr v2.2d, v3.2d, #54
+; CHECK-GI-NEXT: sshr v3.2d, v4.2d, #54
; CHECK-GI-NEXT: ret
entry:
%c = sext <8 x i10> %a to <8 x i64>
@@ -808,9 +793,9 @@ define <16 x i16> @sext_v16i8_v16i16(<16 x i8> %a) {
;
; CHECK-GI-LABEL: sext_v16i8_v16i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll2 v1.8h, v0.16b, #0
+; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = sext <16 x i8> %a to <16 x i16>
@@ -830,15 +815,12 @@ define <16 x i32> @sext_v16i8_v16i32(<16 x i8> %a) {
;
; CHECK-GI-LABEL: sext_v16i8_v16i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: sshll v2.8h, v1.8b, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0
+; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll2 v3.8h, v0.16b, #0
+; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0
; CHECK-GI-NEXT: ret
entry:
%c = sext <16 x i8> %a to <16 x i32>
@@ -866,27 +848,20 @@ define <16 x i64> @sext_v16i8_v16i64(<16 x i8> %a) {
;
; CHECK-GI-LABEL: sext_v16i8_v16i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: sshll v4.4s, v1.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: mov d5, v4.d[1]
-; CHECK-GI-NEXT: sshll v4.2d, v4.2s, #0
-; CHECK-GI-NEXT: sshll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: mov d7, v6.d[1]
-; CHECK-GI-NEXT: sshll v5.2d, v5.2s, #0
-; CHECK-GI-NEXT: sshll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll2 v0.8h, v0.16b, #0
+; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT: sshll v5.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v7.4s, v0.8h, #0
+; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = sext <16 x i8> %a to <16 x i64>
@@ -906,13 +881,12 @@ define <16 x i32> @sext_v16i16_v16i32(<16 x i16> %a) {
;
; CHECK-GI-LABEL: sext_v16i16_v16i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v4.4s, v2.4h, #0
+; CHECK-GI-NEXT: sshll v4.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v5.4s, v0.8h, #0
; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0
-; CHECK-GI-NEXT: mov v1.16b, v4.16b
+; CHECK-GI-NEXT: sshll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT: mov v0.16b, v4.16b
+; CHECK-GI-NEXT: mov v1.16b, v5.16b
; CHECK-GI-NEXT: ret
entry:
%c = sext <16 x i16> %a to <16 x i32>
@@ -938,24 +912,18 @@ define <16 x i64> @sext_v16i16_v16i64(<16 x i16> %a) {
;
; CHECK-GI-LABEL: sext_v16i16_v16i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: sshll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: mov d3, v0.d[1]
-; CHECK-GI-NEXT: mov d7, v1.d[1]
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: sshll v4.2d, v1.2s, #0
-; CHECK-GI-NEXT: mov d5, v2.d[1]
-; CHECK-GI-NEXT: mov d16, v6.d[1]
-; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT: sshll v3.2d, v5.2s, #0
-; CHECK-GI-NEXT: sshll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT: sshll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT: sshll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT: sshll2 v7.4s, v1.8h, #0
+; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT: sshll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: sshll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT: sshll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT: sshll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT: sshll2 v7.2d, v7.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = sext <16 x i16> %a to <16 x i64>
@@ -980,20 +948,18 @@ define <16 x i64> @sext_v16i32_v16i64(<16 x i32> %a) {
;
; CHECK-GI-LABEL: sext_v16i32_v16i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d6, v1.d[1]
-; CHECK-GI-NEXT: mov d5, v0.d[1]
-; CHECK-GI-NEXT: mov d7, v2.d[1]
-; CHECK-GI-NEXT: mov d18, v3.d[1]
-; CHECK-GI-NEXT: sshll v16.2d, v1.2s, #0
-; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll2 v17.2d, v0.4s, #0
+; CHECK-GI-NEXT: sshll v18.2d, v1.2s, #0
+; CHECK-GI-NEXT: sshll2 v19.2d, v1.4s, #0
; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: sshll v17.2d, v6.2s, #0
-; CHECK-GI-NEXT: sshll v1.2d, v5.2s, #0
+; CHECK-GI-NEXT: sshll2 v5.2d, v2.4s, #0
; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: sshll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT: sshll v7.2d, v18.2s, #0
-; CHECK-GI-NEXT: mov v2.16b, v16.16b
-; CHECK-GI-NEXT: mov v3.16b, v17.16b
+; CHECK-GI-NEXT: sshll2 v7.2d, v3.4s, #0
+; CHECK-GI-NEXT: mov v0.16b, v16.16b
+; CHECK-GI-NEXT: mov v1.16b, v17.16b
+; CHECK-GI-NEXT: mov v2.16b, v18.16b
+; CHECK-GI-NEXT: mov v3.16b, v19.16b
; CHECK-GI-NEXT: ret
entry:
%c = sext <16 x i32> %a to <16 x i64>
diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 6499cc9e7a282e..979631adfe2a77 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -597,12 +597,11 @@ define <4 x i64> @zext_v4i8_v4i64(<4 x i8> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: adrp x8, .LCPI30_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_0]
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI30_0]
+; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v2.2d, v0.4s, #0
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <4 x i8> %a to <4 x i64>
@@ -629,10 +628,9 @@ define <4 x i64> @zext_v4i16_v4i64(<4 x i16> %a) {
;
; CHECK-GI-LABEL: zext_v4i16_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = zext <4 x i16> %a to <4 x i64>
@@ -648,9 +646,9 @@ define <4 x i64> @zext_v4i32_v4i64(<4 x i32> %a) {
;
; CHECK-GI-LABEL: zext_v4i32_v4i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <4 x i32> %a to <4 x i64>
@@ -706,12 +704,11 @@ define <4 x i64> @zext_v4i10_v4i64(<4 x i10> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v2.2d, v0.4s, #0
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <4 x i10> %a to <4 x i64>
@@ -738,10 +735,9 @@ define <8 x i32> @zext_v8i8_v8i32(<8 x i8> %a) {
;
; CHECK-GI-LABEL: zext_v8i8_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: ret
entry:
%c = zext <8 x i8> %a to <8 x i32>
@@ -763,15 +759,12 @@ define <8 x i64> @zext_v8i8_v8i64(<8 x i8> %a) {
; CHECK-GI-LABEL: zext_v8i8_v8i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = zext <8 x i8> %a to <8 x i64>
@@ -787,9 +780,9 @@ define <8 x i32> @zext_v8i16_v8i32(<8 x i16> %a) {
;
; CHECK-GI-LABEL: zext_v8i16_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <8 x i16> %a to <8 x i32>
@@ -809,15 +802,12 @@ define <8 x i64> @zext_v8i16_v8i64(<8 x i16> %a) {
;
; CHECK-GI-LABEL: zext_v8i16_v8i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = zext <8 x i16> %a to <8 x i64>
@@ -837,13 +827,12 @@ define <8 x i64> @zext_v8i32_v8i64(<8 x i32> %a) {
;
; CHECK-GI-LABEL: zext_v8i32_v8i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v5.2d, v0.4s, #0
; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT: mov v1.16b, v4.16b
+; CHECK-GI-NEXT: ushll2 v3.2d, v1.4s, #0
+; CHECK-GI-NEXT: mov v0.16b, v4.16b
+; CHECK-GI-NEXT: mov v1.16b, v5.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <8 x i32> %a to <8 x i64>
@@ -877,13 +866,12 @@ define <8 x i32> @zext_v8i10_v8i32(<8 x i10> %a) {
;
; CHECK-GI-LABEL: zext_v8i10_v8i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: adrp x8, .LCPI44_0
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI44_0]
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <8 x i10> %a to <8 x i32>
@@ -904,21 +892,18 @@ define <8 x i64> @zext_v8i10_v8i64(<8 x i10> %a) {
;
; CHECK-GI-LABEL: zext_v8i10_v8i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: adrp x8, .LCPI45_0
-; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI45_0]
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v4.16b
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v5.2d, v1.2s, #0
-; CHECK-GI-NEXT: and v1.16b, v2.16b, v4.16b
-; CHECK-GI-NEXT: and v2.16b, v5.16b, v4.16b
-; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT: and v3.16b, v3.16b, v4.16b
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI45_0]
+; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v5.2d, v0.4s, #0
+; CHECK-GI-NEXT: and v0.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: and v2.16b, v4.16b, v3.16b
+; CHECK-GI-NEXT: and v3.16b, v5.16b, v3.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <8 x i10> %a to <8 x i64>
@@ -934,9 +919,9 @@ define <16 x i16> @zext_v16i8_v16i16(<16 x i8> %a) {
;
; CHECK-GI-LABEL: zext_v16i8_v16i16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-GI-NEXT: mov v0.16b, v2.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <16 x i8> %a to <16 x i16>
@@ -956,15 +941,12 @@ define <16 x i32> @zext_v16i8_v16i32(<16 x i8> %a) {
;
; CHECK-GI-LABEL: zext_v16i8_v16i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ushll v2.8h, v1.8b, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll2 v3.8h, v0.16b, #0
+; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
; CHECK-GI-NEXT: ret
entry:
%c = zext <16 x i8> %a to <16 x i32>
@@ -992,27 +974,20 @@ define <16 x i64> @zext_v16i8_v16i64(<16 x i8> %a) {
;
; CHECK-GI-LABEL: zext_v16i8_v16i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: ushll v4.4s, v1.4h, #0
-; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: mov d5, v4.d[1]
-; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0
-; CHECK-GI-NEXT: ushll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: mov d3, v2.d[1]
-; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-GI-NEXT: mov d7, v6.d[1]
-; CHECK-GI-NEXT: ushll v5.2d, v5.2s, #0
-; CHECK-GI-NEXT: ushll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT: ushll v5.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v7.4s, v0.8h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT: ushll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = zext <16 x i8> %a to <16 x i64>
@@ -1032,13 +1007,12 @@ define <16 x i32> @zext_v16i16_v16i32(<16 x i16> %a) {
;
; CHECK-GI-LABEL: zext_v16i16_v16i32:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v4.4s, v2.4h, #0
+; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v5.4s, v0.8h, #0
; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0
-; CHECK-GI-NEXT: mov v1.16b, v4.16b
+; CHECK-GI-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-GI-NEXT: mov v0.16b, v4.16b
+; CHECK-GI-NEXT: mov v1.16b, v5.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <16 x i16> %a to <16 x i32>
@@ -1064,24 +1038,18 @@ define <16 x i64> @zext_v16i16_v16i64(<16 x i16> %a) {
;
; CHECK-GI-LABEL: zext_v16i16_v16i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d2, v0.d[1]
-; CHECK-GI-NEXT: mov d3, v1.d[1]
-; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
-; CHECK-GI-NEXT: ushll v6.4s, v3.4h, #0
-; CHECK-GI-NEXT: mov d3, v0.d[1]
-; CHECK-GI-NEXT: mov d7, v1.d[1]
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-GI-NEXT: ushll v4.2d, v1.2s, #0
-; CHECK-GI-NEXT: mov d5, v2.d[1]
-; CHECK-GI-NEXT: mov d16, v6.d[1]
-; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v6.2d, v6.2s, #0
-; CHECK-GI-NEXT: ushll v3.2d, v5.2s, #0
-; CHECK-GI-NEXT: ushll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT: ushll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll2 v7.4s, v1.8h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll2 v1.2d, v2.4s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll2 v3.2d, v3.4s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT: ushll2 v5.2d, v5.4s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll2 v7.2d, v7.4s, #0
; CHECK-GI-NEXT: ret
entry:
%c = zext <16 x i16> %a to <16 x i64>
@@ -1106,20 +1074,18 @@ define <16 x i64> @zext_v16i32_v16i64(<16 x i32> %a) {
;
; CHECK-GI-LABEL: zext_v16i32_v16i64:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: mov d6, v1.d[1]
-; CHECK-GI-NEXT: mov d5, v0.d[1]
-; CHECK-GI-NEXT: mov d7, v2.d[1]
-; CHECK-GI-NEXT: mov d18, v3.d[1]
-; CHECK-GI-NEXT: ushll v16.2d, v1.2s, #0
-; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v17.2d, v0.4s, #0
+; CHECK-GI-NEXT: ushll v18.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll2 v19.2d, v1.4s, #0
; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
-; CHECK-GI-NEXT: ushll v17.2d, v6.2s, #0
-; CHECK-GI-NEXT: ushll v1.2d, v5.2s, #0
+; CHECK-GI-NEXT: ushll2 v5.2d, v2.4s, #0
; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
-; CHECK-GI-NEXT: ushll v5.2d, v7.2s, #0
-; CHECK-GI-NEXT: ushll v7.2d, v18.2s, #0
-; CHECK-GI-NEXT: mov v2.16b, v16.16b
-; CHECK-GI-NEXT: mov v3.16b, v17.16b
+; CHECK-GI-NEXT: ushll2 v7.2d, v3.4s, #0
+; CHECK-GI-NEXT: mov v0.16b, v16.16b
+; CHECK-GI-NEXT: mov v1.16b, v17.16b
+; CHECK-GI-NEXT: mov v2.16b, v18.16b
+; CHECK-GI-NEXT: mov v3.16b, v19.16b
; CHECK-GI-NEXT: ret
entry:
%c = zext <16 x i32> %a to <16 x i64>
More information about the llvm-commits mailing list