[llvm] 602f436 - [AArch64] Add patterns for constructive splice. (#113912)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 30 06:17:35 PDT 2024
Author: Sander de Smalen
Date: 2024-10-30T13:17:31Z
New Revision: 602f43686c45017e3140789f8d574d2c344b4d71
URL: https://github.com/llvm/llvm-project/commit/602f43686c45017e3140789f8d574d2c344b4d71
DIFF: https://github.com/llvm/llvm-project/commit/602f43686c45017e3140789f8d574d2c344b4d71.diff
LOG: [AArch64] Add patterns for constructive splice. (#113912)
SVE2 adds the constructive splice instruction, which takes a tuple.
Even though the register allocator must assign consecutive registers
to the tuple, the constructive form is likely to be more efficient
than using the destructive splice instruction when the first operand
is reused.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 2564ddc5f2e5ca..d6662d15617fab 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3851,7 +3851,7 @@ let Predicates = [HasSVE2] in {
let Predicates = [HasSVE2orSME] in {
// SVE2 vector splice (constructive)
- defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
+ defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice", AArch64splice>;
} // End HasSVE2orSME
let Predicates = [HasSVE2] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 9fa184c545705b..552d5b9b23a7e4 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7314,11 +7314,33 @@ class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
let hasSideEffects = 0;
}
-multiclass sve2_int_perm_splice_cons<string asm> {
+multiclass sve2_int_perm_splice_cons<string asm, SDPatternOperator op> {
def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8, ZZ_b>;
def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>;
def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>;
def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>;
+
+ let AddedComplexity = 2 in {
+ foreach VT = [nxv16i8] in
+ def : Pat<(VT (op nxv16i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _B)
+ nxv16i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+
+ foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
+ def : Pat<(VT (op nxv8i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _H)
+ nxv8i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+
+ foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
+ def : Pat<(VT (op nxv4i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _S)
+ nxv4i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+
+ foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
+ def : Pat<(VT (op nxv2i1:$pred, VT:$zn1, VT:$zn2)),
+ (!cast<Instruction>(NAME # _D)
+ nxv2i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>;
+ }
}
class sve2_int_perm_expand<bits<2> sz, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index c1810c678ea522..6e2ecfca9e963e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -61,10 +61,10 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) {
define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-LABEL: concat_v16i8:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -172,10 +172,10 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) {
define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-LABEL: concat_v8i16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -270,10 +270,10 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) {
define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-LABEL: concat_v4i32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -340,10 +340,10 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-LABEL: concat_v2i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -406,17 +406,33 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
;
define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
-; CHECK-LABEL: concat_v4f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z2.h, z1.h[1]
-; CHECK-NEXT: mov z3.h, z0.h[1]
-; CHECK-NEXT: zip1 z1.h, z1.h, z2.h
-; CHECK-NEXT: zip1 z0.h, z0.h, z3.h
-; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: ret
+; SVE2-LABEL: concat_v4f16:
+; SVE2: // %bb.0:
+; SVE2-NEXT: cnth x8
+; SVE2-NEXT: adrp x9, .LCPI15_0
+; SVE2-NEXT: adrp x10, .LCPI15_1
+; SVE2-NEXT: mov z2.h, w8
+; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0]
+; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1]
+; SVE2-NEXT: ptrue p0.h, vl8
+; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h
+; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h
+; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SVE2-NEXT: ret
+;
+; SME-LABEL: concat_v4f16:
+; SME: // %bb.0:
+; SME-NEXT: // kill: def $d1 killed $d1 def $z1
+; SME-NEXT: // kill: def $d0 killed $d0 def $z0
+; SME-NEXT: mov z2.h, z1.h[1]
+; SME-NEXT: mov z3.h, z0.h[1]
+; SME-NEXT: zip1 z1.h, z1.h, z2.h
+; SME-NEXT: zip1 z0.h, z0.h, z3.h
+; SME-NEXT: zip1 z0.s, z0.s, z1.s
+; SME-NEXT: // kill: def $d0 killed $d0 killed $z0
+; SME-NEXT: ret
;
; NONEON-NOSVE-LABEL: concat_v4f16:
; NONEON-NOSVE: // %bb.0:
@@ -436,10 +452,10 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-LABEL: concat_v8f16:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -534,10 +550,10 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) {
define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-LABEL: concat_v4f32:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -604,10 +620,10 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-LABEL: concat_v2f64:
; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index f1771a753826cc..2282e74af5d006 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE2
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -842,16 +842,16 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
;
; SVE2-LABEL: test_copysign_v4f32_v4f64:
; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x1]
+; SVE2-NEXT: ldp q1, q0, [x1]
; SVE2-NEXT: ptrue p0.d
-; SVE2-NEXT: ldr q2, [x0]
-; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
; SVE2-NEXT: fcvt z0.s, p0/m, z0.d
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
; SVE2-NEXT: ptrue p0.s, vl2
-; SVE2-NEXT: uzp1 z1.s, z1.s, z1.s
-; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s
-; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
+; SVE2-NEXT: uzp1 z3.s, z0.s, z0.s
+; SVE2-NEXT: uzp1 z2.s, z1.s, z1.s
; SVE2-NEXT: mov z1.s, #0x7fffffff
+; SVE2-NEXT: splice z0.s, p0, { z2.s, z3.s }
+; SVE2-NEXT: ldr q2, [x0]
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str q2, [x0]
; SVE2-NEXT: ret
@@ -1237,16 +1237,16 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
;
; SVE2-LABEL: test_copysign_v8f16_v8f32:
; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x1]
+; SVE2-NEXT: ldp q1, q0, [x1]
; SVE2-NEXT: ptrue p0.s
-; SVE2-NEXT: ldr q2, [x0]
-; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
+; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
; SVE2-NEXT: ptrue p0.h, vl4
-; SVE2-NEXT: uzp1 z1.h, z1.h, z1.h
-; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
-; SVE2-NEXT: splice z0.h, p0, z0.h, z1.h
+; SVE2-NEXT: uzp1 z3.h, z0.h, z0.h
+; SVE2-NEXT: uzp1 z2.h, z1.h, z1.h
; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
+; SVE2-NEXT: splice z0.h, p0, { z2.h, z3.h }
+; SVE2-NEXT: ldr q2, [x0]
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str q2, [x0]
; SVE2-NEXT: ret
@@ -1349,5 +1349,3 @@ declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 516772b8ca6640..1fdcd4f8268708 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -26,19 +25,6 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v4i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
-; NEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
-; NEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -85,27 +71,12 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v8i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
-; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; NEON-NOSVE-NEXT: xtn v0.8b, v0.8h
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -177,45 +148,21 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v16i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: sshll2 v2.8h, v1.16b, #0
-; NEON-NOSVE-NEXT: sshll2 v3.8h, v0.16b, #0
-; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
-; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sshll2 v4.4s, v2.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v5.4s, v3.8h, #0
-; NEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
-; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
-; NEON-NOSVE-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; NEON-NOSVE-NEXT: sshll2 v5.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: sshll2 v3.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
-; NEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -319,7 +266,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sunpklo z4.h, z2.b
; CHECK-NEXT: sunpklo z2.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: sunpklo z5.s, z4.h
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
; CHECK-NEXT: sunpklo z3.s, z3.h
@@ -328,7 +274,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q5, [x0]
; CHECK-NEXT: sunpklo z16.h, z5.b
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: sunpklo z5.h, z5.b
; CHECK-NEXT: sunpklo z18.s, z16.h
; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
@@ -337,81 +282,36 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sunpklo z18.s, z5.h
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
; CHECK-NEXT: sunpklo z5.s, z5.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: sunpklo z16.s, z6.h
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s
+; CHECK-NEXT: uzp1 z18.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z19.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h
; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: uzp1 z4.h, z17.h, z17.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z6.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: splice z1.b, p0, z1.b, z3.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v32i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q6, q3, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ldr q2, [x0, #16]
-; NEON-NOSVE-NEXT: sshll2 v1.8h, v3.16b, #0
-; NEON-NOSVE-NEXT: sshll2 v4.8h, v2.16b, #0
-; NEON-NOSVE-NEXT: sshll v3.8h, v3.8b, #0
-; NEON-NOSVE-NEXT: sshll v2.8h, v2.8b, #0
-; NEON-NOSVE-NEXT: sshll2 v7.8h, v6.16b, #0
-; NEON-NOSVE-NEXT: sshll v6.8h, v6.8b, #0
-; NEON-NOSVE-NEXT: sshll2 v0.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v5.4s, v4.8h, #0
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0
-; NEON-NOSVE-NEXT: sshll2 v17.4s, v7.8h, #0
-; NEON-NOSVE-NEXT: sshll v7.4s, v7.4h, #0
-; NEON-NOSVE-NEXT: sdivr z0.s, p0/m, z0.s, z5.s
-; NEON-NOSVE-NEXT: sshll2 v5.4s, v2.8h, #0
-; NEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
-; NEON-NOSVE-NEXT: sdivr z1.s, p0/m, z1.s, z4.s
-; NEON-NOSVE-NEXT: sshll2 v4.4s, v3.8h, #0
-; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
-; NEON-NOSVE-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; NEON-NOSVE-NEXT: ldr q5, [x0]
-; NEON-NOSVE-NEXT: sshll2 v16.8h, v5.16b, #0
-; NEON-NOSVE-NEXT: sshll v5.8h, v5.8b, #0
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; NEON-NOSVE-NEXT: sshll2 v18.4s, v16.8h, #0
-; NEON-NOSVE-NEXT: sshll v16.4s, v16.4h, #0
-; NEON-NOSVE-NEXT: sdivr z17.s, p0/m, z17.s, z18.s
-; NEON-NOSVE-NEXT: sshll2 v18.4s, v5.8h, #0
-; NEON-NOSVE-NEXT: sshll v5.4s, v5.4h, #0
-; NEON-NOSVE-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
-; NEON-NOSVE-NEXT: sshll2 v16.4s, v6.8h, #0
-; NEON-NOSVE-NEXT: sshll v6.4s, v6.4h, #0
-; NEON-NOSVE-NEXT: sdivr z16.s, p0/m, z16.s, z18.s
-; NEON-NOSVE-NEXT: sdiv z5.s, p0/m, z5.s, z6.s
-; NEON-NOSVE-NEXT: sdiv z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: uzp1 v3.8h, v7.8h, v17.8h
-; NEON-NOSVE-NEXT: uzp1 v5.8h, v5.8h, v16.8h
-; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
-; NEON-NOSVE-NEXT: uzp1 v2.16b, v5.16b, v3.16b
-; NEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
-; NEON-NOSVE-NEXT: stp q2, q0, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -571,17 +471,6 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v2i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: shl v1.2s, v1.2s, #16
-; NEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
-; NEON-NOSVE-NEXT: ptrue p0.s, vl2
-; NEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16
-; NEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v2i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -614,15 +503,6 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v4i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -664,26 +544,14 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v8i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -748,41 +616,18 @@ define void @sdiv_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uzp1 z4.h, z5.h, z5.h
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z3.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h
-; CHECK-NEXT: stp q1, q2, [x0]
+; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v16i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q4, q1, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ldr q0, [x0, #16]
-; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: sshll2 v5.4s, v4.8h, #0
-; NEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0
-; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: ldr q3, [x0]
-; NEON-NOSVE-NEXT: sshll2 v6.4s, v3.8h, #0
-; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
-; NEON-NOSVE-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; NEON-NOSVE-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v5.8h
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; NEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -876,15 +721,6 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v2i32:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.s, vl2
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -913,15 +749,6 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v4i32:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -957,17 +784,6 @@ define void @sdiv_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v8i32:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
-; NEON-NOSVE-NEXT: sdivr z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: movprfx z1, z2
-; NEON-NOSVE-NEXT: sdiv z1.s, p0/m, z1.s, z3.s
-; NEON-NOSVE-NEXT: stp q0, q1, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -1021,15 +837,6 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v1i64:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.d, vl1
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
-; NEON-NOSVE-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
@@ -1055,15 +862,6 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v2i64:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.d, vl2
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; NEON-NOSVE-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -1093,17 +891,6 @@ define void @sdiv_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: sdiv_v4i64:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.d, vl2
-; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
-; NEON-NOSVE-NEXT: sdivr z0.d, p0/m, z0.d, z1.d
-; NEON-NOSVE-NEXT: movprfx z1, z2
-; NEON-NOSVE-NEXT: sdiv z1.d, p0/m, z1.d, z3.d
-; NEON-NOSVE-NEXT: stp q0, q1, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: sdiv_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -1135,9 +922,7 @@ define void @sdiv_v4i64(ptr %a, ptr %b) {
ret void
}
-;
; UDIV
-;
define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-LABEL: udiv_v4i8:
@@ -1154,17 +939,6 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v4i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: bic v0.4h, #255, lsl #8
-; NEON-NOSVE-NEXT: bic v1.4h, #255, lsl #8
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1211,27 +985,12 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v8i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
-; NEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; NEON-NOSVE-NEXT: xtn v0.8b, v0.8h
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1303,45 +1062,21 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v16i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ushll2 v2.8h, v1.16b, #0
-; NEON-NOSVE-NEXT: ushll2 v3.8h, v0.16b, #0
-; NEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
-; NEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ushll2 v4.4s, v2.8h, #0
-; NEON-NOSVE-NEXT: ushll2 v5.4s, v3.8h, #0
-; NEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
-; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
-; NEON-NOSVE-NEXT: udivr z4.s, p0/m, z4.s, z5.s
-; NEON-NOSVE-NEXT: ushll2 v5.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: ushll2 v3.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: udivr z3.s, p0/m, z3.s, z5.s
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
-; NEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -1445,7 +1180,6 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: uunpklo z4.h, z2.b
; CHECK-NEXT: uunpklo z2.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uunpklo z5.s, z4.h
; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
; CHECK-NEXT: uunpklo z3.s, z3.h
@@ -1454,7 +1188,6 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q5, [x0]
; CHECK-NEXT: uunpklo z16.h, z5.b
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uunpklo z5.h, z5.b
; CHECK-NEXT: uunpklo z18.s, z16.h
; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
@@ -1463,81 +1196,36 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: uunpklo z18.s, z5.h
; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
; CHECK-NEXT: uunpklo z5.s, z5.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
; CHECK-NEXT: uunpklo z16.s, z6.h
; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
; CHECK-NEXT: uunpklo z6.s, z6.h
+; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z18.s
+; CHECK-NEXT: uzp1 z18.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z19.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h
; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: uzp1 z4.h, z17.h, z17.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z6.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: splice z1.b, p0, z1.b, z3.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v32i8:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q6, q3, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ldr q2, [x0, #16]
-; NEON-NOSVE-NEXT: ushll2 v1.8h, v3.16b, #0
-; NEON-NOSVE-NEXT: ushll2 v4.8h, v2.16b, #0
-; NEON-NOSVE-NEXT: ushll v3.8h, v3.8b, #0
-; NEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0
-; NEON-NOSVE-NEXT: ushll2 v7.8h, v6.16b, #0
-; NEON-NOSVE-NEXT: ushll v6.8h, v6.8b, #0
-; NEON-NOSVE-NEXT: ushll2 v0.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: ushll2 v5.4s, v4.8h, #0
-; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0
-; NEON-NOSVE-NEXT: ushll2 v17.4s, v7.8h, #0
-; NEON-NOSVE-NEXT: ushll v7.4s, v7.4h, #0
-; NEON-NOSVE-NEXT: udivr z0.s, p0/m, z0.s, z5.s
-; NEON-NOSVE-NEXT: ushll2 v5.4s, v2.8h, #0
-; NEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
-; NEON-NOSVE-NEXT: udivr z1.s, p0/m, z1.s, z4.s
-; NEON-NOSVE-NEXT: ushll2 v4.4s, v3.8h, #0
-; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
-; NEON-NOSVE-NEXT: udivr z4.s, p0/m, z4.s, z5.s
-; NEON-NOSVE-NEXT: ldr q5, [x0]
-; NEON-NOSVE-NEXT: ushll2 v16.8h, v5.16b, #0
-; NEON-NOSVE-NEXT: ushll v5.8h, v5.8b, #0
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; NEON-NOSVE-NEXT: ushll2 v18.4s, v16.8h, #0
-; NEON-NOSVE-NEXT: ushll v16.4s, v16.4h, #0
-; NEON-NOSVE-NEXT: udivr z17.s, p0/m, z17.s, z18.s
-; NEON-NOSVE-NEXT: ushll2 v18.4s, v5.8h, #0
-; NEON-NOSVE-NEXT: ushll v5.4s, v5.4h, #0
-; NEON-NOSVE-NEXT: udivr z7.s, p0/m, z7.s, z16.s
-; NEON-NOSVE-NEXT: ushll2 v16.4s, v6.8h, #0
-; NEON-NOSVE-NEXT: ushll v6.4s, v6.4h, #0
-; NEON-NOSVE-NEXT: udivr z16.s, p0/m, z16.s, z18.s
-; NEON-NOSVE-NEXT: udiv z5.s, p0/m, z5.s, z6.s
-; NEON-NOSVE-NEXT: udiv z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: uzp1 v3.8h, v7.8h, v17.8h
-; NEON-NOSVE-NEXT: uzp1 v5.8h, v5.8h, v16.8h
-; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
-; NEON-NOSVE-NEXT: uzp1 v2.16b, v5.16b, v3.16b
-; NEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
-; NEON-NOSVE-NEXT: stp q2, q0, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -1697,16 +1385,6 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v2i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff
-; NEON-NOSVE-NEXT: ptrue p0.s, vl2
-; NEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
-; NEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v2i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1739,15 +1417,6 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v4i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1789,26 +1458,14 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z0.s
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v8i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -1873,41 +1530,18 @@ define void @udiv_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uzp1 z4.h, z5.h, z5.h
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z3.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h
-; CHECK-NEXT: stp q1, q2, [x0]
+; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v16i16:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q4, q1, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ldr q0, [x0, #16]
-; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0
-; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0
-; NEON-NOSVE-NEXT: ushll2 v5.4s, v4.8h, #0
-; NEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0
-; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
-; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
-; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
-; NEON-NOSVE-NEXT: ldr q3, [x0]
-; NEON-NOSVE-NEXT: ushll2 v6.4s, v3.8h, #0
-; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
-; NEON-NOSVE-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; NEON-NOSVE-NEXT: udiv z3.s, p0/m, z3.s, z4.s
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v5.8h
-; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; NEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -2001,15 +1635,6 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v2i32:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.s, vl2
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -2038,15 +1663,6 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v4i32:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -2082,17 +1698,6 @@ define void @udiv_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v8i32:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.s, vl4
-; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
-; NEON-NOSVE-NEXT: udivr z0.s, p0/m, z0.s, z1.s
-; NEON-NOSVE-NEXT: movprfx z1, z2
-; NEON-NOSVE-NEXT: udiv z1.s, p0/m, z1.s, z3.s
-; NEON-NOSVE-NEXT: stp q0, q1, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -2146,15 +1751,6 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v1i64:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.d, vl1
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
-; NEON-NOSVE-NEXT: udiv z0.d, p0/m, z0.d, z1.d
-; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
@@ -2180,15 +1776,6 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v2i64:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ptrue p0.d, vl2
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
-; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
-; NEON-NOSVE-NEXT: udiv z0.d, p0/m, z0.d, z1.d
-; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -2218,17 +1805,6 @@ define void @udiv_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
-; NEON-NOSVE-LABEL: udiv_v4i64:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
-; NEON-NOSVE-NEXT: ptrue p0.d, vl2
-; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
-; NEON-NOSVE-NEXT: udivr z0.d, p0/m, z0.d, z1.d
-; NEON-NOSVE-NEXT: movprfx z1, z2
-; NEON-NOSVE-NEXT: udiv z1.d, p0/m, z1.d, z3.d
-; NEON-NOSVE-NEXT: stp q0, q1, [x0]
-; NEON-NOSVE-NEXT: ret
-;
; NONEON-NOSVE-LABEL: udiv_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -2261,64 +1837,22 @@ define void @udiv_v4i64(ptr %a, ptr %b) {
}
define void @udiv_constantsplat_v8i32(ptr %a) {
-; SVE-LABEL: udiv_constantsplat_v8i32:
-; SVE: // %bb.0:
-; SVE-NEXT: mov w8, #8969 // =0x2309
-; SVE-NEXT: ldp q1, q2, [x0]
-; SVE-NEXT: movk w8, #22765, lsl #16
-; SVE-NEXT: ptrue p0.s, vl4
-; SVE-NEXT: mov z0.s, w8
-; SVE-NEXT: movprfx z3, z1
-; SVE-NEXT: umulh z3.s, p0/m, z3.s, z0.s
-; SVE-NEXT: umulh z0.s, p0/m, z0.s, z2.s
-; SVE-NEXT: sub z1.s, z1.s, z3.s
-; SVE-NEXT: sub z2.s, z2.s, z0.s
-; SVE-NEXT: lsr z1.s, z1.s, #1
-; SVE-NEXT: lsr z2.s, z2.s, #1
-; SVE-NEXT: add z1.s, z1.s, z3.s
-; SVE-NEXT: add z0.s, z2.s, z0.s
-; SVE-NEXT: lsr z1.s, z1.s, #6
-; SVE-NEXT: lsr z0.s, z0.s, #6
-; SVE-NEXT: stp q1, q0, [x0]
-; SVE-NEXT: ret
-;
-; SVE2-LABEL: udiv_constantsplat_v8i32:
-; SVE2: // %bb.0:
-; SVE2-NEXT: mov w8, #8969 // =0x2309
-; SVE2-NEXT: ldp q1, q2, [x0]
-; SVE2-NEXT: movk w8, #22765, lsl #16
-; SVE2-NEXT: mov z0.s, w8
-; SVE2-NEXT: umulh z3.s, z1.s, z0.s
-; SVE2-NEXT: umulh z0.s, z2.s, z0.s
-; SVE2-NEXT: sub z1.s, z1.s, z3.s
-; SVE2-NEXT: sub z2.s, z2.s, z0.s
-; SVE2-NEXT: usra z3.s, z1.s, #1
-; SVE2-NEXT: usra z0.s, z2.s, #1
-; SVE2-NEXT: lsr z1.s, z3.s, #6
-; SVE2-NEXT: lsr z0.s, z0.s, #6
-; SVE2-NEXT: stp q1, q0, [x0]
-; SVE2-NEXT: ret
-;
-; NEON-NOSVE-LABEL: udiv_constantsplat_v8i32:
-; NEON-NOSVE: // %bb.0:
-; NEON-NOSVE-NEXT: mov w8, #8969 // =0x2309
-; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
-; NEON-NOSVE-NEXT: movk w8, #22765, lsl #16
-; NEON-NOSVE-NEXT: dup v0.4s, w8
-; NEON-NOSVE-NEXT: umull2 v3.2d, v1.4s, v0.4s
-; NEON-NOSVE-NEXT: umull v4.2d, v1.2s, v0.2s
-; NEON-NOSVE-NEXT: umull2 v5.2d, v2.4s, v0.4s
-; NEON-NOSVE-NEXT: umull v0.2d, v2.2s, v0.2s
-; NEON-NOSVE-NEXT: uzp2 v3.4s, v4.4s, v3.4s
-; NEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v5.4s
-; NEON-NOSVE-NEXT: sub v1.4s, v1.4s, v3.4s
-; NEON-NOSVE-NEXT: sub v2.4s, v2.4s, v0.4s
-; NEON-NOSVE-NEXT: usra v3.4s, v1.4s, #1
-; NEON-NOSVE-NEXT: usra v0.4s, v2.4s, #1
-; NEON-NOSVE-NEXT: ushr v1.4s, v3.4s, #6
-; NEON-NOSVE-NEXT: ushr v0.4s, v0.4s, #6
-; NEON-NOSVE-NEXT: stp q1, q0, [x0]
-; NEON-NOSVE-NEXT: ret
+; CHECK-LABEL: udiv_constantsplat_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8969 // =0x2309
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: movk w8, #22765, lsl #16
+; CHECK-NEXT: mov z0.s, w8
+; CHECK-NEXT: umulh z3.s, z1.s, z0.s
+; CHECK-NEXT: umulh z0.s, z2.s, z0.s
+; CHECK-NEXT: sub z1.s, z1.s, z3.s
+; CHECK-NEXT: sub z2.s, z2.s, z0.s
+; CHECK-NEXT: usra z3.s, z1.s, #1
+; CHECK-NEXT: usra z0.s, z2.s, #1
+; CHECK-NEXT: lsr z1.s, z3.s, #6
+; CHECK-NEXT: lsr z0.s, z0.s, #6
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: udiv_constantsplat_v8i32:
; NONEON-NOSVE: // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index b4641172f8b06d..9497ec88e57b4d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -76,10 +76,10 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
+; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -160,23 +160,23 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: mov z3.d, z1.d
; CHECK-NEXT: sunpklo z5.s, z5.h
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: sunpklo z3.h, z3.b
; CHECK-NEXT: sunpklo z6.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h
-; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h
+; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
+; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b
+; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -300,14 +300,12 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sunpklo z4.s, z16.h
; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8
; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: sunpklo z7.s, z7.h
; CHECK-NEXT: movprfx z6, z4
; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z3.s
; CHECK-NEXT: ldr q3, [x0]
; CHECK-NEXT: ldr q4, [x1]
; CHECK-NEXT: sunpklo z16.s, z16.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: sunpklo z17.h, z4.b
; CHECK-NEXT: sunpklo z18.h, z3.b
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
@@ -317,11 +315,9 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: sunpklo z17.s, z17.h
; CHECK-NEXT: sunpklo z18.s, z18.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s
; CHECK-NEXT: mov z20.d, z3.d
; CHECK-NEXT: ext z20.b, z20.b, z3.b, #8
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
; CHECK-NEXT: sunpklo z20.h, z20.b
; CHECK-NEXT: sunpklo z22.s, z20.h
; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8
@@ -329,32 +325,36 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: mov z18.d, z4.d
; CHECK-NEXT: sunpklo z20.s, z20.h
; CHECK-NEXT: ext z18.b, z18.b, z4.b, #8
-; CHECK-NEXT: uzp1 z16.h, z19.h, z19.h
; CHECK-NEXT: sunpklo z18.h, z18.b
; CHECK-NEXT: sunpklo z21.s, z18.h
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: sunpklo z18.s, z18.h
; CHECK-NEXT: sdivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT: uzp1 z22.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z20.s
+; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z5.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
-; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h
-; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h
+; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
+; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
+; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT: uzp1 z16.h, z21.h, z21.h
+; CHECK-NEXT: splice z2.h, p0, { z19.h, z20.h }
+; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z17.h, z18.h, z18.h
+; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z17.b, z2.b, z2.b
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: splice z2.b, p0, z2.b, z6.b
-; CHECK-NEXT: uzp1 z7.b, z19.b, z19.b
-; CHECK-NEXT: splice z5.b, p0, z5.b, z7.b
+; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b }
+; CHECK-NEXT: uzp1 z18.b, z16.b, z16.b
+; CHECK-NEXT: splice z2.b, p0, { z17.b, z18.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT: mls z3.b, p0/m, z5.b, z4.b
-; CHECK-NEXT: stp q3, q0, [x0]
+; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b
+; CHECK-NEXT: msb z2.b, p0/m, z4.b, z3.b
+; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v32i8:
@@ -600,9 +600,9 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -680,23 +680,23 @@ define void @srem_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT: mov z6.d, z4.d
; CHECK-NEXT: ext z6.b, z6.b, z4.b, #8
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: sunpklo z6.s, z6.h
; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: sunpklo z7.s, z7.h
; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
+; CHECK-NEXT: uzp1 z16.h, z5.h, z5.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z6.h
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z7.h
+; CHECK-NEXT: uzp1 z17.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: mls z3.h, p0/m, z5.h, z4.h
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: stp q3, q0, [x0]
+; CHECK-NEXT: msb z2.h, p0/m, z4.h, z3.h
+; CHECK-NEXT: mls z0.h, p0/m, z5.h, z1.h
+; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: srem_v16i16:
@@ -1126,10 +1126,10 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
+; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -1210,23 +1210,23 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: mov z3.d, z1.d
; CHECK-NEXT: uunpklo z5.s, z5.h
; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: uunpklo z3.h, z3.b
; CHECK-NEXT: uunpklo z6.s, z3.h
; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h
-; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h
+; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
+; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b
+; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -1350,14 +1350,12 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: uunpklo z4.s, z16.h
; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8
; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uunpklo z7.s, z7.h
; CHECK-NEXT: movprfx z6, z4
; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z3.s
; CHECK-NEXT: ldr q3, [x0]
; CHECK-NEXT: ldr q4, [x1]
; CHECK-NEXT: uunpklo z16.s, z16.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: uunpklo z17.h, z4.b
; CHECK-NEXT: uunpklo z18.h, z3.b
; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
@@ -1367,11 +1365,9 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: uunpklo z17.s, z17.h
; CHECK-NEXT: uunpklo z18.s, z18.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s
; CHECK-NEXT: mov z20.d, z3.d
; CHECK-NEXT: ext z20.b, z20.b, z3.b, #8
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
; CHECK-NEXT: uunpklo z20.h, z20.b
; CHECK-NEXT: uunpklo z22.s, z20.h
; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8
@@ -1379,32 +1375,36 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: mov z18.d, z4.d
; CHECK-NEXT: uunpklo z20.s, z20.h
; CHECK-NEXT: ext z18.b, z18.b, z4.b, #8
-; CHECK-NEXT: uzp1 z16.h, z19.h, z19.h
; CHECK-NEXT: uunpklo z18.h, z18.b
; CHECK-NEXT: uunpklo z21.s, z18.h
; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8
; CHECK-NEXT: uunpklo z18.s, z18.h
; CHECK-NEXT: udivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT: uzp1 z22.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z20.s
+; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z5.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
-; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h
-; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h
+; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h
+; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h }
+; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT: uzp1 z16.h, z21.h, z21.h
+; CHECK-NEXT: splice z2.h, p0, { z19.h, z20.h }
+; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z17.h, z18.h, z18.h
+; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z17.b, z2.b, z2.b
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: splice z2.b, p0, z2.b, z6.b
-; CHECK-NEXT: uzp1 z7.b, z19.b, z19.b
-; CHECK-NEXT: splice z5.b, p0, z5.b, z7.b
+; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b }
+; CHECK-NEXT: uzp1 z18.b, z16.b, z16.b
+; CHECK-NEXT: splice z2.b, p0, { z17.b, z18.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT: mls z3.b, p0/m, z5.b, z4.b
-; CHECK-NEXT: stp q3, q0, [x0]
+; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b
+; CHECK-NEXT: msb z2.b, p0/m, z4.b, z3.b
+; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v32i8:
@@ -1650,9 +1650,9 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -1730,23 +1730,23 @@ define void @urem_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
; CHECK-NEXT: mov z6.d, z4.d
; CHECK-NEXT: ext z6.b, z6.b, z4.b, #8
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uunpklo z6.s, z6.h
; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: mov z7.d, z1.d
; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: uunpklo z7.s, z7.h
; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s
+; CHECK-NEXT: uzp1 z16.h, z5.h, z5.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z6.h
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z7.h
+; CHECK-NEXT: uzp1 z17.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h }
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: mls z3.h, p0/m, z5.h, z4.h
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: stp q3, q0, [x0]
+; CHECK-NEXT: msb z2.h, p0/m, z4.h, z3.h
+; CHECK-NEXT: mls z0.h, p0/m, z5.h, z1.h
+; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: urem_v16i16:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index 5235423c00d9a1..e07036f2a1acfc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -11,28 +11,28 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: fcmne p0.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z3.s, #0.0
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
+; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h
+; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: umaxv b0, p0, z1.b
+; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
@@ -120,49 +120,49 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) {
define i1 @ptest_or_v16i1(ptr %a, ptr %b) {
; CHECK-LABEL: ptest_or_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0, #32]
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: ldp q4, q5, [x1, #32]
-; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: ldp q0, q1, [x1]
+; CHECK-NEXT: ldp q2, q3, [x1, #32]
+; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: fcmne p1.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: ldp q1, q6, [x1]
; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: fcmne p4.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fcmne p5.s, p0/z, z5.s, #0.0
-; CHECK-NEXT: fcmne p6.s, p0/z, z4.s, #0.0
-; CHECK-NEXT: fcmne p7.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p5.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: fcmne p4.s, p0/z, z5.s, #0.0
+; CHECK-NEXT: fcmne p7.s, p0/z, z4.s, #0.0
+; CHECK-NEXT: fcmne p6.s, p0/z, z6.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z4.s, p5/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z6.s, p7/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z7.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z17.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z19.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z4.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z6.h
+; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z0.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h
+; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b
-; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
-; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
+; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT: splice z1.b, p0, { z4.b, z5.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: orr z0.d, z1.d, z3.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -329,49 +329,49 @@ declare i1 @llvm.vector.reduce.or.i1.v16i1(<16 x i1>)
define i1 @ptest_and_v16i1(ptr %a, ptr %b) {
; CHECK-LABEL: ptest_and_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0, #32]
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: ldp q4, q5, [x1, #32]
-; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: ldp q0, q1, [x1]
+; CHECK-NEXT: ldp q2, q3, [x1, #32]
+; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: fcmne p1.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: ldp q1, q6, [x1]
; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: fcmne p4.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: fcmne p5.s, p0/z, z5.s, #0.0
-; CHECK-NEXT: fcmne p6.s, p0/z, z4.s, #0.0
-; CHECK-NEXT: fcmne p7.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p5.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: fcmne p4.s, p0/z, z5.s, #0.0
+; CHECK-NEXT: fcmne p7.s, p0/z, z4.s, #0.0
+; CHECK-NEXT: fcmne p6.s, p0/z, z6.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z4.s, p5/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z6.s, p7/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z7.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z17.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z19.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z4.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z6.h
+; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z0.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h
+; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b
-; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
-; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
+; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT: splice z1.b, p0, { z4.b, z5.b }
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: and z0.d, z1.d, z3.d
+; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index c0aa162b19b77d..13fcd94ea8a260 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -129,11 +129,11 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) {
define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
; CHECK-LABEL: store_trunc_v2i256i64:
; CHECK: // %bb.0:
+; CHECK-NEXT: ldr d1, [x0, #32]
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: ldr d0, [x0, #32]
-; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d
-; CHECK-NEXT: str q1, [x1]
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d }
+; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: store_trunc_v2i256i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
index 77aaeeadcfc2f0..9d241f6f927e11 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -13,11 +13,11 @@ target triple = "aarch64-unknown-linux-gnu"
define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i16_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: uzp1 z3.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z2.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -69,18 +69,18 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i16_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
-; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b
+; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b }
; CHECK-NEXT: add z1.b, z2.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8:
@@ -216,27 +216,27 @@ define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind {
define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i16_v64i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: ldp q1, q0, [x0, #64]
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: ldp q4, q5, [x0, #96]
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z7.b, z7.b, z7.b
-; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
-; CHECK-NEXT: splice z4.b, p0, z4.b, z5.b
-; CHECK-NEXT: splice z6.b, p0, z6.b, z7.b
+; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: uzp1 z7.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z6.b, z1.b, z1.b
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
+; CHECK-NEXT: uzp1 z17.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z16.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT: splice z0.b, p0, { z6.b, z7.b }
+; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT: splice z1.b, p0, { z16.b, z17.b }
+; CHECK-NEXT: splice z2.b, p0, { z2.b, z3.b }
+; CHECK-NEXT: splice z3.b, p0, { z4.b, z5.b }
; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: add z2.b, z2.b, z2.b
-; CHECK-NEXT: add z1.b, z4.b, z4.b
-; CHECK-NEXT: add z3.b, z6.b, z6.b
+; CHECK-NEXT: add z3.b, z3.b, z3.b
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
@@ -527,49 +527,49 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #192]
; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: ldp q6, q7, [x0, #64]
-; CHECK-NEXT: ldp q16, q17, [x0, #224]
-; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: ldp q20, q21, [x0, #160]
-; CHECK-NEXT: uzp1 z7.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z17.b, z3.b, z3.b
+; CHECK-NEXT: ldp q3, q18, [x0, #224]
+; CHECK-NEXT: uzp1 z16.b, z2.b, z2.b
+; CHECK-NEXT: ldp q2, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: uzp1 z17.b, z17.b, z17.b
-; CHECK-NEXT: ldp q4, q5, [x0, #96]
-; CHECK-NEXT: uzp1 z16.b, z16.b, z16.b
-; CHECK-NEXT: ldp q18, q19, [x0, #128]
-; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
-; CHECK-NEXT: uzp1 z3.b, z21.b, z21.b
-; CHECK-NEXT: uzp1 z20.b, z20.b, z20.b
-; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT: ldp q21, q22, [x0]
-; CHECK-NEXT: splice z16.b, p0, z16.b, z17.b
+; CHECK-NEXT: uzp1 z21.b, z18.b, z18.b
+; CHECK-NEXT: ldp q18, q22, [x0, #160]
+; CHECK-NEXT: uzp1 z20.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z24.b, z19.b, z19.b
+; CHECK-NEXT: ldp q3, q19, [x0, #96]
+; CHECK-NEXT: uzp1 z23.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z26.b, z22.b, z22.b
+; CHECK-NEXT: splice z2.b, p0, { z16.b, z17.b }
+; CHECK-NEXT: uzp1 z17.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z25.b, z18.b, z18.b
+; CHECK-NEXT: splice z7.b, p0, { z20.b, z21.b }
+; CHECK-NEXT: uzp1 z21.b, z5.b, z5.b
; CHECK-NEXT: uzp1 z19.b, z19.b, z19.b
-; CHECK-NEXT: uzp1 z18.b, z18.b, z18.b
-; CHECK-NEXT: uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT: splice z20.b, p0, z20.b, z3.b
-; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b
-; CHECK-NEXT: splice z6.b, p0, z6.b, z7.b
-; CHECK-NEXT: uzp1 z5.b, z22.b, z22.b
-; CHECK-NEXT: uzp1 z7.b, z21.b, z21.b
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: splice z18.b, p0, z18.b, z19.b
-; CHECK-NEXT: add z2.b, z2.b, z2.b
-; CHECK-NEXT: splice z4.b, p0, z4.b, z3.b
-; CHECK-NEXT: add z3.b, z16.b, z16.b
-; CHECK-NEXT: splice z7.b, p0, z7.b, z5.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: add z1.b, z20.b, z20.b
-; CHECK-NEXT: add z5.b, z18.b, z18.b
-; CHECK-NEXT: stp q2, q3, [x1, #96]
-; CHECK-NEXT: add z2.b, z6.b, z6.b
+; CHECK-NEXT: uzp1 z20.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z5.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z16.b, z6.b, z6.b
+; CHECK-NEXT: splice z6.b, p0, { z23.b, z24.b }
+; CHECK-NEXT: uzp1 z18.b, z3.b, z3.b
+; CHECK-NEXT: splice z3.b, p0, { z25.b, z26.b }
+; CHECK-NEXT: uzp1 z4.b, z0.b, z0.b
+; CHECK-NEXT: add z0.b, z2.b, z2.b
+; CHECK-NEXT: add z7.b, z7.b, z7.b
+; CHECK-NEXT: splice z1.b, p0, { z16.b, z17.b }
+; CHECK-NEXT: splice z2.b, p0, { z18.b, z19.b }
+; CHECK-NEXT: splice z16.b, p0, { z20.b, z21.b }
+; CHECK-NEXT: splice z4.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: add z6.b, z6.b, z6.b
+; CHECK-NEXT: add z3.b, z3.b, z3.b
+; CHECK-NEXT: stp q0, q7, [x1, #96]
+; CHECK-NEXT: add z0.b, z1.b, z1.b
+; CHECK-NEXT: add z1.b, z2.b, z2.b
+; CHECK-NEXT: add z2.b, z16.b, z16.b
+; CHECK-NEXT: stp q6, q3, [x1, #64]
; CHECK-NEXT: add z3.b, z4.b, z4.b
-; CHECK-NEXT: add z4.b, z7.b, z7.b
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: stp q5, q1, [x1, #64]
-; CHECK-NEXT: stp q2, q3, [x1, #32]
-; CHECK-NEXT: stp q4, q0, [x1]
+; CHECK-NEXT: stp q0, q1, [x1, #32]
+; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8:
@@ -1181,11 +1181,11 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i32_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -1219,17 +1219,17 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h
+; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -1277,32 +1277,32 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i32_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
+; CHECK-NEXT: uzp1 z17.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z16.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z19.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h }
+; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z3.b, z6.b, z6.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
-; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b
-; CHECK-NEXT: add z0.b, z2.b, z2.b
-; CHECK-NEXT: add z1.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z7.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z6.b, z1.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b }
+; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: add z1.b, z1.b, z1.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
@@ -1429,56 +1429,56 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #160]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldp q4, q5, [x0, #128]
+; CHECK-NEXT: ldp q4, q5, [x0, #96]
+; CHECK-NEXT: ldp q6, q7, [x0]
+; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT: ldp q3, q18, [x0, #128]
+; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT: ldp q2, q19, [x0, #192]
; CHECK-NEXT: ldp q0, q1, [x0, #64]
-; CHECK-NEXT: ldp q6, q7, [x0, #96]
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: ldp q16, q17, [x0]
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: ldp q18, q19, [x0, #192]
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: ldp q20, q21, [x0, #224]
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: ldp q22, q23, [x0, #32]
-; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h
-; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
-; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h
-; CHECK-NEXT: uzp1 z3.h, z21.h, z21.h
-; CHECK-NEXT: uzp1 z5.h, z20.h, z20.h
-; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h
-; CHECK-NEXT: uzp1 z20.h, z23.h, z23.h
-; CHECK-NEXT: uzp1 z21.h, z22.h, z22.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z18.h, p0, z18.h, z19.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h
-; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
-; CHECK-NEXT: splice z21.h, p0, z21.h, z20.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT: ldp q18, q22, [x0, #224]
+; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h
+; CHECK-NEXT: ldp q3, q23, [x0, #32]
+; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z27.h, z19.h, z19.h
+; CHECK-NEXT: uzp1 z25.h, z22.h, z22.h
+; CHECK-NEXT: uzp1 z26.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z24.h, z18.h, z18.h
+; CHECK-NEXT: uzp1 z18.h, z23.h, z23.h
+; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT: splice z1.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: splice z6.h, p0, { z24.h, z25.h }
+; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z26.h, z27.h }
+; CHECK-NEXT: splice z7.h, p0, { z17.h, z18.h }
+; CHECK-NEXT: uzp1 z17.b, z16.b, z16.b
+; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT: splice z3.h, p0, { z22.h, z23.h }
+; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z16.b, z1.b, z1.b
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z4.b, z18.b, z18.b
-; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z7.b, z16.b, z16.b
-; CHECK-NEXT: uzp1 z5.b, z21.b, z21.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
-; CHECK-NEXT: uzp1 z1.b, z6.b, z6.b
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: splice z4.b, p0, z4.b, z3.b
-; CHECK-NEXT: splice z7.b, p0, z7.b, z5.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: add z1.b, z2.b, z2.b
-; CHECK-NEXT: add z2.b, z4.b, z4.b
-; CHECK-NEXT: add z3.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z1.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
+; CHECK-NEXT: splice z7.b, p0, { z16.b, z17.b }
+; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
+; CHECK-NEXT: splice z4.b, p0, { z5.b, z6.b }
+; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT: add z2.b, z7.b, z7.b
+; CHECK-NEXT: add z3.b, z4.b, z4.b
; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: stp q1, q2, [x1, #32]
-; CHECK-NEXT: stp q3, q0, [x1]
+; CHECK-NEXT: add z1.b, z1.b, z1.b
+; CHECK-NEXT: stp q2, q3, [x1, #32]
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8:
@@ -1765,11 +1765,11 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -1801,18 +1801,18 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i32_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h
+; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: add z1.h, z2.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16:
@@ -1877,27 +1877,27 @@ define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind {
define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i32_v32i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: ldp q1, q0, [x0, #64]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: ldp q4, q5, [x0, #96]
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
+; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
+; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z6.h, z7.h }
+; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: add z2.h, z2.h, z2.h
-; CHECK-NEXT: add z1.h, z4.h, z4.h
-; CHECK-NEXT: add z3.h, z6.h, z6.h
+; CHECK-NEXT: add z3.h, z3.h, z3.h
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
@@ -2027,49 +2027,49 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #192]
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: ldp q6, q7, [x0, #64]
-; CHECK-NEXT: ldp q16, q17, [x0, #224]
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: ldp q20, q21, [x0, #160]
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT: ldp q3, q18, [x0, #224]
+; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT: ldp q2, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h
-; CHECK-NEXT: ldp q4, q5, [x0, #96]
-; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h
-; CHECK-NEXT: ldp q18, q19, [x0, #128]
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: uzp1 z3.h, z21.h, z21.h
-; CHECK-NEXT: uzp1 z20.h, z20.h, z20.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: ldp q21, q22, [x0]
-; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
+; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT: ldp q18, q22, [x0, #160]
+; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z24.h, z19.h, z19.h
+; CHECK-NEXT: ldp q3, q19, [x0, #96]
+; CHECK-NEXT: uzp1 z23.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z26.h, z22.h, z22.h
+; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z17.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z25.h, z18.h, z18.h
+; CHECK-NEXT: splice z7.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: uzp1 z21.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
-; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: splice z20.h, p0, z20.h, z3.h
-; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
-; CHECK-NEXT: uzp1 z5.h, z22.h, z22.h
-; CHECK-NEXT: uzp1 z7.h, z21.h, z21.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z18.h, p0, z18.h, z19.h
-; CHECK-NEXT: add z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z3.h
-; CHECK-NEXT: add z3.h, z16.h, z16.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: add z1.h, z20.h, z20.h
-; CHECK-NEXT: add z5.h, z18.h, z18.h
-; CHECK-NEXT: stp q2, q3, [x1, #96]
-; CHECK-NEXT: add z2.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z20.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z16.h, z6.h, z6.h
+; CHECK-NEXT: splice z6.h, p0, { z23.h, z24.h }
+; CHECK-NEXT: uzp1 z18.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, { z25.h, z26.h }
+; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT: add z0.h, z2.h, z2.h
+; CHECK-NEXT: add z7.h, z7.h, z7.h
+; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h }
+; CHECK-NEXT: splice z16.h, p0, { z20.h, z21.h }
+; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: add z6.h, z6.h, z6.h
+; CHECK-NEXT: add z3.h, z3.h, z3.h
+; CHECK-NEXT: stp q0, q7, [x1, #96]
+; CHECK-NEXT: add z0.h, z1.h, z1.h
+; CHECK-NEXT: add z1.h, z2.h, z2.h
+; CHECK-NEXT: add z2.h, z16.h, z16.h
+; CHECK-NEXT: stp q6, q3, [x1, #64]
; CHECK-NEXT: add z3.h, z4.h, z4.h
-; CHECK-NEXT: add z4.h, z7.h, z7.h
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: stp q5, q1, [x1, #64]
-; CHECK-NEXT: stp q2, q3, [x1, #32]
-; CHECK-NEXT: stp q4, q0, [x1]
+; CHECK-NEXT: stp q0, q1, [x1, #32]
+; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16:
@@ -2360,11 +2360,11 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -2392,18 +2392,18 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
+; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
@@ -2439,34 +2439,34 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z19.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z1.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z18.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z0.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
+; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT: splice z1.s, p0, { z18.s, z19.s }
+; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s }
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -2523,62 +2523,62 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i64_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q5, q6, [x0, #224]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q2, q3, [x0, #224]
-; CHECK-NEXT: ldp q4, q5, [x0, #32]
-; CHECK-NEXT: ldp q6, q7, [x0, #64]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: ldp q16, q17, [x0, #192]
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: ldp q18, q19, [x0, #128]
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: ldp q20, q21, [x0, #160]
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT: ldp q22, q23, [x0, #96]
-; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s
-; CHECK-NEXT: uzp1 z21.s, z21.s, z21.s
-; CHECK-NEXT: uzp1 z20.s, z20.s, z20.s
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: uzp1 z23.s, z23.s, z23.s
-; CHECK-NEXT: uzp1 z22.s, z22.s, z22.s
-; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s
-; CHECK-NEXT: splice z20.s, p0, z20.s, z21.s
-; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s
-; CHECK-NEXT: splice z22.s, p0, z22.s, z23.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
+; CHECK-NEXT: ldp q4, q7, [x0, #64]
+; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s
+; CHECK-NEXT: ldp q6, q18, [x0, #192]
+; CHECK-NEXT: uzp1 z16.s, z5.s, z5.s
+; CHECK-NEXT: ldp q5, q19, [x0, #128]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s
+; CHECK-NEXT: ldp q18, q22, [x0, #160]
+; CHECK-NEXT: uzp1 z20.s, z6.s, z6.s
+; CHECK-NEXT: ldp q6, q23, [x0, #96]
+; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s }
+; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s
+; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s
+; CHECK-NEXT: uzp1 z26.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s
+; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s
+; CHECK-NEXT: uzp1 z23.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s
+; CHECK-NEXT: uzp1 z6.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z22.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z5.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT: splice z3.s, p0, { z20.s, z21.s }
+; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
+; CHECK-NEXT: splice z0.s, p0, { z24.s, z25.s }
+; CHECK-NEXT: splice z7.s, p0, { z26.s, z27.s }
+; CHECK-NEXT: splice z4.s, p0, { z17.s, z18.s }
+; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h
+; CHECK-NEXT: splice z5.s, p0, { z5.s, z6.s }
+; CHECK-NEXT: splice z6.s, p0, { z22.s, z23.s }
+; CHECK-NEXT: splice z1.s, p0, { z1.s, z2.s }
+; CHECK-NEXT: uzp1 z16.h, z3.h, z3.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z2.h, z16.h, z16.h
-; CHECK-NEXT: uzp1 z3.h, z20.h, z20.h
-; CHECK-NEXT: uzp1 z5.h, z18.h, z18.h
-; CHECK-NEXT: uzp1 z7.h, z22.h, z22.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h
-; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z19.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z18.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h
+; CHECK-NEXT: splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h }
+; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h }
+; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z3.b, z6.b, z6.b
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z3.b
-; CHECK-NEXT: add z1.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z7.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z6.b, z3.b, z3.b
+; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b }
; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: add z1.b, z1.b, z1.b
+; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8:
@@ -2731,11 +2731,11 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -2763,17 +2763,17 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
+; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
+; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -2810,32 +2810,32 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q0, q1, [x0, #96]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: ldp q4, q5, [x0, #64]
; CHECK-NEXT: ldp q6, q7, [x0]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z1.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z19.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z0.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z18.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
+; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT: splice z5.s, p0, { z18.s, z19.s }
+; CHECK-NEXT: splice z1.s, p0, { z2.s, z3.s }
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
-; CHECK-NEXT: add z0.h, z2.h, z2.h
-; CHECK-NEXT: add z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z2.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h
+; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h }
+; CHECK-NEXT: splice z1.h, p0, { z6.h, z7.h }
+; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: add z1.h, z1.h, z1.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
;
@@ -2915,56 +2915,56 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #160]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q4, q5, [x0, #128]
+; CHECK-NEXT: ldp q4, q5, [x0, #96]
+; CHECK-NEXT: ldp q6, q7, [x0]
+; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT: ldp q3, q18, [x0, #128]
+; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s
+; CHECK-NEXT: ldp q2, q19, [x0, #192]
; CHECK-NEXT: ldp q0, q1, [x0, #64]
-; CHECK-NEXT: ldp q6, q7, [x0, #96]
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: ldp q16, q17, [x0]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: ldp q18, q19, [x0, #192]
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT: ldp q20, q21, [x0, #224]
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: ldp q22, q23, [x0, #32]
-; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
-; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s
-; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT: uzp1 z3.s, z21.s, z21.s
-; CHECK-NEXT: uzp1 z5.s, z20.s, z20.s
-; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT: uzp1 z20.s, z23.s, z23.s
-; CHECK-NEXT: uzp1 z21.s, z22.s, z22.s
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s
-; CHECK-NEXT: splice z5.s, p0, z5.s, z3.s
-; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s
-; CHECK-NEXT: splice z21.s, p0, z21.s, z20.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s
+; CHECK-NEXT: ldp q18, q22, [x0, #224]
+; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s
+; CHECK-NEXT: ldp q3, q23, [x0, #32]
+; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s }
+; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s
+; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s
+; CHECK-NEXT: uzp1 z26.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s
+; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s
+; CHECK-NEXT: uzp1 z23.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z22.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
+; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s
+; CHECK-NEXT: splice z1.s, p0, { z20.s, z21.s }
+; CHECK-NEXT: splice z6.s, p0, { z24.s, z25.s }
+; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s
+; CHECK-NEXT: splice z0.s, p0, { z26.s, z27.s }
+; CHECK-NEXT: splice z7.s, p0, { z17.s, z18.s }
+; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h
+; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT: splice z3.s, p0, { z22.s, z23.s }
+; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s }
+; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z4.h, z18.h, z18.h
-; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h
-; CHECK-NEXT: uzp1 z5.h, z21.h, z21.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h
-; CHECK-NEXT: uzp1 z1.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z3.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: add z1.h, z2.h, z2.h
-; CHECK-NEXT: add z2.h, z4.h, z4.h
-; CHECK-NEXT: add z3.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT: splice z7.h, p0, { z16.h, z17.h }
+; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
+; CHECK-NEXT: splice z4.h, p0, { z5.h, z6.h }
+; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT: add z2.h, z7.h, z7.h
+; CHECK-NEXT: add z3.h, z4.h, z4.h
; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: stp q1, q2, [x1, #32]
-; CHECK-NEXT: stp q3, q0, [x1]
+; CHECK-NEXT: add z1.h, z1.h, z1.h
+; CHECK-NEXT: stp q2, q3, [x1, #32]
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16:
@@ -3118,11 +3118,11 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
; CHECK-LABEL: trunc_v4i64_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s }
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
@@ -3146,18 +3146,18 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: add z0.s, z0.s, z0.s
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
+; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s }
+; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s }
; CHECK-NEXT: add z1.s, z2.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: add z0.s, z0.s, z0.s
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32:
@@ -3202,27 +3202,27 @@ define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind {
define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: ldp q1, q0, [x0, #64]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: ldp q4, q5, [x0, #96]
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: uzp1 z7.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z6.s, z1.s, z1.s
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
+; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z2.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s
+; CHECK-NEXT: splice z0.s, p0, { z6.s, z7.s }
+; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s
+; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s }
+; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT: splice z3.s, p0, { z4.s, z5.s }
; CHECK-NEXT: add z0.s, z0.s, z0.s
+; CHECK-NEXT: add z1.s, z1.s, z1.s
; CHECK-NEXT: add z2.s, z2.s, z2.s
-; CHECK-NEXT: add z1.s, z4.s, z4.s
-; CHECK-NEXT: add z3.s, z6.s, z6.s
+; CHECK-NEXT: add z3.s, z3.s, z3.s
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
@@ -3297,49 +3297,49 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q2, q3, [x0, #192]
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: ldp q6, q7, [x0, #64]
-; CHECK-NEXT: ldp q16, q17, [x0, #224]
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: ldp q20, q21, [x0, #160]
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT: ldp q3, q18, [x0, #224]
+; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s
+; CHECK-NEXT: ldp q2, q19, [x0, #128]
; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT: ldp q4, q5, [x0, #96]
-; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT: ldp q18, q19, [x0, #128]
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: uzp1 z3.s, z21.s, z21.s
-; CHECK-NEXT: uzp1 z20.s, z20.s, z20.s
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: ldp q21, q22, [x0]
-; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s
+; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s
+; CHECK-NEXT: ldp q18, q22, [x0, #160]
+; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z24.s, z19.s, z19.s
+; CHECK-NEXT: ldp q3, q19, [x0, #96]
+; CHECK-NEXT: uzp1 z23.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z26.s, z22.s, z22.s
+; CHECK-NEXT: splice z2.s, p0, { z16.s, z17.s }
+; CHECK-NEXT: uzp1 z17.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z25.s, z18.s, z18.s
+; CHECK-NEXT: splice z7.s, p0, { z20.s, z21.s }
+; CHECK-NEXT: uzp1 z21.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: splice z20.s, p0, z20.s, z3.s
-; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
-; CHECK-NEXT: uzp1 z5.s, z22.s, z22.s
-; CHECK-NEXT: uzp1 z7.s, z21.s, z21.s
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s
-; CHECK-NEXT: add z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z3.s
-; CHECK-NEXT: add z3.s, z16.s, z16.s
-; CHECK-NEXT: splice z7.s, p0, z7.s, z5.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: add z1.s, z20.s, z20.s
-; CHECK-NEXT: add z5.s, z18.s, z18.s
-; CHECK-NEXT: stp q2, q3, [x1, #96]
-; CHECK-NEXT: add z2.s, z6.s, z6.s
+; CHECK-NEXT: uzp1 z20.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z16.s, z6.s, z6.s
+; CHECK-NEXT: splice z6.s, p0, { z23.s, z24.s }
+; CHECK-NEXT: uzp1 z18.s, z3.s, z3.s
+; CHECK-NEXT: splice z3.s, p0, { z25.s, z26.s }
+; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s
+; CHECK-NEXT: add z0.s, z2.s, z2.s
+; CHECK-NEXT: add z7.s, z7.s, z7.s
+; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s }
+; CHECK-NEXT: splice z2.s, p0, { z18.s, z19.s }
+; CHECK-NEXT: splice z16.s, p0, { z20.s, z21.s }
+; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s }
+; CHECK-NEXT: add z6.s, z6.s, z6.s
+; CHECK-NEXT: add z3.s, z3.s, z3.s
+; CHECK-NEXT: stp q0, q7, [x1, #96]
+; CHECK-NEXT: add z0.s, z1.s, z1.s
+; CHECK-NEXT: add z1.s, z2.s, z2.s
+; CHECK-NEXT: add z2.s, z16.s, z16.s
+; CHECK-NEXT: stp q6, q3, [x1, #64]
; CHECK-NEXT: add z3.s, z4.s, z4.s
-; CHECK-NEXT: add z4.s, z7.s, z7.s
-; CHECK-NEXT: add z0.s, z0.s, z0.s
-; CHECK-NEXT: stp q5, q1, [x1, #64]
-; CHECK-NEXT: stp q2, q3, [x1, #32]
-; CHECK-NEXT: stp q4, q0, [x1]
+; CHECK-NEXT: stp q0, q1, [x1, #32]
+; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32:
More information about the llvm-commits
mailing list