[llvm] 4d2f0f7 - [AArch64][SME] Avoid going through memory for streaming-compatible splats
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 5 05:05:20 PST 2022
Author: Sander de Smalen
Date: 2022-12-05T13:04:30Z
New Revision: 4d2f0f723a0226f2b41fba4b9226fc03da3c36a6
URL: https://github.com/llvm/llvm-project/commit/4d2f0f723a0226f2b41fba4b9226fc03da3c36a6
DIFF: https://github.com/llvm/llvm-project/commit/4d2f0f723a0226f2b41fba4b9226fc03da3c36a6.diff
LOG: [AArch64][SME] Avoid going through memory for streaming-compatible splats
Reviewed By: david-arm, paulwalker-arm
Differential Revision: https://reviews.llvm.org/D139111
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9a9cbcf31130..4ae57c9ec9e1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1633,6 +1633,7 @@ void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -11434,7 +11435,8 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (useSVEForFixedLengthVectorVT(VT))
+ if (useSVEForFixedLengthVectorVT(VT,
+ Subtarget->forceStreamingCompatibleSVE()))
return LowerToScalableOp(Op, DAG);
assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 &&
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index d55678c481e9..fe177ddb5fff 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -10,15 +10,11 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @ctlz_v4i8(<4 x i8> %op) #0 {
; CHECK-LABEL: ctlz_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI0_1]
-; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: clz z0.h, p0/m, z0.h
-; CHECK-NEXT: sub z0.h, z0.h, z2.h
+; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op)
@@ -67,15 +63,11 @@ define void @ctlz_v32i8(ptr %a) #0 {
define <2 x i16> @ctlz_v2i16(<2 x i16> %op) #0 {
; CHECK-LABEL: ctlz_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1]
-; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: clz z0.s, p0/m, z0.s
-; CHECK-NEXT: sub z0.s, z0.s, z2.s
+; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op)
@@ -206,11 +198,9 @@ define void @ctlz_v4i64(ptr %a) #0 {
define <4 x i8> @ctpop_v4i8(<4 x i8> %op) #0 {
; CHECK-LABEL: ctpop_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -260,11 +250,9 @@ define void @ctpop_v32i8(ptr %a) #0 {
define <2 x i16> @ctpop_v2i16(<2 x i16> %op) #0 {
; CHECK-LABEL: ctpop_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -396,11 +384,9 @@ define void @ctpop_v4i64(ptr %a) #0 {
define <4 x i8> @cttz_v4i8(<4 x i8> %op) #0 {
; CHECK-LABEL: cttz_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI28_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI28_0]
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: orr z0.h, z0.h, #0x100
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -455,11 +441,9 @@ define void @cttz_v32i8(ptr %a) #0 {
define <2 x i16> @cttz_v2i16(<2 x i16> %op) #0 {
; CHECK-LABEL: cttz_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI32_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI32_0]
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: orr z0.s, z0.s, #0x10000
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index 699cd1d5f3c6..5ef230dd5c9a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -12,23 +12,20 @@ target triple = "aarch64"
define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_ptr) #0 {
; CHECK-LABEL: fixed_bitselect_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: adrp x8, .LCPI0_1
-; CHECK-NEXT: ldp q3, q4, [x1]
-; CHECK-NEXT: sub z6.s, z2.s, z1.s
-; CHECK-NEXT: sub z2.s, z2.s, z0.s
-; CHECK-NEXT: and z3.d, z6.d, z3.d
-; CHECK-NEXT: ldp q7, q16, [x2]
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: add z7.s, z3.s, z0.s
+; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0
+; CHECK-NEXT: ldp q1, q4, [x1]
+; CHECK-NEXT: add z0.s, z2.s, z0.s
+; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0
+; CHECK-NEXT: and z1.d, z3.d, z1.d
+; CHECK-NEXT: ldp q5, q6, [x2]
; CHECK-NEXT: and z2.d, z2.d, z4.d
-; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI0_1]
-; CHECK-NEXT: add z1.s, z1.s, z5.s
-; CHECK-NEXT: add z0.s, z0.s, z5.s
-; CHECK-NEXT: and z4.d, z0.d, z16.d
-; CHECK-NEXT: and z0.d, z1.d, z7.d
-; CHECK-NEXT: orr z0.d, z0.d, z3.d
-; CHECK-NEXT: orr z1.d, z4.d, z2.d
+; CHECK-NEXT: and z3.d, z0.d, z6.d
+; CHECK-NEXT: and z0.d, z7.d, z5.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: orr z1.d, z3.d, z2.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 3e7204d804f8..7cac1fc06987 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -45,17 +45,16 @@ define <2 x i64> @load_zext_v2i32i64(<2 x i32>* %ap) #0 {
define <2 x i256> @load_zext_v2i64i256(<2 x i64>* %ap) #0 {
; CHECK-LABEL: load_zext_v2i64i256:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: mov x5, xzr
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: mov z2.d, z0.d[1]
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: fmov x4, d2
+; CHECK-NEXT: fmov x2, d0
+; CHECK-NEXT: fmov x3, d2
+; CHECK-NEXT: mov x1, xzr
; CHECK-NEXT: mov z0.d, z1.d[1]
-; CHECK-NEXT: fmov x2, d1
-; CHECK-NEXT: fmov x3, d0
+; CHECK-NEXT: fmov x0, d1
+; CHECK-NEXT: fmov x4, d0
+; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: mov x6, x2
; CHECK-NEXT: mov x7, x3
; CHECK-NEXT: ret
@@ -136,33 +135,30 @@ define <4 x i256> @load_sext_v4i32i256(<4 x i32>* %ap) #0 {
define <2 x i256> @load_sext_v2i64i256(<2 x i64>* %ap) #0 {
; CHECK-LABEL: load_sext_v2i64i256:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: fmov x10, d0
; CHECK-NEXT: asr x9, x8, #63
-; CHECK-NEXT: stp x8, x9, [sp, #16]
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: stp x9, x9, [sp]
-; CHECK-NEXT: ldp q1, q0, [sp]
-; CHECK-NEXT: asr x10, x8, #63
-; CHECK-NEXT: stp x8, x10, [sp, #48]
-; CHECK-NEXT: fmov x2, d1
-; CHECK-NEXT: stp x10, x10, [sp, #32]
-; CHECK-NEXT: ldp q3, q2, [sp, #32]
-; CHECK-NEXT: mov z4.d, z0.d[1]
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: asr x11, x10, #63
+; CHECK-NEXT: stp x8, x9, [sp, #-32]!
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: mov z0.d, x9
+; CHECK-NEXT: stp x10, x11, [sp, #16]
+; CHECK-NEXT: mov z1.d, z0.d[1]
+; CHECK-NEXT: fmov x2, d0
+; CHECK-NEXT: mov z0.d, x11
+; CHECK-NEXT: fmov x3, d1
+; CHECK-NEXT: ldp q1, q3, [sp], #32
+; CHECK-NEXT: mov z2.d, z0.d[1]
+; CHECK-NEXT: fmov x6, d0
; CHECK-NEXT: mov z0.d, z1.d[1]
-; CHECK-NEXT: fmov x1, d4
-; CHECK-NEXT: fmov x3, d0
-; CHECK-NEXT: fmov x6, d3
-; CHECK-NEXT: mov z1.d, z2.d[1]
-; CHECK-NEXT: fmov x4, d2
-; CHECK-NEXT: mov z2.d, z3.d[1]
-; CHECK-NEXT: fmov x5, d1
+; CHECK-NEXT: fmov x0, d1
+; CHECK-NEXT: mov z1.d, z3.d[1]
; CHECK-NEXT: fmov x7, d2
-; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: fmov x4, d3
+; CHECK-NEXT: fmov x1, d0
+; CHECK-NEXT: fmov x5, d1
; CHECK-NEXT: ret
%a = load <2 x i64>, <2 x i64>* %ap
%val = sext <2 x i64> %a to <2 x i256>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index e91439b027aa..c4f4eef63068 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -285,17 +285,16 @@ define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: fcmp_ugt_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: fcmge p0.h, p0/z, z3.h, z2.h
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z4.d
-; CHECK-NEXT: eor z1.d, z1.d, z4.d
-; CHECK-NEXT: stp q0, q1, [x2]
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
@@ -337,17 +336,16 @@ define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: fcmp_ult_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: fcmge p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z4.d
-; CHECK-NEXT: eor z1.d, z1.d, z4.d
-; CHECK-NEXT: stp q0, q1, [x2]
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
@@ -389,17 +387,16 @@ define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: fcmp_uge_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z4.d
-; CHECK-NEXT: eor z1.d, z1.d, z4.d
-; CHECK-NEXT: stp q0, q1, [x2]
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
@@ -441,17 +438,16 @@ define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: fcmp_ule_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: adrp x8, .LCPI20_0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI20_0]
+; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: fcmgt p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z4.d
-; CHECK-NEXT: eor z1.d, z1.d, z4.d
-; CHECK-NEXT: stp q0, q1, [x2]
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
@@ -493,17 +489,16 @@ define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: fcmp_ord_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q3, [x1]
-; CHECK-NEXT: adrp x8, .LCPI22_0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: fcmuo p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: eor z0.d, z0.d, z4.d
-; CHECK-NEXT: eor z1.d, z1.d, z4.d
-; CHECK-NEXT: stp q0, q1, [x2]
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
+; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
index 68b4f2857b46..5970d2f6beac 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
@@ -7,13 +7,12 @@ target triple = "aarch64-unknown-linux-gnu"
define void @fp_convert_combine_crash(<8 x float> *%a, <8 x i32> *%b) #0 {
; CHECK-LABEL: fp_convert_combine_crash:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: fmov z2.s, #8.00000000
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q0, q2, [x0]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index bfc25873aa06..aac9373952b3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -6,25 +6,15 @@ target triple = "aarch64-unknown-linux-gnu"
define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI0_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_0]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x half> %op1, <2 x half> %op2
ret <2 x half> %sel
@@ -33,25 +23,15 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) #0 {
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI1_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI1_0]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2
ret <4 x half> %sel
@@ -60,29 +40,15 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) #0 {
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI2_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI2_0]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: strh w8, [sp, #4]
-; CHECK-NEXT: strh w8, [sp, #2]
-; CHECK-NEXT: strh w8, [sp]
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2
ret <8 x half> %sel
@@ -91,34 +57,20 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) #0 {
define void @select_v16f16(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-LABEL: select_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: adrp x9, .LCPI3_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI3_0]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: strh w8, [sp, #4]
-; CHECK-NEXT: strh w8, [sp, #2]
-; CHECK-NEXT: strh w8, [sp]
-; CHECK-NEXT: ldr q4, [sp]
-; CHECK-NEXT: eor z5.d, z4.d, z5.d
-; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: mov z4.h, w8
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
; CHECK-NEXT: and z0.d, z0.d, z4.d
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: and z3.d, z3.d, z5.d
+; CHECK-NEXT: bic z3.d, z3.d, z4.d
+; CHECK-NEXT: and z1.d, z1.d, z4.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load volatile <16 x half>, ptr %a
%op2 = load volatile <16 x half>, ptr %b
@@ -130,22 +82,17 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) #0 {
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI4_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_0]
-; CHECK-NEXT: stp w8, w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mvn w9, w8
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z3.s, w9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2
ret <2 x float> %sel
@@ -154,23 +101,17 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) #
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI5_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_0]
-; CHECK-NEXT: stp w8, w8, [sp, #8]
-; CHECK-NEXT: stp w8, w8, [sp]
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mvn w9, w8
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z3.s, w9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2
ret <4 x float> %sel
@@ -179,20 +120,15 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) #
define void @select_v8f32(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-LABEL: select_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: adrp x9, .LCPI6_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q1, [x0, #16]
+; CHECK-NEXT: mvn w9, w8
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: stp w8, w8, [sp, #8]
-; CHECK-NEXT: stp w8, w8, [sp]
-; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI6_0]
-; CHECK-NEXT: ldr q4, [sp]
-; CHECK-NEXT: eor z5.d, z4.d, z5.d
+; CHECK-NEXT: mov z4.s, w8
+; CHECK-NEXT: mov z5.s, w9
; CHECK-NEXT: and z1.d, z1.d, z4.d
; CHECK-NEXT: and z0.d, z0.d, z4.d
; CHECK-NEXT: and z2.d, z2.d, z5.d
@@ -200,7 +136,6 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load volatile <8 x float>, ptr %a
%op2 = load volatile <8 x float>, ptr %b
@@ -213,13 +148,12 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
; CHECK-LABEL: select_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov x9, #-1
-; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov d3, x9
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm x8, ne
+; CHECK-NEXT: mvn x9, x8
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z3.d, x9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
@@ -235,18 +169,14 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: adrp x9, .LCPI8_0
; CHECK-NEXT: csetm x8, ne
-; CHECK-NEXT: stp x8, x8, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI8_0]
+; CHECK-NEXT: mvn x9, x8
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z3.d, x9
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2
ret <2 x double> %sel
@@ -259,22 +189,18 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: ldr q1, [x0, #16]
+; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: ldr q2, [x1]
-; CHECK-NEXT: adrp x9, .LCPI9_0
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: stp x8, x8, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI9_0]
-; CHECK-NEXT: ldr q5, [sp]
-; CHECK-NEXT: eor z4.d, z5.d, z4.d
-; CHECK-NEXT: and z1.d, z1.d, z5.d
-; CHECK-NEXT: and z0.d, z0.d, z5.d
-; CHECK-NEXT: and z2.d, z2.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z4.d
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: mov z5.d, x9
+; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: and z0.d, z0.d, z4.d
+; CHECK-NEXT: and z2.d, z2.d, z5.d
+; CHECK-NEXT: and z3.d, z3.d, z5.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load volatile <4 x double>, ptr %a
%op2 = load volatile <4 x double>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index e0764eacc62f..7289d740160a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -468,7 +468,8 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs w8, d0
-; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
@@ -718,7 +719,8 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i64>
ret <1 x i64> %res
@@ -1219,7 +1221,8 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs w8, d0
-; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: mov z0.h, w8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
@@ -1469,7 +1472,8 @@ define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) #0 {
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i64>
ret <1 x i64> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 38351d1e45d0..0830df07745a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -10,23 +10,18 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: mov z3.s, z2.s[1]
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: fmov w10, s3
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: fmov w9, s3
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: adrp x8, .LCPI0_1
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: ldr d3, [sp, #8]
-; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI0_1]
-; CHECK-NEXT: lsl z3.h, p0/m, z3.h, z2.h
-; CHECK-NEXT: asrr z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: strh w9, [sp, #10]
+; CHECK-NEXT: ldr d2, [sp, #8]
+; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: add sp, sp, #16
@@ -38,19 +33,14 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: select_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: adrp x9, .LCPI1_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI1_1]
-; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -61,20 +51,15 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: select_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
-; CHECK-NEXT: adrp x9, .LCPI2_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.h, z2.b
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI2_1]
-; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -85,23 +70,19 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask
define void @select_v16f16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: select_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: fcmeq p1.h, p0/z, z2.h, z1.h
-; CHECK-NEXT: fcmeq p0.h, p0/z, z3.h, z0.h
-; CHECK-NEXT: mov z5.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z6.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: eor z5.d, z5.d, z4.d
-; CHECK-NEXT: eor z4.d, z6.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z6.d
-; CHECK-NEXT: and z0.d, z0.d, z4.d
-; CHECK-NEXT: and z1.d, z1.d, z5.d
-; CHECK-NEXT: orr z0.d, z3.d, z0.d
-; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: fcmeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z3.h
+; CHECK-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
+; CHECK-NEXT: bic z3.d, z3.d, z5.d
+; CHECK-NEXT: and z0.d, z0.d, z5.d
+; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: orr z0.d, z0.d, z3.d
+; CHECK-NEXT: orr z1.d, z1.d, z2.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x half>, ptr %a
@@ -115,19 +96,14 @@ define void @select_v16f16(ptr %a, ptr %b) #0 {
define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: select_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI4_1]
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -138,20 +114,15 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: select_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI5_0
-; CHECK-NEXT: adrp x9, .LCPI5_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI5_1]
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -162,23 +133,19 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m
define void @select_v8f32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: select_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: adrp x8, .LCPI6_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: fcmeq p1.s, p0/z, z2.s, z1.s
-; CHECK-NEXT: fcmeq p0.s, p0/z, z3.s, z0.s
-; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z6.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: eor z5.d, z5.d, z4.d
-; CHECK-NEXT: eor z4.d, z6.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z6.d
-; CHECK-NEXT: and z0.d, z0.d, z4.d
-; CHECK-NEXT: and z1.d, z1.d, z5.d
-; CHECK-NEXT: orr z0.d, z3.d, z0.d
-; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: fcmeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z3.s
+; CHECK-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
+; CHECK-NEXT: bic z3.d, z3.d, z5.d
+; CHECK-NEXT: and z0.d, z0.d, z5.d
+; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: orr z0.d, z0.d, z3.d
+; CHECK-NEXT: orr z1.d, z1.d, z2.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x float>, ptr %a
@@ -193,13 +160,12 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
; CHECK-LABEL: select_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov x9, #-1
-; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov d3, x9
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm x8, ne
+; CHECK-NEXT: mvn x9, x8
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z3.d, x9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
@@ -212,20 +178,15 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: select_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: adrp x9, .LCPI8_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI8_1]
-; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d
-; CHECK-NEXT: asr z2.d, p0/m, z2.d, z3.d
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #63
+; CHECK-NEXT: asr z2.d, p0/m, z2.d, #63
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -236,23 +197,19 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
define void @select_v4f64(ptr %a, ptr %b) #0 {
; CHECK-LABEL: select_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: fcmeq p1.d, p0/z, z2.d, z1.d
-; CHECK-NEXT: fcmeq p0.d, p0/z, z3.d, z0.d
-; CHECK-NEXT: mov z5.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z6.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: eor z5.d, z5.d, z4.d
-; CHECK-NEXT: eor z4.d, z6.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z6.d
-; CHECK-NEXT: and z0.d, z0.d, z4.d
-; CHECK-NEXT: and z1.d, z1.d, z5.d
-; CHECK-NEXT: orr z0.d, z3.d, z0.d
-; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: fcmeq p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z3.d
+; CHECK-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
+; CHECK-NEXT: bic z3.d, z3.d, z5.d
+; CHECK-NEXT: and z0.d, z0.d, z5.d
+; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: orr z0.d, z0.d, z3.d
+; CHECK-NEXT: orr z1.d, z1.d, z2.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x double>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index 3b4a43c4a52c..093acf0ac848 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -202,8 +202,8 @@ define <8 x i32> @insertelement_v8i32(<8 x i32>* %a) #0 {
define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) #0 {
; CHECK-LABEL: insertelement_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: mov z0.d, #5 // =0x5
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%r = insertelement <1 x i64> %op1, i64 5, i64 0
ret <1 x i64> %r
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index 37ad519bb2c4..f90afcd99970 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -587,12 +587,10 @@ define void @sub_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
define <4 x i8> @abs_v4i8(<4 x i8> %op1) #0 {
; CHECK-LABEL: abs_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI42_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI42_0]
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: abs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -642,12 +640,10 @@ define void @abs_v32i8(<32 x i8>* %a) #0 {
define <2 x i16> @abs_v2i16(<2 x i16> %op1) #0 {
; CHECK-LABEL: abs_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI46_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI46_0]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 12695bd22977..b22545526faf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -10,15 +10,13 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: sdiv_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: sunpklo z1.s, z1.h
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: ptrue p0.s, vl4
@@ -139,15 +137,13 @@ define void @sdiv_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 {
; CHECK-LABEL: sdiv_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: asr z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #16
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: asr z1.s, p0/m, z1.s, #16
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -312,13 +308,11 @@ define void @sdiv_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: udiv_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: and z0.d, z0.d, z2.d
+; CHECK-NEXT: and z0.h, z0.h, #0xff
+; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
@@ -438,13 +432,11 @@ define void @udiv_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 {
; CHECK-LABEL: udiv_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: and z0.d, z0.d, z2.d
+; CHECK-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -605,26 +597,23 @@ define void @udiv_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
define void @udiv_constantsplat_v8i32(<8 x i32>* %a) #0 {
; CHECK-LABEL: udiv_constantsplat_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI28_0
-; CHECK-NEXT: adrp x9, .LCPI28_1
-; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov w8, #8969
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI28_0]
-; CHECK-NEXT: adrp x8, .LCPI28_2
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI28_1]
-; CHECK-NEXT: movprfx z5, z1
-; CHECK-NEXT: umulh z5.s, p0/m, z5.s, z0.s
-; CHECK-NEXT: sub z1.s, z1.s, z5.s
-; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI28_2]
-; CHECK-NEXT: sub z2.s, z2.s, z0.s
-; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: lsr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: add z1.s, z1.s, z5.s
-; CHECK-NEXT: add z0.s, z2.s, z0.s
-; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z4.s
-; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z4.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: movk w8, #22765, lsl #16
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z2.s
+; CHECK-NEXT: umulh z2.s, p0/m, z2.s, z1.s
+; CHECK-NEXT: sub z0.s, z0.s, z3.s
+; CHECK-NEXT: sub z1.s, z1.s, z2.s
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #1
+; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #1
+; CHECK-NEXT: add z0.s, z0.s, z3.s
+; CHECK-NEXT: add z1.s, z1.s, z2.s
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #6
+; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #6
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, <8 x i32>* %a
%res = udiv <8 x i32> %op1, <i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index 8bbd8a359ecd..fcbb37b77425 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -13,19 +13,17 @@ target triple = "aarch64-unknown-linux-gnu"
define void @sext_v8i1_v8i32(<8 x i1> %a, <8 x i32>* %out) #0 {
; CHECK-LABEL: sext_v8i1_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z2.s, z0.h
+; CHECK-NEXT: uunpklo z1.s, z0.h
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: asr z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%b = sext <8 x i1> %a to <8 x i32>
store <8 x i32> %b, <8 x i32>* %out
@@ -42,19 +40,17 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, <8 x i32>* %out) #0 {
define void @sext_v4i3_v4i64(<4 x i3> %a, <4 x i64>* %out) #0 {
; CHECK-LABEL: sext_v4i3_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: asr z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #61
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #61
+; CHECK-NEXT: asr z1.d, p0/m, z1.d, #61
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, #61
+; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%b = sext <4 x i3> %a to <4 x i64>
store <4 x i64> %b, <4 x i64>* %out
@@ -188,19 +184,17 @@ define void @sext_v32i8_v32i32(<32 x i8>* %in, <32 x i32>* %out) #0 {
define void @sext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) #0 {
; CHECK-LABEL: sext_v4i8_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: asr z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #56
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #56
+; CHECK-NEXT: asr z1.d, p0/m, z1.d, #56
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, #56
+; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%b = sext <4 x i8> %a to <4 x i64>
store <4 x i64>%b, <4 x i64>* %out
@@ -611,10 +605,8 @@ define void @zext_v32i8_v32i32(<32 x i8>* %in, <32 x i32>* %out) #0 {
define void @zext_v4i8_v4i64(<4 x i8> %a, <4 x i64>* %out) #0 {
; CHECK-LABEL: zext_v4i8_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI23_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
index e1013e1810fe..490b96c6a5f9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
@@ -16,12 +16,10 @@ target triple = "aarch64-unknown-linux-gnu"
define void @add_v32i8(ptr %a) #0 {
; CHECK-LABEL: add_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: add z1.b, z1.b, z0.b
-; CHECK-NEXT: add z0.b, z2.b, z0.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
+; CHECK-NEXT: add z1.b, z1.b, #7 // =0x7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i32 0
@@ -34,12 +32,10 @@ define void @add_v32i8(ptr %a) #0 {
define void @add_v16i16(ptr %a) #0 {
; CHECK-LABEL: add_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: add z1.h, z1.h, z0.h
-; CHECK-NEXT: add z0.h, z2.h, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: add z0.h, z0.h, #15 // =0xf
+; CHECK-NEXT: add z1.h, z1.h, #15 // =0xf
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -52,12 +48,10 @@ define void @add_v16i16(ptr %a) #0 {
define void @add_v8i32(ptr %a) #0 {
; CHECK-LABEL: add_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: add z1.s, z1.s, z0.s
-; CHECK-NEXT: add z0.s, z2.s, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: add z0.s, z0.s, #31 // =0x1f
+; CHECK-NEXT: add z1.s, z1.s, #31 // =0x1f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -70,12 +64,10 @@ define void @add_v8i32(ptr %a) #0 {
define void @add_v4i64(ptr %a) #0 {
; CHECK-LABEL: add_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: add z1.d, z1.d, z0.d
-; CHECK-NEXT: add z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: add z0.d, z0.d, #63 // =0x3f
+; CHECK-NEXT: add z1.d, z1.d, #63 // =0x3f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -92,12 +84,10 @@ define void @add_v4i64(ptr %a) #0 {
define void @and_v32i8(ptr %a) #0 {
; CHECK-LABEL: and_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: and z1.d, z1.d, z0.d
-; CHECK-NEXT: and z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: and z0.b, z0.b, #0x7
+; CHECK-NEXT: and z1.b, z1.b, #0x7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i32 0
@@ -110,12 +100,10 @@ define void @and_v32i8(ptr %a) #0 {
define void @and_v16i16(ptr %a) #0 {
; CHECK-LABEL: and_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI5_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT: and z1.d, z1.d, z0.d
-; CHECK-NEXT: and z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: and z0.h, z0.h, #0xf
+; CHECK-NEXT: and z1.h, z1.h, #0xf
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -128,12 +116,10 @@ define void @and_v16i16(ptr %a) #0 {
define void @and_v8i32(ptr %a) #0 {
; CHECK-LABEL: and_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI6_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: and z1.d, z1.d, z0.d
-; CHECK-NEXT: and z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: and z0.s, z0.s, #0x1f
+; CHECK-NEXT: and z1.s, z1.s, #0x1f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -146,12 +132,10 @@ define void @and_v8i32(ptr %a) #0 {
define void @and_v4i64(ptr %a) #0 {
; CHECK-LABEL: and_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI7_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT: and z1.d, z1.d, z0.d
-; CHECK-NEXT: and z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: and z0.d, z0.d, #0x3f
+; CHECK-NEXT: and z1.d, z1.d, #0x3f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -168,13 +152,11 @@ define void @and_v4i64(ptr %a) #0 {
define void @ashr_v32i8(ptr %a) #0 {
; CHECK-LABEL: ashr_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: asr z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: asr z1.b, p0/m, z1.b, #7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i32 0
@@ -187,13 +169,11 @@ define void @ashr_v32i8(ptr %a) #0 {
define void @ashr_v16i16(ptr %a) #0 {
; CHECK-LABEL: ashr_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: asr z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -206,13 +186,11 @@ define void @ashr_v16i16(ptr %a) #0 {
define void @ashr_v8i32(ptr %a) #0 {
; CHECK-LABEL: ashr_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI10_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: asr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: asrr z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -225,13 +203,11 @@ define void @ashr_v8i32(ptr %a) #0 {
define void @ashr_v4i64(ptr %a) #0 {
; CHECK-LABEL: ashr_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI11_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT: asr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: asrr z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -248,13 +224,11 @@ define void @ashr_v4i64(ptr %a) #0 {
define void @icmp_eq_v32i8(ptr %a) #0 {
; CHECK-LABEL: icmp_eq_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI12_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
-; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #7
; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, #7
; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -270,13 +244,11 @@ define void @icmp_eq_v32i8(ptr %a) #0 {
define void @icmp_sge_v16i16(ptr %a) #0 {
; CHECK-LABEL: icmp_sge_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI13_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT: cmpge p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: cmpge p0.h, p0/z, z2.h, z0.h
+; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, #15
; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, #15
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -292,13 +264,11 @@ define void @icmp_sge_v16i16(ptr %a) #0 {
define void @icmp_sgt_v8i32(ptr %a) #0 {
; CHECK-LABEL: icmp_sgt_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: cmpgt p1.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, z0.s
+; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #-8
; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, #-8
; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -314,13 +284,11 @@ define void @icmp_sgt_v8i32(ptr %a) #0 {
define void @icmp_ult_v4i64(ptr %a) #0 {
; CHECK-LABEL: icmp_ult_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI15_0]
-; CHECK-NEXT: cmphi p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: cmplo p1.d, p0/z, z0.d, #63
; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: cmplo p0.d, p0/z, z1.d, #63
; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -340,13 +308,11 @@ define void @icmp_ult_v4i64(ptr %a) #0 {
define void @lshr_v32i8(ptr %a) #0 {
; CHECK-LABEL: lshr_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI16_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT: lsr z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: lsrr z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: lsr z1.b, p0/m, z1.b, #7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -359,13 +325,11 @@ define void @lshr_v32i8(ptr %a) #0 {
define void @lshr_v16i16(ptr %a) #0 {
; CHECK-LABEL: lshr_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI17_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: lsrr z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #15
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -378,13 +342,11 @@ define void @lshr_v16i16(ptr %a) #0 {
define void @lshr_v8i32(ptr %a) #0 {
; CHECK-LABEL: lshr_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: lsrr z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -397,13 +359,11 @@ define void @lshr_v8i32(ptr %a) #0 {
define void @lshr_v4i64(ptr %a) #0 {
; CHECK-LABEL: lshr_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI19_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: lsrr z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #63
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -420,13 +380,12 @@ define void @lshr_v4i64(ptr %a) #0 {
define void @mul_v32i8(ptr %a) #0 {
; CHECK-LABEL: mul_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI20_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.b, #7 // =0x7
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI20_0]
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -439,13 +398,12 @@ define void @mul_v32i8(ptr %a) #0 {
define void @mul_v16i16(ptr %a) #0 {
; CHECK-LABEL: mul_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI21_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.h, #15 // =0xf
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI21_0]
-; CHECK-NEXT: mul z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -458,13 +416,12 @@ define void @mul_v16i16(ptr %a) #0 {
define void @mul_v8i32(ptr %a) #0 {
; CHECK-LABEL: mul_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI22_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.s, #31 // =0x1f
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_0]
-; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -477,13 +434,12 @@ define void @mul_v8i32(ptr %a) #0 {
define void @mul_v4i64(ptr %a) #0 {
; CHECK-LABEL: mul_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI23_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.d, #63 // =0x3f
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT: mul z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -500,12 +456,10 @@ define void @mul_v4i64(ptr %a) #0 {
define void @or_v32i8(ptr %a) #0 {
; CHECK-LABEL: or_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI24_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_0]
-; CHECK-NEXT: orr z1.d, z1.d, z0.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: orr z0.b, z0.b, #0x7
+; CHECK-NEXT: orr z1.b, z1.b, #0x7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -518,12 +472,10 @@ define void @or_v32i8(ptr %a) #0 {
define void @or_v16i16(ptr %a) #0 {
; CHECK-LABEL: or_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI25_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_0]
-; CHECK-NEXT: orr z1.d, z1.d, z0.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: orr z0.h, z0.h, #0xf
+; CHECK-NEXT: orr z1.h, z1.h, #0xf
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -536,12 +488,10 @@ define void @or_v16i16(ptr %a) #0 {
define void @or_v8i32(ptr %a) #0 {
; CHECK-LABEL: or_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI26_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_0]
-; CHECK-NEXT: orr z1.d, z1.d, z0.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: orr z0.s, z0.s, #0x1f
+; CHECK-NEXT: orr z1.s, z1.s, #0x1f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -554,12 +504,10 @@ define void @or_v8i32(ptr %a) #0 {
define void @or_v4i64(ptr %a) #0 {
; CHECK-LABEL: or_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI27_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI27_0]
-; CHECK-NEXT: orr z1.d, z1.d, z0.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: orr z0.d, z0.d, #0x3f
+; CHECK-NEXT: orr z1.d, z1.d, #0x3f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -576,13 +524,11 @@ define void @or_v4i64(ptr %a) #0 {
define void @shl_v32i8(ptr %a) #0 {
; CHECK-LABEL: shl_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI28_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI28_0]
-; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: lslr z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -595,13 +541,11 @@ define void @shl_v32i8(ptr %a) #0 {
define void @shl_v16i16(ptr %a) #0 {
; CHECK-LABEL: shl_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI29_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI29_0]
-; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: lslr z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -614,13 +558,11 @@ define void @shl_v16i16(ptr %a) #0 {
define void @shl_v8i32(ptr %a) #0 {
; CHECK-LABEL: shl_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI30_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI30_0]
-; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -633,13 +575,11 @@ define void @shl_v8i32(ptr %a) #0 {
define void @shl_v4i64(ptr %a) #0 {
; CHECK-LABEL: shl_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI31_0
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI31_0]
-; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -656,13 +596,12 @@ define void @shl_v4i64(ptr %a) #0 {
define void @smax_v32i8(ptr %a) #0 {
; CHECK-LABEL: smax_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI32_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.b, #7 // =0x7
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
-; CHECK-NEXT: smax z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: smax z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smax z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -675,13 +614,12 @@ define void @smax_v32i8(ptr %a) #0 {
define void @smax_v16i16(ptr %a) #0 {
; CHECK-LABEL: smax_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI33_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.h, #15 // =0xf
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI33_0]
-; CHECK-NEXT: smax z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smax z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -694,13 +632,12 @@ define void @smax_v16i16(ptr %a) #0 {
define void @smax_v8i32(ptr %a) #0 {
; CHECK-LABEL: smax_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI34_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.s, #31 // =0x1f
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI34_0]
-; CHECK-NEXT: smax z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smax z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -713,13 +650,12 @@ define void @smax_v8i32(ptr %a) #0 {
define void @smax_v4i64(ptr %a) #0 {
; CHECK-LABEL: smax_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI35_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.d, #63 // =0x3f
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI35_0]
-; CHECK-NEXT: smax z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smax z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -736,13 +672,12 @@ define void @smax_v4i64(ptr %a) #0 {
define void @smin_v32i8(ptr %a) #0 {
; CHECK-LABEL: smin_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI36_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.b, #7 // =0x7
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI36_0]
-; CHECK-NEXT: smin z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: smin z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smin z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -755,13 +690,12 @@ define void @smin_v32i8(ptr %a) #0 {
define void @smin_v16i16(ptr %a) #0 {
; CHECK-LABEL: smin_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI37_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.h, #15 // =0xf
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI37_0]
-; CHECK-NEXT: smin z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smin z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -774,13 +708,12 @@ define void @smin_v16i16(ptr %a) #0 {
define void @smin_v8i32(ptr %a) #0 {
; CHECK-LABEL: smin_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI38_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.s, #31 // =0x1f
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI38_0]
-; CHECK-NEXT: smin z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smin z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -793,13 +726,12 @@ define void @smin_v8i32(ptr %a) #0 {
define void @smin_v4i64(ptr %a) #0 {
; CHECK-LABEL: smin_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI39_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.d, #63 // =0x3f
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI39_0]
-; CHECK-NEXT: smin z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: smin z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -816,12 +748,10 @@ define void @smin_v4i64(ptr %a) #0 {
define void @sub_v32i8(ptr %a) #0 {
; CHECK-LABEL: sub_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI40_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI40_0]
-; CHECK-NEXT: sub z1.b, z1.b, z0.b
-; CHECK-NEXT: sub z0.b, z2.b, z0.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: sub z0.b, z0.b, #7 // =0x7
+; CHECK-NEXT: sub z1.b, z1.b, #7 // =0x7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -834,12 +764,10 @@ define void @sub_v32i8(ptr %a) #0 {
define void @sub_v16i16(ptr %a) #0 {
; CHECK-LABEL: sub_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI41_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI41_0]
-; CHECK-NEXT: sub z1.h, z1.h, z0.h
-; CHECK-NEXT: sub z0.h, z2.h, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: sub z0.h, z0.h, #15 // =0xf
+; CHECK-NEXT: sub z1.h, z1.h, #15 // =0xf
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -852,12 +780,10 @@ define void @sub_v16i16(ptr %a) #0 {
define void @sub_v8i32(ptr %a) #0 {
; CHECK-LABEL: sub_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI42_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI42_0]
-; CHECK-NEXT: sub z1.s, z1.s, z0.s
-; CHECK-NEXT: sub z0.s, z2.s, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: sub z0.s, z0.s, #31 // =0x1f
+; CHECK-NEXT: sub z1.s, z1.s, #31 // =0x1f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -870,12 +796,10 @@ define void @sub_v8i32(ptr %a) #0 {
define void @sub_v4i64(ptr %a) #0 {
; CHECK-LABEL: sub_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI43_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI43_0]
-; CHECK-NEXT: sub z1.d, z1.d, z0.d
-; CHECK-NEXT: sub z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: sub z0.d, z0.d, #63 // =0x3f
+; CHECK-NEXT: sub z1.d, z1.d, #63 // =0x3f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -892,13 +816,12 @@ define void @sub_v4i64(ptr %a) #0 {
define void @umax_v32i8(ptr %a) #0 {
; CHECK-LABEL: umax_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI44_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.b, #7 // =0x7
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI44_0]
-; CHECK-NEXT: umax z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: umax z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umax z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -911,13 +834,12 @@ define void @umax_v32i8(ptr %a) #0 {
define void @umax_v16i16(ptr %a) #0 {
; CHECK-LABEL: umax_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI45_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.h, #15 // =0xf
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI45_0]
-; CHECK-NEXT: umax z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umax z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -930,13 +852,12 @@ define void @umax_v16i16(ptr %a) #0 {
define void @umax_v8i32(ptr %a) #0 {
; CHECK-LABEL: umax_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI46_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.s, #31 // =0x1f
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI46_0]
-; CHECK-NEXT: umax z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umax z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -949,13 +870,12 @@ define void @umax_v8i32(ptr %a) #0 {
define void @umax_v4i64(ptr %a) #0 {
; CHECK-LABEL: umax_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI47_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.d, #63 // =0x3f
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
-; CHECK-NEXT: umax z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umax z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -972,13 +892,12 @@ define void @umax_v4i64(ptr %a) #0 {
define void @umin_v32i8(ptr %a) #0 {
; CHECK-LABEL: umin_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI48_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.b, #7 // =0x7
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI48_0]
-; CHECK-NEXT: umin z1.b, p0/m, z1.b, z0.b
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umin z1.b, p0/m, z1.b, z2.b
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -991,13 +910,12 @@ define void @umin_v32i8(ptr %a) #0 {
define void @umin_v16i16(ptr %a) #0 {
; CHECK-LABEL: umin_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI49_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.h, #15 // =0xf
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI49_0]
-; CHECK-NEXT: umin z1.h, p0/m, z1.h, z0.h
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umin z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -1010,13 +928,12 @@ define void @umin_v16i16(ptr %a) #0 {
define void @umin_v8i32(ptr %a) #0 {
; CHECK-LABEL: umin_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI50_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.s, #31 // =0x1f
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI50_0]
-; CHECK-NEXT: umin z1.s, p0/m, z1.s, z0.s
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umin z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -1029,13 +946,12 @@ define void @umin_v8i32(ptr %a) #0 {
define void @umin_v4i64(ptr %a) #0 {
; CHECK-LABEL: umin_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI51_0
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z2.d, #63 // =0x3f
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
-; CHECK-NEXT: umin z1.d, p0/m, z1.d, z0.d
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: umin z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
@@ -1052,12 +968,10 @@ define void @umin_v4i64(ptr %a) #0 {
define void @xor_v32i8(ptr %a) #0 {
; CHECK-LABEL: xor_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI52_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI52_0]
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: eor z0.b, z0.b, #0x7
+; CHECK-NEXT: eor z1.b, z1.b, #0x7
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
@@ -1070,12 +984,10 @@ define void @xor_v32i8(ptr %a) #0 {
define void @xor_v16i16(ptr %a) #0 {
; CHECK-LABEL: xor_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI53_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI53_0]
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: eor z0.h, z0.h, #0xf
+; CHECK-NEXT: eor z1.h, z1.h, #0xf
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
@@ -1088,12 +1000,10 @@ define void @xor_v16i16(ptr %a) #0 {
define void @xor_v8i32(ptr %a) #0 {
; CHECK-LABEL: xor_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI54_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI54_0]
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: eor z0.s, z0.s, #0x1f
+; CHECK-NEXT: eor z1.s, z1.s, #0x1f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
@@ -1106,12 +1016,10 @@ define void @xor_v8i32(ptr %a) #0 {
define void @xor_v4i64(ptr %a) #0 {
; CHECK-LABEL: xor_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI55_0
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI55_0]
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
-; CHECK-NEXT: eor z0.d, z2.d, z0.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: eor z0.d, z0.d, #0x3f
+; CHECK-NEXT: eor z1.d, z1.d, #0x3f
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index bc90841aebee..ab6c5f0307cc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -13,19 +13,15 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: smulh_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: adrp x8, .LCPI0_1
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_1]
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z3.h
+; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #4
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <4 x i16> undef, i16 4, i64 0
@@ -78,7 +74,6 @@ define void @smulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: smulh_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: sunpklo z4.h, z1.b
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
@@ -95,23 +90,22 @@ define void @smulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-NEXT: sunpklo z2.h, z2.b
; CHECK-NEXT: mul z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: movprfx z2, z5
+; CHECK-NEXT: mul z2.h, p0/m, z2.h, z7.h
; CHECK-NEXT: movprfx z3, z4
; CHECK-NEXT: mul z3.h, p0/m, z3.h, z6.h
-; CHECK-NEXT: mul z5.h, p0/m, z5.h, z7.h
-; CHECK-NEXT: movprfx z4, z5
-; CHECK-NEXT: lsr z4.h, p0/m, z4.h, z2.h
-; CHECK-NEXT: lsr z3.h, p0/m, z3.h, z2.h
-; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8
+; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8
+; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
-; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b
-; CHECK-NEXT: stp q2, q3, [x0]
+; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b
+; CHECK-NEXT: splice z2.b, p0, z2.b, z0.b
+; CHECK-NEXT: stp q3, q2, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, <32 x i8>* %a
%op2 = load <32 x i8>, <32 x i8>* %b
@@ -127,17 +121,15 @@ define void @smulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 {
; CHECK-LABEL: smulh_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: asr z1.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #16
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: asr z1.s, p0/m, z1.s, #16
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = sext <2 x i16> %op1 to <2 x i32>
@@ -368,17 +360,13 @@ define void @smulh_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: umulh_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
-; CHECK-NEXT: adrp x9, .LCPI14_1
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI14_1]
-; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z2.d
+; CHECK-NEXT: and z0.h, z0.h, #0xff
+; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z3.h
+; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #4
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = zext <4 x i8> %op1 to <4 x i16>
@@ -427,7 +415,6 @@ define void @umulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: umulh_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: uunpklo z4.h, z1.b
; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
@@ -444,23 +431,22 @@ define void @umulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
; CHECK-NEXT: uunpklo z2.h, z2.b
; CHECK-NEXT: mul z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: movprfx z2, z5
+; CHECK-NEXT: mul z2.h, p0/m, z2.h, z7.h
; CHECK-NEXT: movprfx z3, z4
; CHECK-NEXT: mul z3.h, p0/m, z3.h, z6.h
-; CHECK-NEXT: mul z5.h, p0/m, z5.h, z7.h
-; CHECK-NEXT: movprfx z4, z5
-; CHECK-NEXT: lsr z4.h, p0/m, z4.h, z2.h
-; CHECK-NEXT: lsr z3.h, p0/m, z3.h, z2.h
-; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8
+; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8
+; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
-; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b
-; CHECK-NEXT: stp q2, q3, [x0]
+; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b
+; CHECK-NEXT: splice z2.b, p0, z2.b, z0.b
+; CHECK-NEXT: stp q3, q2, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, <32 x i8>* %a
%op2 = load <32 x i8>, <32 x i8>* %b
@@ -476,17 +462,13 @@ define void @umulh_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 {
; CHECK-LABEL: umulh_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
-; CHECK-NEXT: adrp x9, .LCPI18_1
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI18_1]
-; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z2.d
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
+; CHECK-NEXT: and z1.s, z1.s, #0xffff
; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z3.s
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%1 = zext <2 x i16> %op1 to <2 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index 7fa94d4d6342..e8fe9c33b8fd 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -10,16 +10,14 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: srem_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: ptrue p1.s, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: asr z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpklo z3.s, z0.h
; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
@@ -325,13 +323,11 @@ define void @srem_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: urem_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI13_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: and z0.d, z0.d, z2.d
+; CHECK-NEXT: and z0.h, z0.h, #0xff
+; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpklo z3.s, z0.h
; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 989fb79060ce..e26dac067e04 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -6,25 +6,15 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI0_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_0]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x i8> %op1, <4 x i8> %op2
ret <4 x i8> %sel
@@ -33,29 +23,15 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) #0 {
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI1_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI1_0]
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: strb w8, [sp, #14]
-; CHECK-NEXT: strb w8, [sp, #13]
-; CHECK-NEXT: strb w8, [sp, #12]
-; CHECK-NEXT: strb w8, [sp, #11]
-; CHECK-NEXT: strb w8, [sp, #10]
-; CHECK-NEXT: strb w8, [sp, #9]
-; CHECK-NEXT: strb w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
ret <8 x i8> %sel
@@ -64,37 +40,15 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) #0 {
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI2_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI2_0]
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: strb w8, [sp, #14]
-; CHECK-NEXT: strb w8, [sp, #13]
-; CHECK-NEXT: strb w8, [sp, #12]
-; CHECK-NEXT: strb w8, [sp, #11]
-; CHECK-NEXT: strb w8, [sp, #10]
-; CHECK-NEXT: strb w8, [sp, #9]
-; CHECK-NEXT: strb w8, [sp, #8]
-; CHECK-NEXT: strb w8, [sp, #7]
-; CHECK-NEXT: strb w8, [sp, #6]
-; CHECK-NEXT: strb w8, [sp, #5]
-; CHECK-NEXT: strb w8, [sp, #4]
-; CHECK-NEXT: strb w8, [sp, #3]
-; CHECK-NEXT: strb w8, [sp, #2]
-; CHECK-NEXT: strb w8, [sp, #1]
-; CHECK-NEXT: strb w8, [sp]
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.b, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
ret <16 x i8> %sel
@@ -103,42 +57,20 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) #0 {
define void @select_v32i8(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: adrp x9, .LCPI3_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: strb w8, [sp, #15]
-; CHECK-NEXT: strb w8, [sp, #14]
-; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI3_0]
-; CHECK-NEXT: strb w8, [sp, #13]
-; CHECK-NEXT: strb w8, [sp, #12]
-; CHECK-NEXT: strb w8, [sp, #11]
-; CHECK-NEXT: strb w8, [sp, #10]
-; CHECK-NEXT: strb w8, [sp, #9]
-; CHECK-NEXT: strb w8, [sp, #8]
-; CHECK-NEXT: strb w8, [sp, #7]
-; CHECK-NEXT: strb w8, [sp, #6]
-; CHECK-NEXT: strb w8, [sp, #5]
-; CHECK-NEXT: strb w8, [sp, #4]
-; CHECK-NEXT: strb w8, [sp, #3]
-; CHECK-NEXT: strb w8, [sp, #2]
-; CHECK-NEXT: strb w8, [sp, #1]
-; CHECK-NEXT: strb w8, [sp]
-; CHECK-NEXT: ldr q4, [sp]
-; CHECK-NEXT: eor z5.d, z4.d, z5.d
-; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: mov z4.b, w8
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
; CHECK-NEXT: and z0.d, z0.d, z4.d
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: and z3.d, z3.d, z5.d
+; CHECK-NEXT: bic z3.d, z3.d, z4.d
+; CHECK-NEXT: and z1.d, z1.d, z4.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load volatile <32 x i8>, ptr %a
%op2 = load volatile <32 x i8>, ptr %b
@@ -150,22 +82,17 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) #0 {
define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI4_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_0]
-; CHECK-NEXT: stp w8, w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mvn w9, w8
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z3.s, w9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x i16> %op1, <2 x i16> %op2
ret <2 x i16> %sel
@@ -174,25 +101,15 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) #0 {
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI5_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI5_0]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
ret <4 x i16> %sel
@@ -201,29 +118,15 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) #0 {
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI6_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI6_0]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: strh w8, [sp, #4]
-; CHECK-NEXT: strh w8, [sp, #2]
-; CHECK-NEXT: strh w8, [sp]
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mov z2.h, w8
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
ret <8 x i16> %sel
@@ -232,34 +135,20 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) #0 {
define void @select_v16i16(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: adrp x9, .LCPI7_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q1, [x0, #16]
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI7_0]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: strh w8, [sp, #4]
-; CHECK-NEXT: strh w8, [sp, #2]
-; CHECK-NEXT: strh w8, [sp]
-; CHECK-NEXT: ldr q4, [sp]
-; CHECK-NEXT: eor z5.d, z4.d, z5.d
-; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: mov z4.h, w8
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
; CHECK-NEXT: and z0.d, z0.d, z4.d
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: and z3.d, z3.d, z5.d
+; CHECK-NEXT: bic z3.d, z3.d, z4.d
+; CHECK-NEXT: and z1.d, z1.d, z4.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load volatile <16 x i16>, ptr %a
%op2 = load volatile <16 x i16>, ptr %b
@@ -271,22 +160,17 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) #0 {
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI8_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI8_0]
-; CHECK-NEXT: stp w8, w8, [sp, #8]
-; CHECK-NEXT: ldr d2, [sp, #8]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mvn w9, w8
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z3.s, w9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
ret <2 x i32> %sel
@@ -295,23 +179,17 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) #0 {
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: adrp x9, .LCPI9_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI9_0]
-; CHECK-NEXT: stp w8, w8, [sp, #8]
-; CHECK-NEXT: stp w8, w8, [sp]
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm w8, ne
+; CHECK-NEXT: mvn w9, w8
+; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov z3.s, w9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
ret <4 x i32> %sel
@@ -320,20 +198,15 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) #0 {
define void @select_v8i32(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: tst w2, #0x1
-; CHECK-NEXT: adrp x9, .LCPI10_0
-; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: csetm w8, ne
; CHECK-NEXT: ldr q1, [x0, #16]
+; CHECK-NEXT: mvn w9, w8
; CHECK-NEXT: ldr q2, [x1]
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: stp w8, w8, [sp, #8]
-; CHECK-NEXT: stp w8, w8, [sp]
-; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI10_0]
-; CHECK-NEXT: ldr q4, [sp]
-; CHECK-NEXT: eor z5.d, z4.d, z5.d
+; CHECK-NEXT: mov z4.s, w8
+; CHECK-NEXT: mov z5.s, w9
; CHECK-NEXT: and z1.d, z1.d, z4.d
; CHECK-NEXT: and z0.d, z0.d, z4.d
; CHECK-NEXT: and z2.d, z2.d, z5.d
@@ -341,7 +214,6 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load volatile <8 x i32>, ptr %a
%op2 = load volatile <8 x i32>, ptr %b
@@ -354,13 +226,12 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) #0 {
; CHECK-LABEL: select_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov x9, #-1
-; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov d3, x9
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm x8, ne
+; CHECK-NEXT: mvn x9, x8
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z3.d, x9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
@@ -376,18 +247,14 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) #0 {
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: adrp x9, .LCPI12_0
; CHECK-NEXT: csetm x8, ne
-; CHECK-NEXT: stp x8, x8, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI12_0]
+; CHECK-NEXT: mvn x9, x8
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z3.d, x9
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
ret <2 x i64> %sel
@@ -400,22 +267,18 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) #0 {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: ldr q1, [x0, #16]
+; CHECK-NEXT: mvn x9, x8
; CHECK-NEXT: ldr q2, [x1]
-; CHECK-NEXT: adrp x9, .LCPI13_0
; CHECK-NEXT: ldr q3, [x1, #16]
-; CHECK-NEXT: stp x8, x8, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI13_0]
-; CHECK-NEXT: ldr q5, [sp]
-; CHECK-NEXT: eor z4.d, z5.d, z4.d
-; CHECK-NEXT: and z1.d, z1.d, z5.d
-; CHECK-NEXT: and z0.d, z0.d, z5.d
-; CHECK-NEXT: and z2.d, z2.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z4.d
+; CHECK-NEXT: mov z4.d, x8
+; CHECK-NEXT: mov z5.d, x9
+; CHECK-NEXT: and z1.d, z1.d, z4.d
+; CHECK-NEXT: and z0.d, z0.d, z4.d
+; CHECK-NEXT: and z2.d, z2.d, z5.d
+; CHECK-NEXT: and z3.d, z3.d, z5.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load volatile <4 x i64>, ptr %a
%op2 = load volatile <4 x i64>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 2c2310cf340c..4d02fa70e06c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -10,16 +10,12 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: ashr_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI0_1]
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: and z1.d, z1.d, z3.d
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: and z1.h, z1.h, #0xff
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -73,16 +69,12 @@ define void @ashr_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 {
; CHECK-LABEL: ashr_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI4_1]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: and z1.d, z1.d, z3.d
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -226,13 +218,11 @@ define void @ashr_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: lshr_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: and z0.d, z0.d, z2.d
+; CHECK-NEXT: and z1.h, z1.h, #0xff
+; CHECK-NEXT: and z0.h, z0.h, #0xff
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -286,13 +276,11 @@ define void @lshr_v32i8(<32 x i8>* %a, <32 x i8>* %b) #0 {
define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) #0 {
; CHECK-LABEL: lshr_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: and z1.d, z1.d, z2.d
-; CHECK-NEXT: and z0.d, z0.d, z2.d
+; CHECK-NEXT: and z1.s, z1.s, #0xffff
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -436,12 +424,10 @@ define void @lshr_v4i64(<4 x i64>* %a, <4 x i64>* %b) #0 {
define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) #0 {
; CHECK-LABEL: shl_v2i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI28_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI28_0]
-; CHECK-NEXT: and z1.d, z1.d, z2.d
+; CHECK-NEXT: and z1.s, z1.s, #0xff
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -452,12 +438,10 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) #0 {
define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) #0 {
; CHECK-LABEL: shl_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI29_0
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI29_0]
-; CHECK-NEXT: and z1.d, z1.d, z2.d
+; CHECK-NEXT: and z1.h, z1.h, #0xff
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index fc66a2a37c7f..f0bd11500c3d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -55,11 +55,9 @@ define void @ucvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) #0 {
define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) #0 {
; CHECK-LABEL: ucvtf_v2i16_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -142,11 +140,9 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) #0 {
define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) #0 {
; CHECK-LABEL: ucvtf_v2i16_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: and z0.s, z0.s, #0xffff
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -644,12 +640,10 @@ define void @scvtf_v16i16_v16f16(<16 x i16>* %a, <16 x half>* %b) #0 {
define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) #0 {
; CHECK-LABEL: scvtf_v2i16_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI33_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI33_0]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -720,12 +714,10 @@ define void @scvtf_v16i16_v16f32(<16 x i16>* %a, <16 x float>* %b) #0 {
define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) #0 {
; CHECK-LABEL: scvtf_v2i16_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI37_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI37_0]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index 5d6fd5339503..4f36876ef0eb 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -6,19 +6,14 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: select_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: adrp x9, .LCPI0_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI0_1]
-; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -29,19 +24,14 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) #0 {
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: adrp x9, .LCPI1_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI1_1]
-; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z3.b
-; CHECK-NEXT: asr z2.b, p0/m, z2.b, z3.b
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.b, p0/m, z2.b, #7
+; CHECK-NEXT: asr z2.b, p0/m, z2.b, #7
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -52,19 +42,14 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
-; CHECK-NEXT: adrp x9, .LCPI2_1
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI2_1]
-; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z3.b
-; CHECK-NEXT: asr z2.b, p0/m, z2.b, z3.b
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.b, p0/m, z2.b, #7
+; CHECK-NEXT: asr z2.b, p0/m, z2.b, #7
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -75,23 +60,19 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
define void @select_v32i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x1]
-; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: cmpeq p1.b, p0/z, z2.b, z0.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z3.b, z1.b
-; CHECK-NEXT: mov z5.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z6.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: eor z5.d, z5.d, z4.d
-; CHECK-NEXT: eor z4.d, z6.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z6.d
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: and z0.d, z0.d, z5.d
-; CHECK-NEXT: orr z1.d, z3.d, z1.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z3.b
+; CHECK-NEXT: mov z4.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
+; CHECK-NEXT: bic z3.d, z3.d, z5.d
+; CHECK-NEXT: and z1.d, z1.d, z5.d
+; CHECK-NEXT: and z0.d, z0.d, z4.d
+; CHECK-NEXT: orr z1.d, z1.d, z3.d
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
@@ -105,19 +86,14 @@ define void @select_v32i8(ptr %a, ptr %b) #0 {
define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: select_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: adrp x9, .LCPI4_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI4_1]
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -128,19 +104,14 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) #
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI5_0
-; CHECK-NEXT: adrp x9, .LCPI5_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI5_1]
-; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -151,20 +122,15 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) #
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI6_0
-; CHECK-NEXT: adrp x9, .LCPI6_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.h, z2.b
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI6_1]
-; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: asr z2.h, p0/m, z2.h, z3.h
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: asr z2.h, p0/m, z2.h, #15
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -175,23 +141,19 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #
define void @select_v16i16(ptr %a, ptr %b) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x1]
-; CHECK-NEXT: adrp x8, .LCPI7_0
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT: cmpeq p1.h, p0/z, z2.h, z0.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z1.h
-; CHECK-NEXT: mov z5.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z6.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: eor z5.d, z5.d, z4.d
-; CHECK-NEXT: eor z4.d, z6.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z6.d
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: and z0.d, z0.d, z5.d
-; CHECK-NEXT: orr z1.d, z3.d, z1.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z3.h
+; CHECK-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
+; CHECK-NEXT: bic z3.d, z3.d, z5.d
+; CHECK-NEXT: and z1.d, z1.d, z5.d
+; CHECK-NEXT: and z0.d, z0.d, z4.d
+; CHECK-NEXT: orr z1.d, z1.d, z3.d
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
@@ -205,19 +167,14 @@ define void @select_v16i16(ptr %a, ptr %b) #0 {
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: adrp x9, .LCPI8_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: ldr d4, [x9, :lo12:.LCPI8_1]
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -228,20 +185,15 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) #
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: adrp x9, .LCPI9_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI9_1]
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: asr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: asr z2.s, p0/m, z2.s, #31
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -252,23 +204,19 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #
define void @select_v8i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x1]
-; CHECK-NEXT: adrp x8, .LCPI10_0
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: cmpeq p1.s, p0/z, z2.s, z0.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z3.s, z1.s
-; CHECK-NEXT: mov z5.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z6.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: eor z5.d, z5.d, z4.d
-; CHECK-NEXT: eor z4.d, z6.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z6.d
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: and z0.d, z0.d, z5.d
-; CHECK-NEXT: orr z1.d, z3.d, z1.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z3.s
+; CHECK-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
+; CHECK-NEXT: bic z3.d, z3.d, z5.d
+; CHECK-NEXT: and z1.d, z1.d, z5.d
+; CHECK-NEXT: and z0.d, z0.d, z4.d
+; CHECK-NEXT: orr z1.d, z1.d, z3.d
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%op1 = load <8 x i32>, ptr %a
@@ -283,13 +231,12 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) #
; CHECK-LABEL: select_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov x9, #-1
-; CHECK-NEXT: csetm x8, ne
; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov d3, x9
-; CHECK-NEXT: fmov d2, x8
-; CHECK-NEXT: eor z3.d, z2.d, z3.d
+; CHECK-NEXT: csetm x8, ne
+; CHECK-NEXT: mvn x9, x8
+; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov z3.d, x9
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
@@ -302,20 +249,15 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) #
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #0 {
; CHECK-LABEL: select_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI12_0
-; CHECK-NEXT: adrp x9, .LCPI12_1
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uunpklo z2.d, z2.s
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI12_0]
-; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI12_1]
-; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d
-; CHECK-NEXT: asr z2.d, p0/m, z2.d, z3.d
-; CHECK-NEXT: eor z3.d, z2.d, z4.d
+; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #63
+; CHECK-NEXT: asr z2.d, p0/m, z2.d, #63
+; CHECK-NEXT: bic z1.d, z1.d, z2.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -326,23 +268,19 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #
define void @select_v4i64(ptr %a, ptr %b) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x1]
-; CHECK-NEXT: adrp x8, .LCPI13_0
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT: cmpeq p1.d, p0/z, z2.d, z0.d
-; CHECK-NEXT: cmpeq p0.d, p0/z, z3.d, z1.d
-; CHECK-NEXT: mov z5.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z6.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, z5.d
-; CHECK-NEXT: eor z5.d, z5.d, z4.d
-; CHECK-NEXT: eor z4.d, z6.d, z4.d
-; CHECK-NEXT: and z3.d, z3.d, z6.d
-; CHECK-NEXT: and z1.d, z1.d, z4.d
-; CHECK-NEXT: and z0.d, z0.d, z5.d
-; CHECK-NEXT: orr z1.d, z3.d, z1.d
-; CHECK-NEXT: orr z0.d, z2.d, z0.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
+; CHECK-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: bic z2.d, z2.d, z4.d
+; CHECK-NEXT: bic z3.d, z3.d, z5.d
+; CHECK-NEXT: and z1.d, z1.d, z5.d
+; CHECK-NEXT: and z0.d, z0.d, z4.d
+; CHECK-NEXT: orr z1.d, z1.d, z3.d
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%op1 = load <4 x i64>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
index 2e8623d4a27b..34c7d792e8dd 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
@@ -10,12 +10,10 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @masked_load_v4i8(<4 x i8>* %src, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -27,12 +25,10 @@ define <4 x i8> @masked_load_v4i8(<4 x i8>* %src, <4 x i1> %mask) #0 {
define <8 x i8> @masked_load_v8i8(<8 x i8>* %src, <8 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -44,12 +40,10 @@ define <8 x i8> @masked_load_v8i8(<8 x i8>* %src, <8 x i1> %mask) #0 {
define <16 x i8> @masked_load_v16i8(<16 x i8>* %src, <16 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -117,20 +111,18 @@ define <32 x i8> @masked_load_v32i8(<32 x i8>* %src, <32 x i1> %mask) #0 {
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: strb w10, [sp, #8]
; CHECK-NEXT: strb w8, [sp, #7]
-; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: mov w8, #16
; CHECK-NEXT: strb w4, [sp, #3]
; CHECK-NEXT: strb w3, [sp, #2]
; CHECK-NEXT: strb w2, [sp, #1]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: strb w1, [sp]
-; CHECK-NEXT: mov w8, #16
-; CHECK-NEXT: ldp q2, q1, [sp]
-; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z0.b
-; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: asr z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT: ldp q1, q0, [sp]
+; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7
+; CHECK-NEXT: asr z1.b, p0/m, z1.b, #7
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x0, x8]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -147,18 +139,16 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %src, <2 x i1> %mask) #0 {
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z1.s, z0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: fmov w10, s1
-; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: str wzr, [sp, #12]
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: ldr d1, [sp, #8]
-; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: strh w9, [sp, #10]
+; CHECK-NEXT: ldr d0, [sp, #8]
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -171,12 +161,10 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %src, <2 x i1> %mask) #0 {
define <4 x half> @masked_load_v4f16(<4 x half>* %src, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -188,13 +176,11 @@ define <4 x half> @masked_load_v4f16(<4 x half>* %src, <4 x i1> %mask) #0 {
define <8 x half> @masked_load_v8f16(<8 x half>* %src, <8 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -206,19 +192,17 @@ define <8 x half> @masked_load_v8f16(<8 x half>* %src, <8 x i1> %mask) #0 {
define <16 x half> @masked_load_v16f16(<16 x half>* %src, <16 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.h, z0.b
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: uunpklo z1.h, z0.b
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15
; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: mov x8, #8
-; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: asr z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: cmpne p1.h, p0/z, z2.h, #0
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
@@ -232,12 +216,10 @@ define <16 x half> @masked_load_v16f16(<16 x half>* %src, <16 x i1> %mask) #0 {
define <2 x float> @masked_load_v2f32(<2 x float>* %src, <2 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -249,13 +231,11 @@ define <2 x float> @masked_load_v2f32(<2 x float>* %src, <2 x i1> %mask) #0 {
define <4 x float> @masked_load_v4f32(<4 x float>* %src, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -268,46 +248,44 @@ define <8 x float> @masked_load_v8f32(<8 x float>* %src, <8 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z1.b, z0.b[3]
; CHECK-NEXT: mov z2.b, z0.b[2]
-; CHECK-NEXT: adrp x8, .LCPI10_0
+; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: mov z3.b, z0.b[1]
; CHECK-NEXT: mov z4.b, z0.b[7]
; CHECK-NEXT: mov z5.b, z0.b[6]
; CHECK-NEXT: mov z6.b, z0.b[5]
-; CHECK-NEXT: fmov w10, s1
+; CHECK-NEXT: fmov w9, s1
; CHECK-NEXT: mov z0.b, z0.b[4]
-; CHECK-NEXT: fmov w11, s2
-; CHECK-NEXT: strh w9, [sp, #-16]!
+; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: strh w8, [sp, #-16]!
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: fmov w9, s3
-; CHECK-NEXT: strh w10, [sp, #6]
-; CHECK-NEXT: fmov w10, s4
-; CHECK-NEXT: strh w11, [sp, #4]
-; CHECK-NEXT: fmov w11, s5
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: strh w9, [sp, #2]
-; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: strh w10, [sp, #14]
-; CHECK-NEXT: fmov w10, s0
-; CHECK-NEXT: strh w11, [sp, #12]
-; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: strh w9, [sp, #10]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w9, [sp, #6]
+; CHECK-NEXT: fmov w9, s4
+; CHECK-NEXT: strh w10, [sp, #4]
+; CHECK-NEXT: fmov w10, s5
+; CHECK-NEXT: strh w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: strh w9, [sp, #14]
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: strh w10, [sp, #12]
+; CHECK-NEXT: strh w8, [sp, #10]
; CHECK-NEXT: mov x8, #4
-; CHECK-NEXT: strh w10, [sp, #8]
-; CHECK-NEXT: ldp d0, d2, [sp]
+; CHECK-NEXT: strh w9, [sp, #8]
+; CHECK-NEXT: ldp d0, d1, [sp]
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: lsl z2.s, p0/m, z2.s, z1.s
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31
; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
-; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
-; CHECK-NEXT: asrr z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
@@ -318,13 +296,11 @@ define <8 x float> @masked_load_v8f32(<8 x float>* %src, <8 x i1> %mask) #0 {
define <2 x double> @masked_load_v2f64(<2 x double>* %src, <2 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -336,20 +312,18 @@ define <2 x double> @masked_load_v2f64(<2 x double>* %src, <2 x i1> %mask) #0 {
define <4 x double> @masked_load_v4f64(<4 x double>* %src, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_load_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov x8, #2
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: mov x8, #2
-; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: asr z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
index 78ff21c75aff..930376ec4f6c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
@@ -10,16 +10,13 @@ target triple = "aarch64-unknown-linux-gnu"
define void @masked_store_v4i8(<4 x i8>* %dst, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: adrp x8, .LCPI0_1
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1]
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: st1b { z2.h }, p0, [x0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: st1b { z0.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, <4 x i8>* %dst, i32 8, <4 x i1> %mask)
ret void
@@ -28,16 +25,13 @@ define void @masked_store_v4i8(<4 x i8>* %dst, <4 x i1> %mask) #0 {
define void @masked_store_v8i8(<8 x i8>* %dst, <8 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: adrp x8, .LCPI1_1
-; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI1_1]
-; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: st1b { z2.b }, p0, [x0]
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, <8 x i8>* %dst, i32 8, <8 x i1> %mask)
ret void
@@ -46,16 +40,13 @@ define void @masked_store_v8i8(<8 x i8>* %dst, <8 x i1> %mask) #0 {
define void @masked_store_v16i8(<16 x i8>* %dst, <16 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: adrp x8, .LCPI2_1
-; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_1]
-; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: st1b { z2.b }, p0, [x0]
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, <16 x i8>* %dst, i32 8, <16 x i1> %mask)
ret void
@@ -113,31 +104,29 @@ define void @masked_store_v32i8(<32 x i8>* %dst, <32 x i1> %mask) #0 {
; CHECK-NEXT: strb w9, [sp, #21]
; CHECK-NEXT: ldr w9, [sp, #120]
; CHECK-NEXT: strb w10, [sp, #20]
-; CHECK-NEXT: ldr w10, [sp, #104]
+; CHECK-NEXT: ldr w10, [sp, #112]
; CHECK-NEXT: strb w8, [sp, #19]
-; CHECK-NEXT: ldr w8, [sp, #112]
+; CHECK-NEXT: ldr w8, [sp, #104]
; CHECK-NEXT: strb w4, [sp, #3]
-; CHECK-NEXT: adrp x11, .LCPI3_0
-; CHECK-NEXT: strb w3, [sp, #2]
; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: strb w3, [sp, #2]
; CHECK-NEXT: strb w2, [sp, #1]
; CHECK-NEXT: strb w1, [sp]
-; CHECK-NEXT: ldr q0, [x11, :lo12:.LCPI3_0]
; CHECK-NEXT: strb w9, [sp, #18]
-; CHECK-NEXT: strb w8, [sp, #17]
-; CHECK-NEXT: adrp x8, .LCPI3_1
-; CHECK-NEXT: strb w10, [sp, #16]
-; CHECK-NEXT: ldp q1, q2, [sp]
-; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: asr z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: lsl z2.b, p0/m, z2.b, z0.b
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT: strb w10, [sp, #17]
+; CHECK-NEXT: strb w8, [sp, #16]
; CHECK-NEXT: mov w8, #16
-; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0
+; CHECK-NEXT: ldp q0, q1, [sp]
+; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
+; CHECK-NEXT: lsl z1.b, p0/m, z1.b, #7
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: asr z0.b, p0/m, z0.b, #7
; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: st1b { z3.b }, p0, [x0, x8]
-; CHECK-NEXT: st1b { z3.b }, p1, [x0]
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: st1b { z0.b }, p0, [x0, x8]
+; CHECK-NEXT: st1b { z0.b }, p1, [x0]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, <32 x i8>* %dst, i32 8, <32 x i1> %mask)
@@ -150,22 +139,19 @@ define void @masked_store_v2f16(<2 x half>* %dst, <2 x i1> %mask) #0 {
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z1.s, z0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: fmov w10, s1
-; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: str wzr, [sp, #12]
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: ldr d1, [sp, #8]
-; CHECK-NEXT: adrp x8, .LCPI4_1
-; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1]
-; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: strh w9, [sp, #10]
+; CHECK-NEXT: ldr d0, [sp, #8]
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: st1h { z2.h }, p0, [x0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, <2 x half>* %dst, i32 8, <2 x i1> %mask)
@@ -175,16 +161,13 @@ define void @masked_store_v2f16(<2 x half>* %dst, <2 x i1> %mask) #0 {
define void @masked_store_v4f16(<4 x half>* %dst, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT: adrp x8, .LCPI5_1
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_1]
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: st1h { z2.h }, p0, [x0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, <4 x half>* %dst, i32 8, <4 x i1> %mask)
ret void
@@ -193,17 +176,14 @@ define void @masked_store_v4f16(<4 x half>* %dst, <4 x i1> %mask) #0 {
define void @masked_store_v8f16(<8 x half>* %dst, <8 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: adrp x8, .LCPI6_1
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: st1h { z2.h }, p0, [x0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, <8 x half>* %dst, i32 8, <8 x i1> %mask)
ret void
@@ -212,25 +192,22 @@ define void @masked_store_v8f16(<8 x half>* %dst, <8 x i1> %mask) #0 {
define void @masked_store_v16f16(<16 x half>* %dst, <16 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT: adrp x8, .LCPI7_1
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI7_1]
-; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT: mov x8, #8
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: asr z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #15
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT: asr z1.h, p0/m, z1.h, #15
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #15
; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0
+; CHECK-NEXT: mov z1.h, #0 // =0x0
; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: st1h { z3.h }, p1, [x0, x8, lsl #1]
-; CHECK-NEXT: st1h { z3.h }, p0, [x0]
+; CHECK-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1]
+; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, <16 x half>* %dst, i32 8, <16 x i1> %mask)
ret void
@@ -239,17 +216,14 @@ define void @masked_store_v16f16(<16 x half>* %dst, <16 x i1> %mask) #0 {
define void @masked_store_v4f32(<4 x float>* %dst, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: adrp x8, .LCPI8_1
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_1]
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: st1w { z2.s }, p0, [x0]
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, <4 x float>* %dst, i32 8, <4 x i1> %mask)
ret void
@@ -263,46 +237,42 @@ define void @masked_store_v8f32(<8 x float>* %dst, <8 x i1> %mask) #0 {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: mov z1.b, z0.b[7]
; CHECK-NEXT: mov z2.b, z0.b[6]
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: mov z1.b, z0.b[5]
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: mov z2.b, z0.b[4]
-; CHECK-NEXT: fmov w11, s1
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: strh w9, [sp, #14]
; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: strh w10, [sp, #12]
-; CHECK-NEXT: adrp x10, .LCPI9_1
-; CHECK-NEXT: strh w11, [sp, #10]
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: mov x9, #4
+; CHECK-NEXT: mov z2.b, z0.b[4]
+; CHECK-NEXT: fmov w10, s1
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: strh w9, [sp, #12]
+; CHECK-NEXT: mov z2.b, z0.b[3]
+; CHECK-NEXT: strh w10, [sp, #10]
+; CHECK-NEXT: mov z3.b, z0.b[2]
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: mov z4.b, z0.b[1]
; CHECK-NEXT: ldr d1, [sp, #8]
; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI9_1]
-; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: mov z4.b, z0.b[3]
-; CHECK-NEXT: mov z5.b, z0.b[2]
-; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: mov z6.b, z0.b[1]
-; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: movprfx z0, z1
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: mov x9, #4
+; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: uunpklo z0.s, z1.h
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #31
; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0
-; CHECK-NEXT: fmov w10, s4
-; CHECK-NEXT: st1w { z3.s }, p1, [x0, x9, lsl #2]
-; CHECK-NEXT: fmov w9, s5
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: st1w { z0.s }, p1, [x0, x9, lsl #2]
+; CHECK-NEXT: fmov w9, s3
; CHECK-NEXT: strh w8, [sp]
-; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: fmov w8, s4
; CHECK-NEXT: strh w10, [sp, #6]
; CHECK-NEXT: strh w9, [sp, #4]
; CHECK-NEXT: strh w8, [sp, #2]
-; CHECK-NEXT: ldr d0, [sp]
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: st1w { z3.s }, p0, [x0]
+; CHECK-NEXT: ldr d1, [sp]
+; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: asr z1.s, p0/m, z1.s, #31
+; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, <8 x float>* %dst, i32 8, <8 x i1> %mask)
@@ -312,17 +282,14 @@ define void @masked_store_v8f32(<8 x float>* %dst, <8 x i1> %mask) #0 {
define void @masked_store_v2f64(<2 x double>* %dst, <2 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI10_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT: adrp x8, .LCPI10_1
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_1]
-; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63
; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: st1d { z2.d }, p0, [x0]
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, <2 x double>* %dst, i32 8, <2 x i1> %mask)
ret void
@@ -331,25 +298,22 @@ define void @masked_store_v2f64(<2 x double>* %dst, <2 x i1> %mask) #0 {
define void @masked_store_v4f64(<4 x double>* %dst, <4 x i1> %mask) #0 {
; CHECK-LABEL: masked_store_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov x8, #2
; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: uunpklo z2.d, z0.s
+; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT: adrp x8, .LCPI11_1
; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_1]
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: mov x8, #2
-; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: asrr z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #63
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT: asr z1.d, p0/m, z1.d, #63
+; CHECK-NEXT: asr z0.d, p0/m, z0.d, #63
; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
+; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: st1d { z3.d }, p1, [x0, x8, lsl #3]
-; CHECK-NEXT: st1d { z3.d }, p0, [x0]
+; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, <4 x double>* %dst, i32 8, <4 x i1> %mask)
ret void
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index 0b0d1144d4ae..ca5c1e25781d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -6,32 +6,30 @@ target triple = "aarch64-unknown-linux-gnu"
define i1 @ptest_v16i1(ptr %a, ptr %b) #0 {
; CHECK-LABEL: ptest_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0, #32]
; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: ldp q3, q4, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, z0.s
-; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmne p2.s, p0/z, z4.s, z0.s
-; CHECK-NEXT: fcmne p0.s, p0/z, z3.s, z0.s
+; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: splice z1.h, p1, z1.h, z2.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
+; CHECK-NEXT: splice z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: orv b0, p0, z0.b
+; CHECK-NEXT: orv b0, p0, z1.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
@@ -45,51 +43,49 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) #0 {
define i1 @ptest_or_v16i1(ptr %a, ptr %b) #0 {
; CHECK-LABEL: ptest_or_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q0, q2, [x0, #32]
; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT: ldp q3, q4, [x0]
-; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, z1.s
-; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: mov z1.s, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: ldp q0, q5, [x1, #32]
-; CHECK-NEXT: fcmne p2.s, p0/z, z4.s, z1.s
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: mov z4.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, z1.s
-; CHECK-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
+; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: mov z4.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: ldp q3, q0, [x1, #32]
+; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
+; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0
+; CHECK-NEXT: mov z3.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ldp q5, q6, [x1]
+; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: splice z3.h, p1, z3.h, z4.h
-; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s
-; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: ldp q4, q5, [x1]
; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s
-; CHECK-NEXT: fcmne p0.s, p0/z, z4.s, z1.s
-; CHECK-NEXT: mov z5.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
+; CHECK-NEXT: fcmne p2.s, p0/z, z6.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z5.s, #0.0
+; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: splice z5.h, p1, z5.h, z0.h
-; CHECK-NEXT: splice z4.h, p1, z4.h, z1.h
-; CHECK-NEXT: ptrue p3.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
-; CHECK-NEXT: splice z3.b, p3, z3.b, z2.b
-; CHECK-NEXT: splice z1.b, p3, z1.b, z0.b
+; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
+; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
+; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
+; CHECK-NEXT: orr z0.d, z1.d, z3.d
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: orr z0.d, z3.d, z1.d
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -114,51 +110,49 @@ declare i1 @llvm.vector.reduce.or.i1.v16i1(<16 x i1>)
define i1 @ptest_and_v16i1(ptr %a, ptr %b) #0 {
; CHECK-LABEL: ptest_and_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q0, q2, [x0, #32]
; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT: ldp q3, q4, [x0]
-; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, z1.s
-; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: mov z1.s, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: ldp q0, q5, [x1, #32]
-; CHECK-NEXT: fcmne p2.s, p0/z, z4.s, z1.s
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: mov z4.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, z1.s
-; CHECK-NEXT: mov z3.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
+; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: mov z4.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: ldp q3, q0, [x1, #32]
+; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
+; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0
+; CHECK-NEXT: mov z3.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ldp q5, q6, [x1]
+; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: splice z3.h, p1, z3.h, z4.h
-; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s
-; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: ldp q4, q5, [x1]
; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: fcmne p2.s, p0/z, z5.s, z1.s
-; CHECK-NEXT: fcmne p0.s, p0/z, z4.s, z1.s
-; CHECK-NEXT: mov z5.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
+; CHECK-NEXT: fcmne p2.s, p0/z, z6.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z5.s, #0.0
+; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: splice z5.h, p1, z5.h, z0.h
-; CHECK-NEXT: splice z4.h, p1, z4.h, z1.h
-; CHECK-NEXT: ptrue p3.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z5.b, z5.b
-; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
-; CHECK-NEXT: splice z3.b, p3, z3.b, z2.b
-; CHECK-NEXT: splice z1.b, p3, z1.b, z0.b
+; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
+; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
+; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
+; CHECK-NEXT: and z0.d, z1.d, z3.d
; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: and z0.d, z3.d, z1.d
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index 1fba00d9f7b6..f41e2ebb0a6e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -10,12 +10,10 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) #0 {
; CHECK-LABEL: bitreverse_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %op)
@@ -64,12 +62,10 @@ define void @bitreverse_v32i8(<32 x i8>* %a) #0 {
define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) #0 {
; CHECK-LABEL: bitreverse_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %op)
@@ -200,28 +196,21 @@ define void @bitreverse_v4i64(<4 x i64>* %a) #0 {
define <2 x i16> @bswap_v2i16(<2 x i16> %op) #0 {
; CHECK-LABEL: bswap_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
-; CHECK-NEXT: adrp x10, .LCPI14_2
-; CHECK-NEXT: adrp x9, .LCPI14_1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: adrp x8, .LCPI14_3
-; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI14_2]
-; CHECK-NEXT: movprfx z4, z0
-; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s
-; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI14_1]
-; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s
-; CHECK-NEXT: lslr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: and z0.d, z0.d, z3.d
-; CHECK-NEXT: and z3.d, z5.d, z3.d
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI14_3]
-; CHECK-NEXT: orr z3.d, z3.d, z4.d
-; CHECK-NEXT: orr z0.d, z1.d, z0.d
-; CHECK-NEXT: orr z0.d, z0.d, z3.d
-; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24
+; CHECK-NEXT: and z0.s, z0.s, #0xff00
+; CHECK-NEXT: and z2.s, z2.s, #0xff00
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8
+; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: orr z0.d, z3.d, z0.d
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
+; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %op)
@@ -231,14 +220,12 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) #0 {
define <4 x i16> @bswap_v4i16(<4 x i16> %op) #0 {
; CHECK-LABEL: bswap_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: lsr z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op)
@@ -248,14 +235,12 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) #0 {
define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 {
; CHECK-LABEL: bswap_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: lsr z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op)
@@ -265,18 +250,16 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 {
define void @bswap_v16i16(<16 x i16>* %a) #0 {
; CHECK-LABEL: bswap_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI17_0
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q0, [x0]
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT: movprfx z3, z0
-; CHECK-NEXT: lsr z3.h, p0/m, z3.h, z1.h
-; CHECK-NEXT: movprfx z4, z2
-; CHECK-NEXT: lsr z4.h, p0/m, z4.h, z1.h
-; CHECK-NEXT: lsl z2.h, p0/m, z2.h, z1.h
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: orr z1.d, z2.d, z4.d
-; CHECK-NEXT: orr z0.d, z0.d, z3.d
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.h, p0/m, z2.h, #8
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: lsr z3.h, p0/m, z3.h, #8
+; CHECK-NEXT: lsl z1.h, p0/m, z1.h, #8
+; CHECK-NEXT: orr z1.d, z1.d, z3.d
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%op = load <16 x i16>, <16 x i16>* %a
@@ -288,26 +271,20 @@ define void @bswap_v16i16(<16 x i16>* %a) #0 {
define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 {
; CHECK-LABEL: bswap_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
-; CHECK-NEXT: adrp x10, .LCPI18_2
-; CHECK-NEXT: adrp x9, .LCPI18_1
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: movprfx z4, z0
-; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s
-; CHECK-NEXT: ldr d3, [x10, :lo12:.LCPI18_2]
-; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI18_1]
-; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s
-; CHECK-NEXT: and z5.d, z5.d, z3.d
-; CHECK-NEXT: and z3.d, z0.d, z3.d
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: movprfx z1, z3
-; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: orr z2.d, z5.d, z4.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24
+; CHECK-NEXT: and z0.s, z0.s, #0xff00
+; CHECK-NEXT: and z2.s, z2.s, #0xff00
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8
+; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: orr z0.d, z3.d, z0.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op)
@@ -317,26 +294,20 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 {
define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 {
; CHECK-LABEL: bswap_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI19_0
-; CHECK-NEXT: adrp x10, .LCPI19_2
-; CHECK-NEXT: adrp x9, .LCPI19_1
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT: movprfx z4, z0
-; CHECK-NEXT: lsr z4.s, p0/m, z4.s, z1.s
-; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI19_2]
-; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI19_1]
-; CHECK-NEXT: movprfx z5, z0
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z2.s
-; CHECK-NEXT: and z5.d, z5.d, z3.d
-; CHECK-NEXT: and z3.d, z0.d, z3.d
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: movprfx z1, z3
-; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: orr z2.d, z5.d, z4.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.s, p0/m, z1.s, #24
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #8
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24
+; CHECK-NEXT: and z0.s, z0.s, #0xff00
+; CHECK-NEXT: and z2.s, z2.s, #0xff00
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8
+; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: orr z0.d, z3.d, z0.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op)
@@ -346,38 +317,33 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 {
define void @bswap_v8i32(<8 x i32>* %a) #0 {
; CHECK-LABEL: bswap_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI20_0
-; CHECK-NEXT: adrp x9, .LCPI20_1
-; CHECK-NEXT: ldp q4, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI20_0]
-; CHECK-NEXT: adrp x8, .LCPI20_2
-; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI20_1]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsr z3.s, p0/m, z3.s, #8
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24
; CHECK-NEXT: movprfx z5, z1
-; CHECK-NEXT: lsr z5.s, p0/m, z5.s, z0.s
-; CHECK-NEXT: movprfx z6, z1
-; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z2.s
-; CHECK-NEXT: movprfx z7, z1
-; CHECK-NEXT: lsl z7.s, p0/m, z7.s, z0.s
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI20_2]
-; CHECK-NEXT: movprfx z16, z4
-; CHECK-NEXT: lsr z16.s, p0/m, z16.s, z2.s
-; CHECK-NEXT: and z1.d, z1.d, z3.d
-; CHECK-NEXT: and z6.d, z6.d, z3.d
-; CHECK-NEXT: and z16.d, z16.d, z3.d
-; CHECK-NEXT: and z3.d, z4.d, z3.d
-; CHECK-NEXT: orr z5.d, z6.d, z5.d
-; CHECK-NEXT: movprfx z6, z4
-; CHECK-NEXT: lsr z6.s, p0/m, z6.s, z0.s
-; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z4.s
-; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: lslr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: orr z3.d, z16.d, z6.d
+; CHECK-NEXT: lsr z5.s, p0/m, z5.s, #8
+; CHECK-NEXT: and z3.s, z3.s, #0xff00
+; CHECK-NEXT: and z5.s, z5.s, #0xff00
+; CHECK-NEXT: orr z2.d, z3.d, z2.d
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsl z3.s, p0/m, z3.s, #24
+; CHECK-NEXT: orr z4.d, z5.d, z4.d
+; CHECK-NEXT: movprfx z5, z1
+; CHECK-NEXT: lsl z5.s, p0/m, z5.s, #24
+; CHECK-NEXT: and z1.s, z1.s, #0xff00
+; CHECK-NEXT: and z0.s, z0.s, #0xff00
+; CHECK-NEXT: lsl z1.s, p0/m, z1.s, #8
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #8
+; CHECK-NEXT: orr z1.d, z5.d, z1.d
+; CHECK-NEXT: orr z0.d, z3.d, z0.d
+; CHECK-NEXT: orr z1.d, z1.d, z4.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
-; CHECK-NEXT: orr z1.d, z7.d, z1.d
-; CHECK-NEXT: orr z0.d, z0.d, z3.d
-; CHECK-NEXT: orr z1.d, z1.d, z5.d
-; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%op = load <8 x i32>, <8 x i32>* %a
%res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op)
@@ -388,46 +354,35 @@ define void @bswap_v8i32(<8 x i32>* %a) #0 {
define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 {
; CHECK-LABEL: bswap_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #56
-; CHECK-NEXT: mov w9, #40
-; CHECK-NEXT: mov w10, #65280
-; CHECK-NEXT: mov w11, #24
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: mov w8, #16711680
-; CHECK-NEXT: fmov d2, x9
-; CHECK-NEXT: mov w9, #8
-; CHECK-NEXT: fmov d3, x10
-; CHECK-NEXT: movprfx z7, z0
-; CHECK-NEXT: lsr z7.d, p0/m, z7.d, z1.d
-; CHECK-NEXT: fmov d5, x8
-; CHECK-NEXT: mov w8, #-16777216
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z2.d
-; CHECK-NEXT: fmov d4, x11
-; CHECK-NEXT: fmov d6, x9
-; CHECK-NEXT: and z16.d, z16.d, z3.d
-; CHECK-NEXT: fmov d17, x8
-; CHECK-NEXT: orr z7.d, z16.d, z7.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z4.d
-; CHECK-NEXT: movprfx z18, z0
-; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z6.d
-; CHECK-NEXT: and z16.d, z16.d, z5.d
-; CHECK-NEXT: and z5.d, z0.d, z5.d
-; CHECK-NEXT: and z18.d, z18.d, z17.d
-; CHECK-NEXT: and z17.d, z0.d, z17.d
-; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z17.d
-; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z5.d
-; CHECK-NEXT: and z3.d, z0.d, z3.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: orr z16.d, z18.d, z16.d
-; CHECK-NEXT: movprfx z1, z3
-; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: orr z2.d, z4.d, z6.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z1.d, z16.d, z7.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #8
+; CHECK-NEXT: mov z5.d, z0.d
+; CHECK-NEXT: and z2.d, z2.d, #0xff00
+; CHECK-NEXT: and z3.d, z3.d, #0xff0000
+; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: and z4.d, z4.d, #0xff000000
+; CHECK-NEXT: and z5.d, z5.d, #0xff000000
+; CHECK-NEXT: orr z3.d, z4.d, z3.d
+; CHECK-NEXT: and z2.d, z2.d, #0xff0000
+; CHECK-NEXT: movprfx z4, z5
+; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #8
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: lsl z5.d, p0/m, z5.d, #56
+; CHECK-NEXT: and z0.d, z0.d, #0xff00
+; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #24
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40
+; CHECK-NEXT: orr z2.d, z2.d, z4.d
+; CHECK-NEXT: orr z0.d, z5.d, z0.d
+; CHECK-NEXT: orr z1.d, z3.d, z1.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -439,46 +394,35 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 {
define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 {
; CHECK-LABEL: bswap_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI22_0
-; CHECK-NEXT: adrp x9, .LCPI22_1
-; CHECK-NEXT: adrp x10, .LCPI22_2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
-; CHECK-NEXT: adrp x8, .LCPI22_3
-; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_1]
-; CHECK-NEXT: adrp x9, .LCPI22_4
-; CHECK-NEXT: ldr q3, [x10, :lo12:.LCPI22_2]
-; CHECK-NEXT: adrp x10, .LCPI22_5
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_3]
-; CHECK-NEXT: adrp x8, .LCPI22_6
-; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI22_4]
-; CHECK-NEXT: movprfx z7, z0
-; CHECK-NEXT: lsr z7.d, p0/m, z7.d, z1.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z2.d
-; CHECK-NEXT: ldr q6, [x10, :lo12:.LCPI22_5]
-; CHECK-NEXT: ldr q17, [x8, :lo12:.LCPI22_6]
-; CHECK-NEXT: and z16.d, z16.d, z3.d
-; CHECK-NEXT: orr z7.d, z16.d, z7.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z4.d
-; CHECK-NEXT: movprfx z18, z0
-; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z6.d
-; CHECK-NEXT: and z16.d, z16.d, z5.d
-; CHECK-NEXT: and z18.d, z18.d, z17.d
-; CHECK-NEXT: and z17.d, z0.d, z17.d
-; CHECK-NEXT: and z5.d, z0.d, z5.d
-; CHECK-NEXT: lslr z6.d, p0/m, z6.d, z17.d
-; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z5.d
-; CHECK-NEXT: and z3.d, z0.d, z3.d
-; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: orr z16.d, z18.d, z16.d
-; CHECK-NEXT: movprfx z1, z3
-; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z2.d
-; CHECK-NEXT: orr z2.d, z4.d, z6.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z1.d, z16.d, z7.d
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #8
+; CHECK-NEXT: mov z5.d, z0.d
+; CHECK-NEXT: and z2.d, z2.d, #0xff00
+; CHECK-NEXT: and z3.d, z3.d, #0xff0000
+; CHECK-NEXT: orr z1.d, z2.d, z1.d
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: and z4.d, z4.d, #0xff000000
+; CHECK-NEXT: and z5.d, z5.d, #0xff000000
+; CHECK-NEXT: orr z3.d, z4.d, z3.d
+; CHECK-NEXT: and z2.d, z2.d, #0xff0000
+; CHECK-NEXT: movprfx z4, z5
+; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #8
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: lsl z5.d, p0/m, z5.d, #56
+; CHECK-NEXT: and z0.d, z0.d, #0xff00
+; CHECK-NEXT: lsl z2.d, p0/m, z2.d, #24
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40
+; CHECK-NEXT: orr z2.d, z2.d, z4.d
+; CHECK-NEXT: orr z0.d, z5.d, z0.d
+; CHECK-NEXT: orr z1.d, z3.d, z1.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -490,75 +434,67 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 {
define void @bswap_v4i64(<4 x i64>* %a) #0 {
; CHECK-LABEL: bswap_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI23_0
-; CHECK-NEXT: adrp x9, .LCPI23_1
-; CHECK-NEXT: adrp x10, .LCPI23_3
+; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT: adrp x8, .LCPI23_2
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI23_1]
-; CHECK-NEXT: adrp x9, .LCPI23_4
-; CHECK-NEXT: ldr q5, [x10, :lo12:.LCPI23_3]
-; CHECK-NEXT: adrp x10, .LCPI23_6
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI23_2]
-; CHECK-NEXT: adrp x8, .LCPI23_5
-; CHECK-NEXT: ldr q6, [x9, :lo12:.LCPI23_4]
-; CHECK-NEXT: movprfx z16, z2
-; CHECK-NEXT: lsr z16.d, p0/m, z16.d, z3.d
-; CHECK-NEXT: ldr q17, [x10, :lo12:.LCPI23_6]
-; CHECK-NEXT: movprfx z18, z2
-; CHECK-NEXT: lsr z18.d, p0/m, z18.d, z0.d
-; CHECK-NEXT: ldr q7, [x8, :lo12:.LCPI23_5]
-; CHECK-NEXT: movprfx z19, z2
-; CHECK-NEXT: lsr z19.d, p0/m, z19.d, z5.d
-; CHECK-NEXT: movprfx z20, z2
-; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z7.d
-; CHECK-NEXT: and z16.d, z16.d, z4.d
-; CHECK-NEXT: and z19.d, z19.d, z6.d
-; CHECK-NEXT: and z20.d, z20.d, z17.d
-; CHECK-NEXT: orr z16.d, z16.d, z18.d
-; CHECK-NEXT: orr z18.d, z20.d, z19.d
-; CHECK-NEXT: and z19.d, z2.d, z17.d
-; CHECK-NEXT: and z20.d, z2.d, z6.d
-; CHECK-NEXT: lsl z19.d, p0/m, z19.d, z7.d
-; CHECK-NEXT: lsl z20.d, p0/m, z20.d, z5.d
-; CHECK-NEXT: orr z16.d, z18.d, z16.d
-; CHECK-NEXT: orr z18.d, z20.d, z19.d
-; CHECK-NEXT: movprfx z19, z2
-; CHECK-NEXT: lsl z19.d, p0/m, z19.d, z0.d
-; CHECK-NEXT: and z2.d, z2.d, z4.d
-; CHECK-NEXT: movprfx z20, z1
-; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z3.d
-; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z3.d
-; CHECK-NEXT: movprfx z21, z1
-; CHECK-NEXT: lsr z21.d, p0/m, z21.d, z0.d
-; CHECK-NEXT: and z20.d, z20.d, z4.d
-; CHECK-NEXT: orr z2.d, z19.d, z2.d
-; CHECK-NEXT: orr z19.d, z20.d, z21.d
-; CHECK-NEXT: movprfx z20, z1
-; CHECK-NEXT: lsr z20.d, p0/m, z20.d, z5.d
-; CHECK-NEXT: movprfx z21, z1
-; CHECK-NEXT: lsr z21.d, p0/m, z21.d, z7.d
-; CHECK-NEXT: and z20.d, z20.d, z6.d
-; CHECK-NEXT: and z21.d, z21.d, z17.d
-; CHECK-NEXT: and z17.d, z1.d, z17.d
-; CHECK-NEXT: and z6.d, z1.d, z6.d
-; CHECK-NEXT: lslr z7.d, p0/m, z7.d, z17.d
-; CHECK-NEXT: lslr z5.d, p0/m, z5.d, z6.d
-; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: orr z20.d, z21.d, z20.d
-; CHECK-NEXT: and z4.d, z1.d, z4.d
-; CHECK-NEXT: movprfx z1, z4
-; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: orr z3.d, z5.d, z7.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z1.d, z20.d, z19.d
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #40
+; CHECK-NEXT: movprfx z4, z0
+; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #24
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #8
+; CHECK-NEXT: and z3.d, z3.d, #0xff00
+; CHECK-NEXT: and z4.d, z4.d, #0xff0000
+; CHECK-NEXT: and z5.d, z5.d, #0xff000000
+; CHECK-NEXT: orr z2.d, z3.d, z2.d
+; CHECK-NEXT: orr z3.d, z5.d, z4.d
+; CHECK-NEXT: mov z6.d, z0.d
+; CHECK-NEXT: mov z7.d, z0.d
+; CHECK-NEXT: movprfx z16, z0
+; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56
+; CHECK-NEXT: orr z2.d, z3.d, z2.d
+; CHECK-NEXT: and z6.d, z6.d, #0xff000000
+; CHECK-NEXT: and z7.d, z7.d, #0xff0000
+; CHECK-NEXT: movprfx z3, z6
+; CHECK-NEXT: lsl z3.d, p0/m, z3.d, #8
+; CHECK-NEXT: movprfx z4, z7
+; CHECK-NEXT: lsl z4.d, p0/m, z4.d, #24
+; CHECK-NEXT: orr z3.d, z4.d, z3.d
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #40
+; CHECK-NEXT: and z0.d, z0.d, #0xff00
+; CHECK-NEXT: movprfx z5, z1
+; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #56
+; CHECK-NEXT: and z4.d, z4.d, #0xff00
+; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #40
+; CHECK-NEXT: orr z4.d, z4.d, z5.d
+; CHECK-NEXT: movprfx z5, z1
+; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #24
+; CHECK-NEXT: movprfx z7, z1
+; CHECK-NEXT: lsr z7.d, p0/m, z7.d, #8
+; CHECK-NEXT: orr z0.d, z16.d, z0.d
+; CHECK-NEXT: mov z6.d, z1.d
+; CHECK-NEXT: mov z16.d, z1.d
+; CHECK-NEXT: and z5.d, z5.d, #0xff0000
+; CHECK-NEXT: and z7.d, z7.d, #0xff000000
+; CHECK-NEXT: orr z5.d, z7.d, z5.d
+; CHECK-NEXT: and z6.d, z6.d, #0xff000000
+; CHECK-NEXT: and z16.d, z16.d, #0xff0000
+; CHECK-NEXT: movprfx z7, z1
+; CHECK-NEXT: lsl z7.d, p0/m, z7.d, #56
+; CHECK-NEXT: and z1.d, z1.d, #0xff00
+; CHECK-NEXT: lsl z6.d, p0/m, z6.d, #8
+; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #24
+; CHECK-NEXT: lsl z1.d, p0/m, z1.d, #40
+; CHECK-NEXT: orr z6.d, z16.d, z6.d
+; CHECK-NEXT: orr z1.d, z7.d, z1.d
+; CHECK-NEXT: orr z4.d, z5.d, z4.d
+; CHECK-NEXT: orr z1.d, z1.d, z6.d
; CHECK-NEXT: orr z0.d, z0.d, z3.d
-; CHECK-NEXT: orr z2.d, z2.d, z18.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: orr z1.d, z2.d, z16.d
-; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: orr z1.d, z1.d, z4.d
+; CHECK-NEXT: orr z0.d, z0.d, z2.d
+; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
%op = load <4 x i64>, <4 x i64>* %a
%res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index ac941fb3a162..9045a383f2dc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -6,12 +6,10 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) #0 {
; CHECK-LABEL: sdiv_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #8
+; CHECK-NEXT: asr z0.h, p0/m, z0.h, #8
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -61,12 +59,10 @@ define void @sdiv_v32i8(<32 x i8>* %a) #0 {
define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) #0 {
; CHECK-LABEL: sdiv_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #16
+; CHECK-NEXT: asr z0.s, p0/m, z0.s, #16
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
index c328e3124ced..9c3287758424 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
@@ -11,14 +11,8 @@ target triple = "aarch64-unknown-linux-gnu"
define <4 x i8> @splat_v4i8(i8 %a) #0 {
; CHECK-LABEL: splat_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: strh w0, [sp, #14]
-; CHECK-NEXT: strh w0, [sp, #12]
-; CHECK-NEXT: strh w0, [sp, #10]
-; CHECK-NEXT: strh w0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: mov z0.h, w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <4 x i8> undef, i8 %a, i64 0
%splat = shufflevector <4 x i8> %insert, <4 x i8> undef, <4 x i32> zeroinitializer
@@ -28,18 +22,8 @@ define <4 x i8> @splat_v4i8(i8 %a) #0 {
define <8 x i8> @splat_v8i8(i8 %a) #0 {
; CHECK-LABEL: splat_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: strb w0, [sp, #15]
-; CHECK-NEXT: strb w0, [sp, #14]
-; CHECK-NEXT: strb w0, [sp, #13]
-; CHECK-NEXT: strb w0, [sp, #12]
-; CHECK-NEXT: strb w0, [sp, #11]
-; CHECK-NEXT: strb w0, [sp, #10]
-; CHECK-NEXT: strb w0, [sp, #9]
-; CHECK-NEXT: strb w0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: mov z0.b, w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <8 x i8> undef, i8 %a, i64 0
%splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
@@ -49,25 +33,8 @@ define <8 x i8> @splat_v8i8(i8 %a) #0 {
define <16 x i8> @splat_v16i8(i8 %a) #0 {
; CHECK-LABEL: splat_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: strb w0, [sp, #15]
-; CHECK-NEXT: strb w0, [sp, #14]
-; CHECK-NEXT: strb w0, [sp, #13]
-; CHECK-NEXT: strb w0, [sp, #12]
-; CHECK-NEXT: strb w0, [sp, #11]
-; CHECK-NEXT: strb w0, [sp, #10]
-; CHECK-NEXT: strb w0, [sp, #9]
-; CHECK-NEXT: strb w0, [sp, #8]
-; CHECK-NEXT: strb w0, [sp, #7]
-; CHECK-NEXT: strb w0, [sp, #6]
-; CHECK-NEXT: strb w0, [sp, #5]
-; CHECK-NEXT: strb w0, [sp, #4]
-; CHECK-NEXT: strb w0, [sp, #3]
-; CHECK-NEXT: strb w0, [sp, #2]
-; CHECK-NEXT: strb w0, [sp, #1]
-; CHECK-NEXT: strb w0, [sp]
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: mov z0.b, w0
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <16 x i8> undef, i8 %a, i64 0
%splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
@@ -77,27 +44,8 @@ define <16 x i8> @splat_v16i8(i8 %a) #0 {
define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 {
; CHECK-LABEL: splat_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: strb w0, [sp, #15]
-; CHECK-NEXT: strb w0, [sp, #14]
-; CHECK-NEXT: strb w0, [sp, #13]
-; CHECK-NEXT: strb w0, [sp, #12]
-; CHECK-NEXT: strb w0, [sp, #11]
-; CHECK-NEXT: strb w0, [sp, #10]
-; CHECK-NEXT: strb w0, [sp, #9]
-; CHECK-NEXT: strb w0, [sp, #8]
-; CHECK-NEXT: strb w0, [sp, #7]
-; CHECK-NEXT: strb w0, [sp, #6]
-; CHECK-NEXT: strb w0, [sp, #5]
-; CHECK-NEXT: strb w0, [sp, #4]
-; CHECK-NEXT: strb w0, [sp, #3]
-; CHECK-NEXT: strb w0, [sp, #2]
-; CHECK-NEXT: strb w0, [sp, #1]
-; CHECK-NEXT: strb w0, [sp]
-; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: mov z0.b, w0
; CHECK-NEXT: stp q0, q0, [x1]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%insert = insertelement <32 x i8> undef, i8 %a, i64 0
%splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -108,11 +56,8 @@ define void @splat_v32i8(i8 %a, <32 x i8>* %b) #0 {
define <2 x i16> @splat_v2i16(i16 %a) #0 {
; CHECK-LABEL: splat_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: stp w0, w0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: mov z0.s, w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <2 x i16> undef, i16 %a, i64 0
%splat = shufflevector <2 x i16> %insert, <2 x i16> undef, <2 x i32> zeroinitializer
@@ -122,14 +67,8 @@ define <2 x i16> @splat_v2i16(i16 %a) #0 {
define <4 x i16> @splat_v4i16(i16 %a) #0 {
; CHECK-LABEL: splat_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: strh w0, [sp, #14]
-; CHECK-NEXT: strh w0, [sp, #12]
-; CHECK-NEXT: strh w0, [sp, #10]
-; CHECK-NEXT: strh w0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: mov z0.h, w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <4 x i16> undef, i16 %a, i64 0
%splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
@@ -139,17 +78,8 @@ define <4 x i16> @splat_v4i16(i16 %a) #0 {
define <8 x i16> @splat_v8i16(i16 %a) #0 {
; CHECK-LABEL: splat_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: strh w0, [sp, #14]
-; CHECK-NEXT: strh w0, [sp, #12]
-; CHECK-NEXT: strh w0, [sp, #10]
-; CHECK-NEXT: strh w0, [sp, #8]
-; CHECK-NEXT: strh w0, [sp, #6]
-; CHECK-NEXT: strh w0, [sp, #4]
-; CHECK-NEXT: strh w0, [sp, #2]
-; CHECK-NEXT: strh w0, [sp]
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: mov z0.h, w0
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <8 x i16> undef, i16 %a, i64 0
%splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -159,19 +89,8 @@ define <8 x i16> @splat_v8i16(i16 %a) #0 {
define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 {
; CHECK-LABEL: splat_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: strh w0, [sp, #14]
-; CHECK-NEXT: strh w0, [sp, #12]
-; CHECK-NEXT: strh w0, [sp, #10]
-; CHECK-NEXT: strh w0, [sp, #8]
-; CHECK-NEXT: strh w0, [sp, #6]
-; CHECK-NEXT: strh w0, [sp, #4]
-; CHECK-NEXT: strh w0, [sp, #2]
-; CHECK-NEXT: strh w0, [sp]
-; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: mov z0.h, w0
; CHECK-NEXT: stp q0, q0, [x1]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%insert = insertelement <16 x i16> undef, i16 %a, i64 0
%splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -182,11 +101,8 @@ define void @splat_v16i16(i16 %a, <16 x i16>* %b) #0 {
define <2 x i32> @splat_v2i32(i32 %a) #0 {
; CHECK-LABEL: splat_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: stp w0, w0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: mov z0.s, w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <2 x i32> undef, i32 %a, i64 0
%splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -196,11 +112,8 @@ define <2 x i32> @splat_v2i32(i32 %a) #0 {
define <4 x i32> @splat_v4i32(i32 %a) #0 {
; CHECK-LABEL: splat_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: stp w0, w0, [sp, #8]
-; CHECK-NEXT: stp w0, w0, [sp]
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: mov z0.s, w0
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <4 x i32> undef, i32 %a, i64 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
@@ -210,13 +123,8 @@ define <4 x i32> @splat_v4i32(i32 %a) #0 {
define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 {
; CHECK-LABEL: splat_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: stp w0, w0, [sp, #8]
-; CHECK-NEXT: stp w0, w0, [sp]
-; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: mov z0.s, w0
; CHECK-NEXT: stp q0, q0, [x1]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%insert = insertelement <8 x i32> undef, i32 %a, i64 0
%splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -227,7 +135,8 @@ define void @splat_v8i32(i32 %a, <8 x i32>* %b) #0 {
define <1 x i64> @splat_v1i64(i64 %a) #0 {
; CHECK-LABEL: splat_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: mov z0.d, x0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <1 x i64> undef, i64 %a, i64 0
%splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
@@ -237,9 +146,8 @@ define <1 x i64> @splat_v1i64(i64 %a) #0 {
define <2 x i64> @splat_v2i64(i64 %a) #0 {
; CHECK-LABEL: splat_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x0, x0, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: mov z0.d, x0
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <2 x i64> undef, i64 %a, i64 0
%splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
@@ -249,11 +157,8 @@ define <2 x i64> @splat_v2i64(i64 %a) #0 {
define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 {
; CHECK-LABEL: splat_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x0, x0, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: mov z0.d, x0
; CHECK-NEXT: stp q0, q0, [x1]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%insert = insertelement <4 x i64> undef, i64 %a, i64 0
%splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -268,12 +173,9 @@ define void @splat_v4i64(i64 %a, <4 x i64>* %b) #0 {
define <2 x half> @splat_v2f16(half %a) #0 {
; CHECK-LABEL: splat_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: str h0, [sp, #10]
-; CHECK-NEXT: str h0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <2 x half> undef, half %a, i64 0
%splat = shufflevector <2 x half> %insert, <2 x half> undef, <2 x i32> zeroinitializer
@@ -283,14 +185,9 @@ define <2 x half> @splat_v2f16(half %a) #0 {
define <4 x half> @splat_v4f16(half %a) #0 {
; CHECK-LABEL: splat_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: str h0, [sp, #14]
-; CHECK-NEXT: str h0, [sp, #12]
-; CHECK-NEXT: str h0, [sp, #10]
-; CHECK-NEXT: str h0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <4 x half> undef, half %a, i64 0
%splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
@@ -300,17 +197,9 @@ define <4 x half> @splat_v4f16(half %a) #0 {
define <8 x half> @splat_v8f16(half %a) #0 {
; CHECK-LABEL: splat_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: str h0, [sp, #14]
-; CHECK-NEXT: str h0, [sp, #12]
-; CHECK-NEXT: str h0, [sp, #10]
-; CHECK-NEXT: str h0, [sp, #8]
-; CHECK-NEXT: str h0, [sp, #6]
-; CHECK-NEXT: str h0, [sp, #4]
-; CHECK-NEXT: str h0, [sp, #2]
-; CHECK-NEXT: str h0, [sp]
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: mov z0.h, h0
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <8 x half> undef, half %a, i64 0
%splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
@@ -320,19 +209,9 @@ define <8 x half> @splat_v8f16(half %a) #0 {
define void @splat_v16f16(half %a, <16 x half>* %b) #0 {
; CHECK-LABEL: splat_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: str h0, [sp, #14]
-; CHECK-NEXT: str h0, [sp, #12]
-; CHECK-NEXT: str h0, [sp, #10]
-; CHECK-NEXT: str h0, [sp, #8]
-; CHECK-NEXT: str h0, [sp, #6]
-; CHECK-NEXT: str h0, [sp, #4]
-; CHECK-NEXT: str h0, [sp, #2]
-; CHECK-NEXT: str h0, [sp]
-; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: stp q0, q0, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%insert = insertelement <16 x half> undef, half %a, i64 0
%splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
@@ -343,11 +222,9 @@ define void @splat_v16f16(half %a, <16 x half>* %b) #0 {
define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
; CHECK-LABEL: splat_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: stp s0, s0, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <2 x float> undef, float %a, i64 0
%splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
@@ -357,11 +234,9 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) #0 {
define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: stp s0, s0, [sp, #8]
-; CHECK-NEXT: stp s0, s0, [sp]
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: mov z0.s, s0
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <4 x float> undef, float %a, i64 0
%splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
@@ -371,13 +246,9 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) #0 {
define void @splat_v8f32(float %a, <8 x float>* %b) #0 {
; CHECK-LABEL: splat_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: stp s0, s0, [sp, #8]
-; CHECK-NEXT: stp s0, s0, [sp]
-; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: stp q0, q0, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%insert = insertelement <8 x float> undef, float %a, i64 0
%splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
@@ -397,9 +268,9 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) #0 {
define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d0, d0, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q0, [sp], #16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.d, d0
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%insert = insertelement <2 x double> undef, double %a, i64 0
%splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
@@ -409,11 +280,9 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) #0 {
define void @splat_v4f64(double %a, <4 x double>* %b) #0 {
; CHECK-LABEL: splat_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d0, d0, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: stp q0, q0, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%insert = insertelement <4 x double> undef, double %a, i64 0
%splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
@@ -428,8 +297,7 @@ define void @splat_v4f64(double %a, <4 x double>* %b) #0 {
define void @splat_imm_v32i8(<32 x i8>* %a) #0 {
; CHECK-LABEL: splat_imm_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI24_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_0]
+; CHECK-NEXT: mov z0.b, #1 // =0x1
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%insert = insertelement <32 x i8> undef, i8 1, i64 0
@@ -441,8 +309,7 @@ define void @splat_imm_v32i8(<32 x i8>* %a) #0 {
define void @splat_imm_v16i16(<16 x i16>* %a) #0 {
; CHECK-LABEL: splat_imm_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI25_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_0]
+; CHECK-NEXT: mov z0.h, #2 // =0x2
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%insert = insertelement <16 x i16> undef, i16 2, i64 0
@@ -454,8 +321,7 @@ define void @splat_imm_v16i16(<16 x i16>* %a) #0 {
define void @splat_imm_v8i32(<8 x i32>* %a) #0 {
; CHECK-LABEL: splat_imm_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI26_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_0]
+; CHECK-NEXT: mov z0.s, #3 // =0x3
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%insert = insertelement <8 x i32> undef, i32 3, i64 0
@@ -467,8 +333,7 @@ define void @splat_imm_v8i32(<8 x i32>* %a) #0 {
define void @splat_imm_v4i64(<4 x i64>* %a) #0 {
; CHECK-LABEL: splat_imm_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI27_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI27_0]
+; CHECK-NEXT: mov z0.d, #4 // =0x4
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%insert = insertelement <4 x i64> undef, i64 4, i64 0
@@ -484,8 +349,7 @@ define void @splat_imm_v4i64(<4 x i64>* %a) #0 {
define void @splat_imm_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: splat_imm_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI28_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI28_0]
+; CHECK-NEXT: fmov z0.h, #5.00000000
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%insert = insertelement <16 x half> undef, half 5.0, i64 0
@@ -497,8 +361,7 @@ define void @splat_imm_v16f16(<16 x half>* %a) #0 {
define void @splat_imm_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: splat_imm_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI29_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI29_0]
+; CHECK-NEXT: fmov z0.s, #6.00000000
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%insert = insertelement <8 x float> undef, float 6.0, i64 0
@@ -510,8 +373,7 @@ define void @splat_imm_v8f32(<8 x float>* %a) #0 {
define void @splat_imm_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: splat_imm_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI30_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI30_0]
+; CHECK-NEXT: fmov z0.d, #7.00000000
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
%insert = insertelement <4 x double> undef, double 7.0, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index 4b40f0d2e776..8298281d6d8d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -6,9 +6,8 @@ target triple = "aarch64-unknown-linux-gnu"
define void @store_v4i8(<4 x i8>* %a) #0 {
; CHECK-LABEL: store_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
; CHECK-NEXT: ret
store <4 x i8> zeroinitializer, <4 x i8>* %a
@@ -18,8 +17,7 @@ define void @store_v4i8(<4 x i8>* %a) #0 {
define void @store_v8i8(<8 x i8>* %a) #0 {
; CHECK-LABEL: store_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
store <8 x i8> zeroinitializer, <8 x i8>* %a
@@ -29,8 +27,7 @@ define void @store_v8i8(<8 x i8>* %a) #0 {
define void @store_v16i8(<16 x i8>* %a) #0 {
; CHECK-LABEL: store_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
store <16 x i8> zeroinitializer, <16 x i8>* %a
@@ -40,8 +37,7 @@ define void @store_v16i8(<16 x i8>* %a) #0 {
define void @store_v32i8(<32 x i8>* %a) #0 {
; CHECK-LABEL: store_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
store <32 x i8> zeroinitializer, <32 x i8>* %a
@@ -51,9 +47,8 @@ define void @store_v32i8(<32 x i8>* %a) #0 {
define void @store_v2i16(<2 x i16>* %a) #0 {
; CHECK-LABEL: store_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
store <2 x i16> zeroinitializer, <2 x i16>* %a
@@ -63,8 +58,7 @@ define void @store_v2i16(<2 x i16>* %a) #0 {
define void @store_v2f16(<2 x half>* %a) #0 {
; CHECK-LABEL: store_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI5_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: ret
@@ -75,8 +69,7 @@ define void @store_v2f16(<2 x half>* %a) #0 {
define void @store_v4i16(<4 x i16>* %a) #0 {
; CHECK-LABEL: store_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI6_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
store <4 x i16> zeroinitializer, <4 x i16>* %a
@@ -86,8 +79,7 @@ define void @store_v4i16(<4 x i16>* %a) #0 {
define void @store_v4f16(<4 x half>* %a) #0 {
; CHECK-LABEL: store_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI7_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
store <4 x half> zeroinitializer, <4 x half>* %a
@@ -97,8 +89,7 @@ define void @store_v4f16(<4 x half>* %a) #0 {
define void @store_v8i16(<8 x i16>* %a) #0 {
; CHECK-LABEL: store_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
store <8 x i16> zeroinitializer, <8 x i16>* %a
@@ -108,8 +99,7 @@ define void @store_v8i16(<8 x i16>* %a) #0 {
define void @store_v8f16(<8 x half>* %a) #0 {
; CHECK-LABEL: store_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
store <8 x half> zeroinitializer, <8 x half>* %a
@@ -119,8 +109,7 @@ define void @store_v8f16(<8 x half>* %a) #0 {
define void @store_v16i16(<16 x i16>* %a) #0 {
; CHECK-LABEL: store_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI10_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
store <16 x i16> zeroinitializer, <16 x i16>* %a
@@ -130,8 +119,7 @@ define void @store_v16i16(<16 x i16>* %a) #0 {
define void @store_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: store_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI11_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
store <16 x half> zeroinitializer, <16 x half>* %a
@@ -177,8 +165,7 @@ define void @store_v4f32(<4 x float>* %a) #0 {
define void @store_v8i32(<8 x i32>* %a) #0 {
; CHECK-LABEL: store_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI16_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
store <8 x i32> zeroinitializer, <8 x i32>* %a
@@ -188,8 +175,7 @@ define void @store_v8i32(<8 x i32>* %a) #0 {
define void @store_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: store_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI17_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
store <8 x float> zeroinitializer, <8 x float>* %a
@@ -199,7 +185,7 @@ define void @store_v8f32(<8 x float>* %a) #0 {
define void @store_v1i64(<1 x i64>* %a) #0 {
; CHECK-LABEL: store_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, xzr
+; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
store <1 x i64> zeroinitializer, <1 x i64>* %a
@@ -237,8 +223,7 @@ define void @store_v2f64(<2 x double>* %a) #0 {
define void @store_v4i64(<4 x i64>* %a) #0 {
; CHECK-LABEL: store_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI22_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
store <4 x i64> zeroinitializer, <4 x i64>* %a
@@ -248,8 +233,7 @@ define void @store_v4i64(<4 x i64>* %a) #0 {
define void @store_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: store_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI23_0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
store <4 x double> zeroinitializer, <4 x double>* %a
More information about the llvm-commits
mailing list