[llvm] 46a30df - Reland "[AArch64] NFC: Add RUN lines for streaming-compatible code." (#91599)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 07:03:39 PDT 2024
Author: Sander de Smalen
Date: 2024-05-28T14:02:34Z
New Revision: 46a30dfdfd765021a76c927f70f95024d30786f2
URL: https://github.com/llvm/llvm-project/commit/46a30dfdfd765021a76c927f70f95024d30786f2
DIFF: https://github.com/llvm/llvm-project/commit/46a30dfdfd765021a76c927f70f95024d30786f2.diff
LOG: Reland "[AArch64] NFC: Add RUN lines for streaming-compatible code." (#91599)
This reverts commit aa9d467abaeb440dc70b64c0f35b8d5e731f3a19.
Added: (none)
Modified:
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
Removed: (none)
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
index e843537c10a33..ed3222529a3bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -14,6 +15,12 @@ define <4 x i8> @vls_sve_and_4xi8(<4 x i8> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_4xi8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0xff000000ff0000
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%c = and <4 x i8> %b, <i8 0, i8 255, i8 0, i8 255>
ret <4 x i8> %c
}
@@ -27,6 +34,12 @@ define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_8xi8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff00
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%c = and <8 x i8> %b, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <8 x i8> %c
}
@@ -40,6 +53,12 @@ define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_16xi8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.2d, #0xff00ff00ff00ff00
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%c = and <16 x i8> %b, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <16 x i8> %c
}
@@ -56,6 +75,13 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_32xi8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v2.2d, #0xff00ff00ff00ff00
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%b = and <32 x i8> %ap, <i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255,
i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255, i8 0, i8 255>
ret <32 x i8> %b
@@ -73,6 +99,13 @@ define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_2xi16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov v0.s[0], wzr
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%c = and <2 x i16> %b, <i16 0, i16 65535>
ret <2 x i16> %c
}
@@ -86,6 +119,12 @@ define <4 x i16> @vls_sve_and_4xi16(<4 x i16> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_4xi16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0xffff0000ffff0000
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%c = and <4 x i16> %b, <i16 0, i16 65535, i16 0, i16 65535>
ret <4 x i16> %c
}
@@ -99,6 +138,12 @@ define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_8xi16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.2d, #0xffff0000ffff0000
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%c = and <8 x i16> %b, <i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535>
ret <8 x i16> %c
}
@@ -115,6 +160,13 @@ define <16 x i16> @vls_sve_and_16xi16(<16 x i16> %b) nounwind {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_16xi16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v2.2d, #0xffff0000ffff0000
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%c = and <16 x i16> %b, <i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535, i16 0, i16 65535>
ret <16 x i16> %c
}
@@ -128,6 +180,13 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_2xi32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov v0.s[0], wzr
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%c = and <2 x i32> %b, <i32 0, i32 4294967295>
ret <2 x i32> %c
}
@@ -141,6 +200,12 @@ define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_4xi32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.2d, #0xffffffff00000000
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%c = and <4 x i32> %b, <i32 0, i32 4294967295, i32 0, i32 4294967295>
ret <4 x i32> %c
}
@@ -157,6 +222,13 @@ define <8 x i32> @vls_sve_and_8xi32(<8 x i32> %b) nounwind {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_8xi32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v2.2d, #0xffffffff00000000
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%c = and <8 x i32> %b, <i32 0, i32 4294967295, i32 0, i32 4294967295, i32 0, i32 4294967295, i32 0, i32 4294967295>
ret <8 x i32> %c
}
@@ -170,6 +242,11 @@ define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %b) nounwind {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_2xi64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov v0.d[0], xzr
+; NONEON-NOSVE-NEXT: ret
%c = and <2 x i64> %b, <i64 0, i64 18446744073709551615>
ret <2 x i64> %c
}
@@ -185,6 +262,12 @@ define <4 x i64> @vls_sve_and_4xi64(<4 x i64> %b) nounwind {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: vls_sve_and_4xi64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov v0.d[0], xzr
+; NONEON-NOSVE-NEXT: mov v1.d[0], xzr
+; NONEON-NOSVE-NEXT: ret
%c = and <4 x i64> %b, <i64 0, i64 18446744073709551615, i64 0, i64 18446744073709551615>
ret <4 x i64> %c
}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index aa42d5c2a8c13..cd6c2b489efe4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -18,6 +19,16 @@ define <4 x i8> @ctlz_v4i8(<4 x i8> %op) {
; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: mov w8, #8 // =0x8
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: dup v1.4h, w8
+; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h
+; NONEON-NOSVE-NEXT: sub v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i8> @llvm.ctlz.v4i8(<4 x i8> %op)
ret <4 x i8> %res
}
@@ -30,6 +41,11 @@ define <8 x i8> @ctlz_v8i8(<8 x i8> %op) {
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: clz v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %op)
ret <8 x i8> %res
}
@@ -42,6 +58,11 @@ define <16 x i8> @ctlz_v16i8(<16 x i8> %op) {
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: clz v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %op)
ret <16 x i8> %res
}
@@ -55,6 +76,14 @@ define void @ctlz_v32i8(ptr %a) {
; CHECK-NEXT: clz z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: clz v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: clz v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %op)
store <32 x i8> %res, ptr %a
@@ -71,6 +100,16 @@ define <2 x i16> @ctlz_v2i16(<2 x i16> %op) {
; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: dup v1.2s, w8
+; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: sub v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i16> @llvm.ctlz.v2i16(<2 x i16> %op)
ret <2 x i16> %res
}
@@ -83,6 +122,11 @@ define <4 x i16> @ctlz_v4i16(<4 x i16> %op) {
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %op)
ret <4 x i16> %res
}
@@ -95,6 +139,11 @@ define <8 x i16> @ctlz_v8i16(<8 x i16> %op) {
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %op)
ret <8 x i16> %res
}
@@ -108,6 +157,14 @@ define void @ctlz_v16i16(ptr %a) {
; CHECK-NEXT: clz z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: clz v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %op)
store <16 x i16> %res, ptr %a
@@ -122,6 +179,11 @@ define <2 x i32> @ctlz_v2i32(<2 x i32> %op) {
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %op)
ret <2 x i32> %res
}
@@ -134,6 +196,11 @@ define <4 x i32> @ctlz_v4i32(<4 x i32> %op) {
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %op)
ret <4 x i32> %res
}
@@ -147,6 +214,14 @@ define void @ctlz_v8i32(ptr %a) {
; CHECK-NEXT: clz z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: clz v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %op)
store <8 x i32> %res, ptr %a
@@ -161,6 +236,27 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushr d1, d0, #1
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushr d1, d0, #2
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushr d1, d0, #4
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushr d1, d0, #8
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushr d1, d0, #16
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushr d1, d0, #32
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: mvn v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h
+; NONEON-NOSVE-NEXT: uaddlp v0.1d, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
ret <1 x i64> %res
}
@@ -173,6 +269,27 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) {
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #1
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #2
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #4
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #8
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #16
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ushr v1.2d, v0.2d, #32
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op)
ret <2 x i64> %res
}
@@ -186,6 +303,46 @@ define void @ctlz_v4i64(ptr %a) {
; CHECK-NEXT: clz z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctlz_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #1
+; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #1
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #2
+; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #2
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #4
+; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #4
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #8
+; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #8
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #16
+; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #16
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: ushr v2.2d, v0.2d, #32
+; NONEON-NOSVE-NEXT: ushr v3.2d, v1.2d, #32
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: mvn v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s
+; NONEON-NOSVE-NEXT: uaddlp v1.2d, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %op)
store <4 x i64> %res, ptr %a
@@ -205,6 +362,14 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> %op)
ret <4 x i8> %res
}
@@ -217,6 +382,11 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %op)
ret <8 x i8> %res
}
@@ -229,6 +399,11 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
; CHECK-NEXT: cnt z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %op)
ret <16 x i8> %res
}
@@ -242,6 +417,14 @@ define void @ctpop_v32i8(ptr %a) {
; CHECK-NEXT: cnt z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %op)
store <32 x i8> %res, ptr %a
@@ -257,6 +440,15 @@ define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %op)
ret <2 x i16> %res
}
@@ -269,6 +461,12 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %op)
ret <4 x i16> %res
}
@@ -281,6 +479,12 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
; CHECK-NEXT: cnt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %op)
ret <8 x i16> %res
}
@@ -294,6 +498,16 @@ define void @ctpop_v16i16(ptr %a) {
; CHECK-NEXT: cnt z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %op)
store <16 x i16> %res, ptr %a
@@ -308,6 +522,13 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %op)
ret <2 x i32> %res
}
@@ -320,6 +541,13 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
; CHECK-NEXT: cnt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %op)
ret <4 x i32> %res
}
@@ -333,6 +561,18 @@ define void @ctpop_v8i32(ptr %a) {
; CHECK-NEXT: cnt z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %op)
store <8 x i32> %res, ptr %a
@@ -347,6 +587,14 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h
+; NONEON-NOSVE-NEXT: uaddlp v0.1d, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op)
ret <1 x i64> %res
}
@@ -359,6 +607,14 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
; CHECK-NEXT: cnt z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %op)
ret <2 x i64> %res
}
@@ -372,6 +628,20 @@ define void @ctpop_v4i64(ptr %a) {
; CHECK-NEXT: cnt z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ctpop_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s
+; NONEON-NOSVE-NEXT: uaddlp v1.2d, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %op)
store <4 x i64> %res, ptr %a
@@ -392,6 +662,21 @@ define <4 x i8> @cttz_v4i8(<4 x i8> %op) {
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #256 // =0x100
+; NONEON-NOSVE-NEXT: dup v1.4h, w8
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: dup v2.4h, w8
+; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: sub v1.4h, v0.4h, v2.4h
+; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: dup v1.4h, w8
+; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h
+; NONEON-NOSVE-NEXT: sub v0.4h, v1.4h, v0.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %op)
ret <4 x i8> %res
}
@@ -405,6 +690,14 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) {
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.8b, #1
+; NONEON-NOSVE-NEXT: sub v1.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %op)
ret <8 x i8> %res
}
@@ -418,6 +711,14 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) {
; CHECK-NEXT: clz z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.16b, #1
+; NONEON-NOSVE-NEXT: sub v1.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %op)
ret <16 x i8> %res
}
@@ -433,6 +734,19 @@ define void @cttz_v32i8(ptr %a) {
; CHECK-NEXT: clz z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #1
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: sub v3.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: sub v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b
+; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %op)
store <32 x i8> %res, ptr %a
@@ -449,6 +763,21 @@ define <2 x i16> @cttz_v2i16(<2 x i16> %op) {
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #65536 // =0x10000
+; NONEON-NOSVE-NEXT: dup v1.2s, w8
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: dup v2.2s, w8
+; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: sub v1.2s, v0.2s, v2.2s
+; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: dup v1.2s, w8
+; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: sub v0.2s, v1.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %op)
ret <2 x i16> %res
}
@@ -462,6 +791,18 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) {
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: dup v1.4h, w8
+; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10
+; NONEON-NOSVE-NEXT: sub v1.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: dup v1.4h, w8
+; NONEON-NOSVE-NEXT: clz v0.4h, v0.4h
+; NONEON-NOSVE-NEXT: sub v0.4h, v1.4h, v0.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %op)
ret <4 x i16> %res
}
@@ -475,6 +816,18 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) {
; CHECK-NEXT: clz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: dup v1.8h, w8
+; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10
+; NONEON-NOSVE-NEXT: sub v1.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: dup v1.8h, w8
+; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: sub v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %op)
ret <8 x i16> %res
}
@@ -490,6 +843,24 @@ define void @cttz_v16i16(ptr %a) {
; CHECK-NEXT: clz z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: mov w8, #16 // =0x10
+; NONEON-NOSVE-NEXT: sub v3.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: sub v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b
+; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: dup v2.8h, w8
+; NONEON-NOSVE-NEXT: clz v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: clz v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: sub v1.8h, v2.8h, v1.8h
+; NONEON-NOSVE-NEXT: sub v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %op)
store <16 x i16> %res, ptr %a
@@ -505,6 +876,18 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) {
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: dup v1.2s, w8
+; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20
+; NONEON-NOSVE-NEXT: sub v1.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: dup v1.2s, w8
+; NONEON-NOSVE-NEXT: clz v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: sub v0.2s, v1.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %op)
ret <2 x i32> %res
}
@@ -518,6 +901,18 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) {
; CHECK-NEXT: clz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: dup v1.4s, w8
+; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20
+; NONEON-NOSVE-NEXT: sub v1.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: dup v1.4s, w8
+; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: sub v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %op)
ret <4 x i32> %res
}
@@ -533,6 +928,24 @@ define void @cttz_v8i32(ptr %a) {
; CHECK-NEXT: clz z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: mov w8, #32 // =0x20
+; NONEON-NOSVE-NEXT: sub v3.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: sub v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b
+; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: dup v2.4s, w8
+; NONEON-NOSVE-NEXT: clz v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: clz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: sub v1.4s, v2.4s, v1.4s
+; NONEON-NOSVE-NEXT: sub v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %op)
store <8 x i32> %res, ptr %a
@@ -548,6 +961,18 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: sub d1, d0, d1
+; NONEON-NOSVE-NEXT: bic v0.8b, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: cnt v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.4h, v0.8b
+; NONEON-NOSVE-NEXT: uaddlp v0.2s, v0.4h
+; NONEON-NOSVE-NEXT: uaddlp v0.1d, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
ret <1 x i64> %res
}
@@ -561,6 +986,18 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) {
; CHECK-NEXT: clz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: dup v1.2d, x8
+; NONEON-NOSVE-NEXT: sub v1.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: bic v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op)
ret <2 x i64> %res
}
@@ -576,6 +1013,26 @@ define void @cttz_v4i64(ptr %a) {
; CHECK-NEXT: clz z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: cttz_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: sub v3.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: sub v0.2d, v2.2d, v0.2d
+; NONEON-NOSVE-NEXT: bic v1.16b, v3.16b, v1.16b
+; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: cnt v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: cnt v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v1.8h, v1.16b
+; NONEON-NOSVE-NEXT: uaddlp v0.8h, v0.16b
+; NONEON-NOSVE-NEXT: uaddlp v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: uaddlp v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: uaddlp v1.2d, v1.4s
+; NONEON-NOSVE-NEXT: uaddlp v0.2d, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %op)
store <4 x i64> %res, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
index 260ad16581f13..7e93ee99ed749 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -11,6 +12,12 @@ define void @bitcast_v4i8(ptr %a, ptr %b) {
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.h }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: str w8, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <4 x i8>, ptr %a
%cast = bitcast <4 x i8> %load to <4 x i8>
store volatile <4 x i8> %cast, ptr %b
@@ -23,6 +30,12 @@ define void @bitcast_v8i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <8 x i8>, ptr %a
%cast = bitcast <8 x i8> %load to <8 x i8>
store volatile <8 x i8> %cast, ptr %b
@@ -35,6 +48,12 @@ define void @bitcast_v16i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <16 x i8>, ptr %a
%cast = bitcast <16 x i8> %load to <16 x i8>
store volatile <16 x i8> %cast, ptr %b
@@ -49,6 +68,14 @@ define void @bitcast_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: str q1, [x1, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: str q1, [x1, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <32 x i8>, ptr %a
%cast = bitcast <32 x i8> %load to <32 x i8>
store volatile <32 x i8> %cast, ptr %b
@@ -72,6 +99,16 @@ define void @bitcast_v2i16(ptr %a, ptr %b) {
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldrh w8, [x0]
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: add x8, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8]
+; NONEON-NOSVE-NEXT: uzp1 v0.4h, v0.4h, v0.4h
+; NONEON-NOSVE-NEXT: str s0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <2 x i16>, ptr %a
%cast = bitcast <2 x i16> %load to <2 x half>
store volatile <2 x half> %cast, ptr %b
@@ -84,6 +121,12 @@ define void @bitcast_v4i16(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <4 x i16>, ptr %a
%cast = bitcast <4 x i16> %load to <4 x half>
store volatile <4 x half> %cast, ptr %b
@@ -96,6 +139,12 @@ define void @bitcast_v8i16(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <8 x i16>, ptr %a
%cast = bitcast <8 x i16> %load to <8 x half>
store volatile <8 x half> %cast, ptr %b
@@ -110,6 +159,14 @@ define void @bitcast_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: str q1, [x1, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: str q1, [x1, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <16 x i16>, ptr %a
%cast = bitcast <16 x i16> %load to <16 x half>
store volatile <16 x half> %cast, ptr %b
@@ -122,6 +179,12 @@ define void @bitcast_v2i32(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <2 x i32>, ptr %a
%cast = bitcast <2 x i32> %load to <2 x float>
store volatile <2 x float> %cast, ptr %b
@@ -134,6 +197,12 @@ define void @bitcast_v4i32(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <4 x i32>, ptr %a
%cast = bitcast <4 x i32> %load to <4 x float>
store volatile <4 x float> %cast, ptr %b
@@ -148,6 +217,14 @@ define void @bitcast_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: str q1, [x1, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: str q1, [x1, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <8 x i32>, ptr %a
%cast = bitcast <8 x i32> %load to <8 x float>
store volatile <8 x float> %cast, ptr %b
@@ -160,6 +237,12 @@ define void @bitcast_v1i64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <1 x i64>, ptr %a
%cast = bitcast <1 x i64> %load to <1 x double>
store volatile <1 x double> %cast, ptr %b
@@ -172,6 +255,12 @@ define void @bitcast_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <2 x i64>, ptr %a
%cast = bitcast <2 x i64> %load to <2 x double>
store volatile <2 x double> %cast, ptr %b
@@ -186,6 +275,14 @@ define void @bitcast_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: str q1, [x1, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitcast_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: str q1, [x1, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%load = load volatile <4 x i64>, ptr %a
%cast = bitcast <4 x i64> %load to <4 x double>
store volatile <4 x double> %cast, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index 9a07bd8bd5ac9..6b8077053b590 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64"
@@ -30,6 +31,17 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x2]
+; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: neg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: ret
%pre_cond = load <8 x i32>, ptr %pre_cond_ptr
%left = load <8 x i32>, ptr %left_ptr
%right = load <8 x i32>, ptr %right_ptr
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index aec434b4819d7..318a9cf7d738b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -10,6 +11,12 @@ define void @build_vector_7_inc1_v4i1(ptr %a) {
; CHECK-NEXT: mov w8, #5 // =0x5
; CHECK-NEXT: strb w8, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_7_inc1_v4i1:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: strb w8, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x i1> <i1 true, i1 false, i1 true, i1 false>, ptr %a, align 1
ret void
}
@@ -23,6 +30,15 @@ define void @build_vector_7_inc1_v32i8(ptr %a) {
; CHECK-NEXT: add z1.b, z1.b, #23 // =0x17
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_7_inc1_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI1_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI1_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <32 x i8> <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38>, ptr %a, align 1
ret void
}
@@ -35,6 +51,15 @@ define void @build_vector_0_inc2_v16i16(ptr %a) {
; CHECK-NEXT: add z0.h, z0.h, #16 // =0x10
; CHECK-NEXT: str q0, [x0, #16]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_0_inc2_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI2_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI2_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <16 x i16> <i16 0, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30>, ptr %a, align 2
ret void
}
@@ -48,6 +73,15 @@ define void @build_vector_0_dec3_v8i32(ptr %a) {
; CHECK-NEXT: add z1.s, z0.s, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI3_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI3_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <8 x i32> <i32 0, i32 -3, i32 -6, i32 -9, i32 -12, i32 -15, i32 -18, i32 -21>, ptr %a, align 4
ret void
}
@@ -64,6 +98,15 @@ define void @build_vector_minus2_dec32_v4i64(ptr %a) {
; CHECK-NEXT: add z0.d, z0.d, z2.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_minus2_dec32_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI4_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI4_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI4_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x i64> <i64 -2, i64 -34, i64 -66, i64 -98>, ptr %a, align 8
ret void
}
@@ -76,6 +119,15 @@ define void @build_vector_no_stride_v4i64(ptr %a) {
; CHECK-NEXT: index z1.d, #0, #4
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_no_stride_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI5_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI5_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI5_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x i64> <i64 0, i64 4, i64 1, i64 8>, ptr %a, align 8
ret void
}
@@ -89,6 +141,15 @@ define void @build_vector_0_inc2_v16f16(ptr %a) {
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI6_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_0_inc2_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI6_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI6_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <16 x half> <half 0.0, half 2.0, half 4.0, half 6.0, half 8.0, half 10.0, half 12.0, half 14.0, half 16.0, half 18.0, half 20.0, half 22.0, half 24.0, half 26.0, half 28.0, half 30.0>, ptr %a, align 2
ret void
}
@@ -103,6 +164,15 @@ define void @build_vector_0_dec3_v8f32(ptr %a) {
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI7_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_0_dec3_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI7_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI7_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <8 x float> <float 0.0, float -3.0, float -6.0, float -9.0, float -12.0, float -15.0, float -18.0, float -21.0>, ptr %a, align 4
ret void
}
@@ -117,6 +187,15 @@ define void @build_vector_minus2_dec32_v4f64(ptr %a) {
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI8_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_minus2_dec32_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI8_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI8_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x double> <double -2.0, double -34.0, double -66.0, double -98.0>, ptr %a, align 8
ret void
}
@@ -131,6 +210,15 @@ define void @build_vector_no_stride_v4f64(ptr %a) {
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI9_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: build_vector_no_stride_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI9_1
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
+; NONEON-NOSVE-NEXT: ldr q1, [x9, :lo12:.LCPI9_1]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x double> <double 0.0, double 4.0, double 1.0, double 8.0>, ptr %a, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index 82e75d6efda35..d2bfc7d4e8096 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -40,6 +41,11 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uzp1 v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <4 x i8> %op1, <4 x i8> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %res
}
@@ -53,6 +59,13 @@ define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <8 x i8> %op1, <8 x i8> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %res
@@ -65,6 +78,13 @@ define void @concat_v32i8(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i8>, ptr %a
%op2 = load <16 x i8>, ptr %b
%res = shufflevector <16 x i8> %op1, <16 x i8> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -83,6 +103,14 @@ define void @concat_v64i8(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: stp q0, q1, [x2, #32]
; CHECK-NEXT: stp q3, q2, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v64i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = shufflevector <32 x i8> %op1, <32 x i8> %op2, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -121,6 +149,11 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <2 x i16> %op1, <2 x i16> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i16> %res
}
@@ -135,6 +168,13 @@ define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <4 x i16> %op1, <4 x i16> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %res
}
@@ -146,6 +186,13 @@ define void @concat_v16i16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%op2 = load <8 x i16>, ptr %b
%res = shufflevector <8 x i16> %op1, <8 x i16> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -162,6 +209,14 @@ define void @concat_v32i16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: stp q0, q1, [x2, #32]
; CHECK-NEXT: stp q3, q2, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = shufflevector <16 x i16> %op1, <16 x i16> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -185,6 +240,11 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) {
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <1 x i32> %op1, <1 x i32> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x i32> %res
}
@@ -199,6 +259,13 @@ define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <2 x i32> %op1, <2 x i32> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %res
}
@@ -210,6 +277,13 @@ define void @concat_v8i32(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i32>, ptr %a
%op2 = load <4 x i32>, ptr %b
%res = shufflevector <4 x i32> %op1, <4 x i32> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -225,6 +299,14 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: stp q0, q1, [x2, #32]
; CHECK-NEXT: stp q3, q2, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = shufflevector <8 x i32> %op1, <8 x i32> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -247,6 +329,13 @@ define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <1 x i64> %op1, <1 x i64> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x i64> %res
}
@@ -258,6 +347,13 @@ define void @concat_v4i64(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i64>, ptr %a
%op2 = load <2 x i64>, ptr %b
%res = shufflevector <2 x i64> %op1, <2 x i64> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -273,6 +369,14 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: stp q0, q1, [x2, #32]
; CHECK-NEXT: stp q3, q2, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = shufflevector <4 x i64> %op1, <4 x i64> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -300,6 +404,11 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <2 x half> %op1, <2 x half> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x half> %res
}
@@ -313,6 +422,13 @@ define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <4 x half> %op1, <4 x half> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x half> %res
}
@@ -324,6 +440,13 @@ define void @concat_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%op2 = load <8 x half>, ptr %b
%res = shufflevector <8 x half> %op1, <8 x half> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -340,6 +463,14 @@ define void @concat_v32f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: stp q0, q1, [x2, #32]
; CHECK-NEXT: stp q3, q2, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v32f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = shufflevector <16 x half> %op1, <16 x half> %op2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -363,6 +494,11 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) {
; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: zip1 v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <1 x float> %op1, <1 x float> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x float> %res
}
@@ -377,6 +513,13 @@ define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <2 x float> %op1, <2 x float> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x float> %res
}
@@ -388,6 +531,13 @@ define void @concat_v8f32(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x float>, ptr %a
%op2 = load <4 x float>, ptr %b
%res = shufflevector <4 x float> %op1, <4 x float> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -403,6 +553,14 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: stp q0, q1, [x2, #32]
; CHECK-NEXT: stp q3, q2, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v16f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = shufflevector <8 x float> %op1, <8 x float> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -425,6 +583,13 @@ define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%res = shufflevector <1 x double> %op1, <1 x double> %op2, <2 x i32> <i32 0, i32 1>
ret <2 x double> %res
}
@@ -436,6 +601,13 @@ define void @concat_v4f64(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x double>, ptr %a
%op2 = load <2 x double>, ptr %b
%res = shufflevector <2 x double> %op1, <2 x double> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -451,6 +623,14 @@ define void @concat_v8f64(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: stp q0, q1, [x2, #32]
; CHECK-NEXT: stp q3, q2, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2, #32]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = shufflevector <4 x double> %op1, <4 x double> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -468,6 +648,12 @@ define void @concat_v32i8_undef(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v32i8_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i8>, ptr %a
%res = shufflevector <16 x i8> %op1, <16 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
@@ -483,6 +669,12 @@ define void @concat_v16i16_undef(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v16i16_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = shufflevector <8 x i16> %op1, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -496,6 +688,12 @@ define void @concat_v8i32_undef(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8i32_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i32>, ptr %a
%res = shufflevector <4 x i32> %op1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
store <8 x i32> %res, ptr %b
@@ -508,6 +706,12 @@ define void @concat_v4i64_undef(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4i64_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i64>, ptr %a
%res = shufflevector <2 x i64> %op1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i64> %res, ptr %b
@@ -524,6 +728,12 @@ define void @concat_v32i8_4op(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v32i8_4op:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i8>, ptr %a
%shuffle = shufflevector <8 x i8> %op1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -541,6 +751,12 @@ define void @concat_v16i16_4op(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v16i16_4op:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i16>, ptr %a
%shuffle = shufflevector <4 x i16> %op1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%res = shufflevector <8 x i16> %shuffle, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -555,6 +771,12 @@ define void @concat_v8i32_4op(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v8i32_4op:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i32>, ptr %a
%shuffle = shufflevector <2 x i32> %op1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%res = shufflevector <4 x i32> %shuffle, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -568,6 +790,12 @@ define void @concat_v4i64_4op(ptr %a, ptr %b) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: concat_v4i64_4op:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <1 x i64>, ptr %a
%shuffle = shufflevector <1 x i64> %op1, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
%res = shufflevector <2 x i64> %shuffle, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 040e5861e9810..728b85d39bb37 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -11,6 +12,12 @@ define <8 x i16> @load_zext_v8i8i16(ptr %ap) {
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_zext_v8i8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i8>, ptr %ap
%val = zext <8 x i8> %a to <8 x i16>
ret <8 x i16> %val
@@ -23,6 +30,12 @@ define <4 x i32> @load_zext_v4i16i32(ptr %ap) {
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_zext_v4i16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i16>, ptr %ap
%val = zext <4 x i16> %a to <4 x i32>
ret <4 x i32> %val
@@ -35,6 +48,12 @@ define <2 x i64> @load_zext_v2i32i64(ptr %ap) {
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_zext_v2i32i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i32>, ptr %ap
%val = zext <2 x i32> %a to <2 x i64>
ret <2 x i64> %val
@@ -54,6 +73,19 @@ define <2 x i256> @load_zext_v2i64i256(ptr %ap) {
; CHECK-NEXT: mov x7, xzr
; CHECK-NEXT: fmov x4, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_zext_v2i64i256:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: mov x1, xzr
+; NONEON-NOSVE-NEXT: mov x2, xzr
+; NONEON-NOSVE-NEXT: mov x3, xzr
+; NONEON-NOSVE-NEXT: mov x5, xzr
+; NONEON-NOSVE-NEXT: mov x6, xzr
+; NONEON-NOSVE-NEXT: mov x4, v0.d[1]
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: mov x7, xzr
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i64>, ptr %ap
%val = zext <2 x i64> %a to <2 x i256>
ret <2 x i256> %val
@@ -75,6 +107,24 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap) {
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_sext_v16i8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: sshll v1.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v2.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: sshll v0.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #16]
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v4.4h, #0
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i8>, ptr %ap
%val = sext <16 x i8> %a to <16 x i32>
ret <16 x i32> %val
@@ -90,6 +140,17 @@ define <8 x i32> @load_sext_v8i16i32(ptr %ap) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_sext_v8i16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i16>, ptr %ap
%val = sext <8 x i16> %a to <8 x i32>
ret <8 x i32> %val
@@ -121,6 +182,39 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
; CHECK-NEXT: stp x12, x12, [x8, #112]
; CHECK-NEXT: stp x11, x12, [x8, #96]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_sext_v4i32i256:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: add x10, x8, #32
+; NONEON-NOSVE-NEXT: add x11, x8, #96
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: mov x9, v0.d[1]
+; NONEON-NOSVE-NEXT: st1 { v0.d }[1], [x10]
+; NONEON-NOSVE-NEXT: fmov x10, d0
+; NONEON-NOSVE-NEXT: st1 { v1.d }[1], [x11]
+; NONEON-NOSVE-NEXT: mov x11, v1.d[1]
+; NONEON-NOSVE-NEXT: asr x10, x10, #63
+; NONEON-NOSVE-NEXT: str d0, [x8]
+; NONEON-NOSVE-NEXT: asr x9, x9, #63
+; NONEON-NOSVE-NEXT: str d1, [x8, #64]
+; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #16]
+; NONEON-NOSVE-NEXT: stp x9, x9, [x8, #48]
+; NONEON-NOSVE-NEXT: str x9, [x8, #40]
+; NONEON-NOSVE-NEXT: fmov x9, d1
+; NONEON-NOSVE-NEXT: str x10, [x8, #8]
+; NONEON-NOSVE-NEXT: asr x10, x11, #63
+; NONEON-NOSVE-NEXT: asr x9, x9, #63
+; NONEON-NOSVE-NEXT: stp x10, x10, [x8, #112]
+; NONEON-NOSVE-NEXT: str x10, [x8, #104]
+; NONEON-NOSVE-NEXT: stp x9, x9, [x8, #80]
+; NONEON-NOSVE-NEXT: str x9, [x8, #72]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i32>, ptr %ap
%val = sext <4 x i32> %a to <4 x i256>
ret <4 x i256> %val
@@ -154,6 +248,22 @@ define <2 x i256> @load_sext_v2i64i256(ptr %ap) {
; CHECK-NEXT: fmov x1, d6
; CHECK-NEXT: fmov x5, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_sext_v2i64i256:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: mov x8, v0.d[1]
+; NONEON-NOSVE-NEXT: dup v1.2d, v0.d[1]
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: asr x1, x0, #63
+; NONEON-NOSVE-NEXT: asr x5, x8, #63
+; NONEON-NOSVE-NEXT: mov x2, x1
+; NONEON-NOSVE-NEXT: mov x3, x1
+; NONEON-NOSVE-NEXT: mov v1.d[1], x5
+; NONEON-NOSVE-NEXT: mov x6, x5
+; NONEON-NOSVE-NEXT: mov x7, x5
+; NONEON-NOSVE-NEXT: fmov x4, d1
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i64>, ptr %ap
%val = sext <2 x i64> %a to <2 x i256>
ret <2 x i256> %val
@@ -187,6 +297,34 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap) {
; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_zext_v16i16i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ushll v2.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v3.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v4.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ushll v5.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v0.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: stp q1, q2, [sp, #32]
+; NONEON-NOSVE-NEXT: ushll v2.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #56]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #40]
+; NONEON-NOSVE-NEXT: stp q5, q3, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d16, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d17, [sp, #72]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: ushll v6.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: ushll v5.2d, v16.2s, #0
+; NONEON-NOSVE-NEXT: ushll v7.2d, v17.2s, #0
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %ap
%val = zext <16 x i16> %a to <16 x i64>
ret <16 x i64> %val
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 45a804becbc55..ec6341d6085a0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -27,6 +28,11 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v8i1:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%ret = call <4 x i1> @llvm.vector.extract.v4i1.v8i1(<8 x i1> %op, i64 4)
ret <4 x i1> %ret
}
@@ -54,6 +60,11 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%ret = call <4 x i8> @llvm.vector.extract.v4i8.v8i8(<8 x i8> %op, i64 4)
ret <4 x i8> %ret
}
@@ -65,6 +76,14 @@ define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %op, i64 8)
ret <8 x i8> %ret
}
@@ -75,6 +94,12 @@ define void @extract_subvector_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%ret = call <16 x i8> @llvm.vector.extract.v16i8.v32i8(<32 x i8> %op, i64 16)
store <16 x i8> %ret, ptr %b
@@ -91,6 +116,15 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <2 x i16> @llvm.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2)
ret <2 x i16> %ret
}
@@ -102,6 +136,14 @@ define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> %op, i64 4)
ret <4 x i16> %ret
}
@@ -112,6 +154,12 @@ define void @extract_subvector_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%ret = call <8 x i16> @llvm.vector.extract.v8i16.v16i16(<16 x i16> %op, i64 8)
store <8 x i16> %ret, ptr %b
@@ -127,6 +175,12 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[1]
+; NONEON-NOSVE-NEXT: ret
%ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1)
ret <1 x i32> %ret
}
@@ -138,6 +192,14 @@ define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %op, i64 2)
ret <2 x i32> %ret
}
@@ -148,6 +210,12 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%ret = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %op, i64 4)
store <4 x i32> %ret, ptr %b
@@ -163,6 +231,14 @@ define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <1 x i64> @llvm.vector.extract.v1i64.v2i64(<2 x i64> %op, i64 1)
ret <1 x i64> %ret
}
@@ -173,6 +249,12 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%ret = call <2 x i64> @llvm.vector.extract.v2i64.v4i64(<4 x i64> %op, i64 2)
store <2 x i64> %ret, ptr %b
@@ -190,6 +272,12 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[1]
+; NONEON-NOSVE-NEXT: ret
%ret = call <2 x half> @llvm.vector.extract.v2f16.v4f16(<4 x half> %op, i64 2)
ret <2 x half> %ret
}
@@ -201,6 +289,14 @@ define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <4 x half> @llvm.vector.extract.v4f16.v8f16(<8 x half> %op, i64 4)
ret <4 x half> %ret
}
@@ -211,6 +307,12 @@ define void @extract_subvector_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%ret = call <8 x half> @llvm.vector.extract.v8f16.v16f16(<16 x half> %op, i64 8)
store <8 x half> %ret, ptr %b
@@ -226,6 +328,12 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[1]
+; NONEON-NOSVE-NEXT: ret
%ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1)
ret <1 x float> %ret
}
@@ -237,6 +345,14 @@ define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> %op, i64 2)
ret <2 x float> %ret
}
@@ -247,6 +363,12 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%ret = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> %op, i64 4)
store <4 x float> %ret, ptr %b
@@ -262,6 +384,14 @@ define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%ret = call <1 x double> @llvm.vector.extract.v1f64.v2f64(<2 x double> %op, i64 1)
ret <1 x double> %ret
}
@@ -272,6 +402,12 @@ define void @extract_subvector_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extract_subvector_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%ret = call <2 x double> @llvm.vector.extract.v2f64.v4f64(<4 x double> %op, i64 2)
store <2 x double> %ret, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
index 9c3b5e14289dc..ac60a614d7ce6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -15,6 +16,12 @@ define half @extractelement_v2f16(<2 x half> %op1) {
; CHECK-NEXT: mov z0.h, z0.h[1]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h0, v0.h[1]
+; NONEON-NOSVE-NEXT: ret
%r = extractelement <2 x half> %op1, i64 1
ret half %r
}
@@ -26,6 +33,12 @@ define half @extractelement_v4f16(<4 x half> %op1) {
; CHECK-NEXT: mov z0.h, z0.h[3]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h0, v0.h[3]
+; NONEON-NOSVE-NEXT: ret
%r = extractelement <4 x half> %op1, i64 3
ret half %r
}
@@ -37,6 +50,11 @@ define half @extractelement_v8f16(<8 x half> %op1) {
; CHECK-NEXT: mov z0.h, z0.h[7]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: ret
%r = extractelement <8 x half> %op1, i64 7
ret half %r
}
@@ -48,6 +66,11 @@ define half @extractelement_v16f16(ptr %a) {
; CHECK-NEXT: mov z0.h, z0.h[7]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr h0, [x0, #30]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%r = extractelement <16 x half> %op1, i64 15
ret half %r
@@ -60,6 +83,12 @@ define float @extractelement_v2f32(<2 x float> %op1) {
; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov s0, v0.s[1]
+; NONEON-NOSVE-NEXT: ret
%r = extractelement <2 x float> %op1, i64 1
ret float %r
}
@@ -71,6 +100,11 @@ define float @extractelement_v4f32(<4 x float> %op1) {
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov s0, v0.s[3]
+; NONEON-NOSVE-NEXT: ret
%r = extractelement <4 x float> %op1, i64 3
ret float %r
}
@@ -82,6 +116,11 @@ define float @extractelement_v8f32(ptr %a) {
; CHECK-NEXT: mov z0.s, z0.s[3]
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0, #28]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%r = extractelement <8 x float> %op1, i64 7
ret float %r
@@ -91,6 +130,10 @@ define double @extractelement_v1f64(<1 x double> %op1) {
; CHECK-LABEL: extractelement_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ret
%r = extractelement <1 x double> %op1, i64 0
ret double %r
}
@@ -101,6 +144,11 @@ define double @extractelement_v2f64(<2 x double> %op1) {
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov d0, v0.d[1]
+; NONEON-NOSVE-NEXT: ret
%r = extractelement <2 x double> %op1, i64 1
ret double %r
}
@@ -112,6 +160,11 @@ define double @extractelement_v4f64(ptr %a) {
; CHECK-NEXT: mov z0.d, z0.d[1]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extractelement_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0, #24]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%r = extractelement <4 x double> %op1, i64 3
ret double %r
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 21ce689f68e23..c1d84f6a15ed8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -2,6 +2,7 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
@@ -28,6 +29,16 @@ define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT: str d1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff
+; NONEON-NOSVE-NEXT: ldr d1, [x0]
+; NONEON-NOSVE-NEXT: ldr d2, [x1]
+; NONEON-NOSVE-NEXT: dup v0.4h, w8
+; NONEON-NOSVE-NEXT: bsl v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x half>, ptr %ap
%b = load <4 x half>, ptr %bp
%r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
@@ -54,6 +65,16 @@ define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT: str q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x1]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: bsl v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x half>, ptr %ap
%b = load <8 x half>, ptr %bp
%r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
@@ -84,6 +105,17 @@ define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT: stp q2, q3, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v16f16_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff
+; NONEON-NOSVE-NEXT: ldp q1, q4, [x1]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x half>, ptr %ap
%b = load <16 x half>, ptr %bp
%r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
@@ -112,6 +144,16 @@ define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT: str d1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d0, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: ldr d1, [x0]
+; NONEON-NOSVE-NEXT: ldr d2, [x1]
+; NONEON-NOSVE-NEXT: fneg v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: bsl v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x float>, ptr %ap
%b = load <2 x float>, ptr %bp
%r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
@@ -138,6 +180,16 @@ define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT: str q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x1]
+; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: bsl v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x float>, ptr %ap
%b = load <4 x float>, ptr %bp
%r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
@@ -168,6 +220,17 @@ define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT: stp q2, q3, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v8f32_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: ldp q1, q4, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x float>, ptr %ap
%b = load <8 x float>, ptr %bp
%r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
@@ -196,6 +259,16 @@ define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT: str q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x1]
+; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: bsl v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x double>, ptr %ap
%b = load <2 x double>, ptr %bp
%r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
@@ -226,6 +299,17 @@ define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT: stp q2, q3, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: ldp q1, q4, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x double>, ptr %ap
%b = load <4 x double>, ptr %bp
%r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
@@ -260,6 +344,17 @@ define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str d2, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d0, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: ldr d2, [x0]
+; NONEON-NOSVE-NEXT: fcvtn v1.2s, v1.2d
+; NONEON-NOSVE-NEXT: fneg v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: bsl v0.8b, v2.8b, v1.8b
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x float>, ptr %ap
%b = load <2 x double>, ptr %bp
%tmp0 = fptrunc <2 x double> %b to <2 x float>
@@ -304,6 +399,18 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str q2, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: fcvtn v1.2s, v1.2d
+; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.4s, v2.2d
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x float>, ptr %ap
%b = load <4 x double>, ptr %bp
%tmp0 = fptrunc <4 x double> %b to <4 x float>
@@ -337,6 +444,17 @@ define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str q2, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: ldr d1, [x1]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x double>, ptr %ap
%b = load < 2 x float>, ptr %bp
%tmp0 = fpext <2 x float> %b to <2 x double>
@@ -381,6 +499,23 @@ define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z4.d, z4.d, z1.d, z2.d
; SVE2-NEXT: stp q3, q4, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: movi v0.2d, #0xffffffffffffffff
+; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtl v4.2d, v4.2s
+; NONEON-NOSVE-NEXT: bit v1.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v3.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x double>, ptr %ap
%b = load <4 x float>, ptr %bp
%tmp0 = fpext <4 x float> %b to <4 x double>
@@ -416,6 +551,17 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str d2, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff
+; NONEON-NOSVE-NEXT: ldr d2, [x0]
+; NONEON-NOSVE-NEXT: dup v1.4h, w8
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: bit v0.8b, v2.8b, v1.8b
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x half>, ptr %ap
%b = load <4 x float>, ptr %bp
%tmp0 = fptrunc <4 x float> %b to <4 x half>
@@ -471,6 +617,25 @@ define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
; SVE2-NEXT: str d5, [x0]
; SVE2-NEXT: add sp, sp, #16
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x1]
+; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff
+; NONEON-NOSVE-NEXT: mov d1, v0.d[1]
+; NONEON-NOSVE-NEXT: fcvt h0, d0
+; NONEON-NOSVE-NEXT: fcvt h1, d1
+; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, d2
+; NONEON-NOSVE-NEXT: mov d2, v2.d[1]
+; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, d2
+; NONEON-NOSVE-NEXT: ldr d2, [x0]
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: dup v1.4h, w8
+; NONEON-NOSVE-NEXT: bit v0.8b, v2.8b, v1.8b
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x half>, ptr %ap
%b = load <4 x double>, ptr %bp
%tmp0 = fptrunc <4 x double> %b to <4 x half>
@@ -514,6 +679,18 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT: str q2, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: mov w8, #32767 // =0x7fff
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: dup v1.8h, w8
+; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x half>, ptr %ap
%b = load <8 x float>, ptr %bp
%tmp0 = fptrunc <8 x float> %b to <8 x half>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index b0a82e699939f..b51b89d08844d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,14 @@ define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fadd <2 x half> %op1, %op2
ret <2 x half> %res
}
@@ -30,6 +39,14 @@ define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fadd <4 x half> %op1, %op2
ret <4 x half> %res
}
@@ -43,6 +60,18 @@ define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fadd v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: fadd v1.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fadd <8 x half> %op1, %op2
ret <8 x half> %res
}
@@ -58,6 +87,29 @@ define void @fadd_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fadd v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: fadd v5.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fadd v2.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fadd <16 x half> %op1, %op2
@@ -74,6 +126,11 @@ define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fadd v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = fadd <2 x float> %op1, %op2
ret <2 x float> %res
}
@@ -87,6 +144,11 @@ define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fadd <4 x float> %op1, %op2
ret <4 x float> %res
}
@@ -102,6 +164,15 @@ define void @fadd_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fadd v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fadd <8 x float> %op1, %op2
@@ -118,6 +189,11 @@ define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fadd v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = fadd <2 x double> %op1, %op2
ret <2 x double> %res
}
@@ -133,6 +209,15 @@ define void @fadd_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fadd v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fadd <4 x double> %op1, %op2
@@ -153,6 +238,14 @@ define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fdiv <2 x half> %op1, %op2
ret <2 x half> %res
}
@@ -166,6 +259,14 @@ define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fdiv <4 x half> %op1, %op2
ret <4 x half> %res
}
@@ -179,6 +280,18 @@ define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fdiv v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: fdiv v1.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fdiv <8 x half> %op1, %op2
ret <8 x half> %res
}
@@ -194,6 +307,30 @@ define void @fdiv_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fdiv z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q4, q1, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v5.4s, v4.8h
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v4.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fdiv v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: ldr q3, [x0]
+; NONEON-NOSVE-NEXT: fcvtl2 v6.4s, v3.8h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: fdiv v3.4s, v3.4s, v4.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fdiv v5.4s, v6.4s, v5.4s
+; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v5.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fdiv <16 x half> %op1, %op2
@@ -210,6 +347,11 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fdiv v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = fdiv <2 x float> %op1, %op2
ret <2 x float> %res
}
@@ -223,6 +365,11 @@ define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fdiv v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fdiv <4 x float> %op1, %op2
ret <4 x float> %res
}
@@ -238,6 +385,15 @@ define void @fdiv_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fdiv z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fdiv v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fdiv v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fdiv <8 x float> %op1, %op2
@@ -254,6 +410,11 @@ define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fdiv v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = fdiv <2 x double> %op1, %op2
ret <2 x double> %res
}
@@ -269,6 +430,15 @@ define void @fdiv_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fdiv_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fdiv v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fdiv v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fdiv <4 x double> %op1, %op2
@@ -290,6 +460,46 @@ define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: fcvt s16, h0
+; NONEON-NOSVE-NEXT: mov h17, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h18, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h19, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fmadd s6, s16, s7, s6
+; NONEON-NOSVE-NEXT: mov h16, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s7, h19
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmadd s3, s5, s4, s3
+; NONEON-NOSVE-NEXT: fcvt s4, h17
+; NONEON-NOSVE-NEXT: fcvt s5, h18
+; NONEON-NOSVE-NEXT: fcvt h0, s6
+; NONEON-NOSVE-NEXT: fmadd s4, s7, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h16
+; NONEON-NOSVE-NEXT: mov v0.h[1], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: fmadd s1, s5, s1, s2
+; NONEON-NOSVE-NEXT: mov v0.h[2], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.fma.v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
ret <2 x half> %res
}
@@ -304,6 +514,46 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d2 killed $d2 def $q2
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: fcvt s16, h0
+; NONEON-NOSVE-NEXT: mov h17, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h18, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h19, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fmadd s6, s16, s7, s6
+; NONEON-NOSVE-NEXT: mov h16, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s7, h19
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmadd s3, s5, s4, s3
+; NONEON-NOSVE-NEXT: fcvt s4, h17
+; NONEON-NOSVE-NEXT: fcvt s5, h18
+; NONEON-NOSVE-NEXT: fcvt h0, s6
+; NONEON-NOSVE-NEXT: fmadd s4, s7, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h16
+; NONEON-NOSVE-NEXT: mov v0.h[1], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: fmadd s1, s5, s1, s2
+; NONEON-NOSVE-NEXT: mov v0.h[2], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.fma.v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
ret <4 x half> %res
}
@@ -318,6 +568,79 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h3, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: fcvt s16, h0
+; NONEON-NOSVE-NEXT: mov h17, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h18, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h19, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fmadd s6, s16, s7, s6
+; NONEON-NOSVE-NEXT: fcvt s7, h17
+; NONEON-NOSVE-NEXT: fcvt s16, h18
+; NONEON-NOSVE-NEXT: fcvt s17, h19
+; NONEON-NOSVE-NEXT: mov h18, v1.h[3]
+; NONEON-NOSVE-NEXT: mov h19, v0.h[3]
+; NONEON-NOSVE-NEXT: fmadd s4, s5, s4, s3
+; NONEON-NOSVE-NEXT: mov h5, v2.h[3]
+; NONEON-NOSVE-NEXT: fcvt h3, s6
+; NONEON-NOSVE-NEXT: fmadd s6, s17, s16, s7
+; NONEON-NOSVE-NEXT: mov h17, v2.h[4]
+; NONEON-NOSVE-NEXT: fcvt s7, h18
+; NONEON-NOSVE-NEXT: fcvt s16, h19
+; NONEON-NOSVE-NEXT: mov h18, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: mov h19, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: mov v3.h[1], v4.h[0]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: fmadd s5, s16, s7, s5
+; NONEON-NOSVE-NEXT: mov h7, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h16, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s19, h19
+; NONEON-NOSVE-NEXT: mov v3.h[2], v6.h[0]
+; NONEON-NOSVE-NEXT: mov h6, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt h5, s5
+; NONEON-NOSVE-NEXT: fmadd s17, s19, s18, s17
+; NONEON-NOSVE-NEXT: mov h18, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h19, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fmadd s4, s16, s7, s4
+; NONEON-NOSVE-NEXT: mov v3.h[3], v5.h[0]
+; NONEON-NOSVE-NEXT: fcvt s5, h6
+; NONEON-NOSVE-NEXT: fcvt s6, h18
+; NONEON-NOSVE-NEXT: fcvt s7, h19
+; NONEON-NOSVE-NEXT: fcvt h16, s17
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fmadd s5, s7, s6, s5
+; NONEON-NOSVE-NEXT: mov v3.h[4], v16.h[0]
+; NONEON-NOSVE-NEXT: fmadd s0, s0, s1, s2
+; NONEON-NOSVE-NEXT: mov v3.h[5], v4.h[0]
+; NONEON-NOSVE-NEXT: fcvt h4, s5
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v3.h[6], v4.h[0]
+; NONEON-NOSVE-NEXT: mov v3.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: mov v0.16b, v3.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.fma.v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
ret <8 x half> %res
}
@@ -334,6 +657,150 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q3, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q4, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q5, q2, [x2]
+; NONEON-NOSVE-NEXT: mov h25, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s19, h0
+; NONEON-NOSVE-NEXT: mov h24, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h17, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s18, h1
+; NONEON-NOSVE-NEXT: mov h22, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v2.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: mov h20, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h26, v5.h[1]
+; NONEON-NOSVE-NEXT: mov h27, v4.h[1]
+; NONEON-NOSVE-NEXT: mov h28, v3.h[1]
+; NONEON-NOSVE-NEXT: fcvt s25, h25
+; NONEON-NOSVE-NEXT: mov h7, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h29, v4.h[2]
+; NONEON-NOSVE-NEXT: fcvt s23, h17
+; NONEON-NOSVE-NEXT: mov h17, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h30, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s21, h16
+; NONEON-NOSVE-NEXT: fmadd s6, s19, s18, s6
+; NONEON-NOSVE-NEXT: fcvt s18, h20
+; NONEON-NOSVE-NEXT: fcvt s19, h22
+; NONEON-NOSVE-NEXT: fcvt s20, h24
+; NONEON-NOSVE-NEXT: mov h16, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s22, h5
+; NONEON-NOSVE-NEXT: fcvt s24, h4
+; NONEON-NOSVE-NEXT: fcvt s26, h26
+; NONEON-NOSVE-NEXT: fcvt s27, h27
+; NONEON-NOSVE-NEXT: fcvt s28, h28
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fmadd s21, s25, s23, s21
+; NONEON-NOSVE-NEXT: fcvt s23, h3
+; NONEON-NOSVE-NEXT: mov h25, v5.h[2]
+; NONEON-NOSVE-NEXT: fmadd s18, s20, s19, s18
+; NONEON-NOSVE-NEXT: mov h19, v3.h[2]
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: mov h31, v0.h[4]
+; NONEON-NOSVE-NEXT: fmadd s26, s28, s27, s26
+; NONEON-NOSVE-NEXT: mov h27, v4.h[3]
+; NONEON-NOSVE-NEXT: mov h28, v3.h[3]
+; NONEON-NOSVE-NEXT: fmadd s22, s23, s24, s22
+; NONEON-NOSVE-NEXT: fcvt h20, s21
+; NONEON-NOSVE-NEXT: mov h21, v2.h[4]
+; NONEON-NOSVE-NEXT: fcvt s23, h25
+; NONEON-NOSVE-NEXT: fcvt s24, h29
+; NONEON-NOSVE-NEXT: fcvt s19, h19
+; NONEON-NOSVE-NEXT: fmadd s16, s17, s16, s7
+; NONEON-NOSVE-NEXT: mov h25, v5.h[3]
+; NONEON-NOSVE-NEXT: fcvt h18, s18
+; NONEON-NOSVE-NEXT: fcvt h26, s26
+; NONEON-NOSVE-NEXT: mov h29, v2.h[5]
+; NONEON-NOSVE-NEXT: mov v6.h[1], v20.h[0]
+; NONEON-NOSVE-NEXT: fcvt s17, h21
+; NONEON-NOSVE-NEXT: fcvt s20, h30
+; NONEON-NOSVE-NEXT: fmadd s19, s19, s24, s23
+; NONEON-NOSVE-NEXT: fcvt s21, h31
+; NONEON-NOSVE-NEXT: fcvt h7, s22
+; NONEON-NOSVE-NEXT: fcvt s22, h25
+; NONEON-NOSVE-NEXT: fcvt s23, h27
+; NONEON-NOSVE-NEXT: fcvt s24, h28
+; NONEON-NOSVE-NEXT: mov h25, v5.h[4]
+; NONEON-NOSVE-NEXT: mov h27, v4.h[4]
+; NONEON-NOSVE-NEXT: mov h28, v3.h[4]
+; NONEON-NOSVE-NEXT: mov h30, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h31, v0.h[5]
+; NONEON-NOSVE-NEXT: mov v6.h[2], v18.h[0]
+; NONEON-NOSVE-NEXT: fmadd s17, s21, s20, s17
+; NONEON-NOSVE-NEXT: mov v7.h[1], v26.h[0]
+; NONEON-NOSVE-NEXT: fcvt h18, s19
+; NONEON-NOSVE-NEXT: fmadd s19, s24, s23, s22
+; NONEON-NOSVE-NEXT: mov h26, v5.h[5]
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt s20, h25
+; NONEON-NOSVE-NEXT: fcvt s21, h27
+; NONEON-NOSVE-NEXT: fcvt s22, h28
+; NONEON-NOSVE-NEXT: mov h27, v4.h[5]
+; NONEON-NOSVE-NEXT: mov h28, v3.h[5]
+; NONEON-NOSVE-NEXT: fcvt s23, h29
+; NONEON-NOSVE-NEXT: fcvt s24, h30
+; NONEON-NOSVE-NEXT: fcvt s25, h31
+; NONEON-NOSVE-NEXT: mov h29, v2.h[6]
+; NONEON-NOSVE-NEXT: mov h30, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h31, v0.h[6]
+; NONEON-NOSVE-NEXT: mov v7.h[2], v18.h[0]
+; NONEON-NOSVE-NEXT: fcvt h18, s19
+; NONEON-NOSVE-NEXT: fmadd s19, s22, s21, s20
+; NONEON-NOSVE-NEXT: mov h20, v5.h[6]
+; NONEON-NOSVE-NEXT: mov h21, v4.h[6]
+; NONEON-NOSVE-NEXT: mov h22, v3.h[6]
+; NONEON-NOSVE-NEXT: fcvt s26, h26
+; NONEON-NOSVE-NEXT: fmadd s23, s25, s24, s23
+; NONEON-NOSVE-NEXT: fcvt s27, h27
+; NONEON-NOSVE-NEXT: fcvt s28, h28
+; NONEON-NOSVE-NEXT: mov v6.h[3], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s17
+; NONEON-NOSVE-NEXT: fcvt s17, h29
+; NONEON-NOSVE-NEXT: fcvt s24, h30
+; NONEON-NOSVE-NEXT: fcvt s25, h31
+; NONEON-NOSVE-NEXT: fcvt s20, h20
+; NONEON-NOSVE-NEXT: fcvt s21, h21
+; NONEON-NOSVE-NEXT: fcvt s22, h22
+; NONEON-NOSVE-NEXT: mov v7.h[3], v18.h[0]
+; NONEON-NOSVE-NEXT: fmadd s26, s28, s27, s26
+; NONEON-NOSVE-NEXT: fcvt h18, s19
+; NONEON-NOSVE-NEXT: mov h5, v5.h[7]
+; NONEON-NOSVE-NEXT: mov h4, v4.h[7]
+; NONEON-NOSVE-NEXT: mov h3, v3.h[7]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: fmadd s17, s25, s24, s17
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fmadd s19, s22, s21, s20
+; NONEON-NOSVE-NEXT: mov v6.h[4], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s23
+; NONEON-NOSVE-NEXT: mov v7.h[4], v18.h[0]
+; NONEON-NOSVE-NEXT: fcvt h18, s26
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v6.h[5], v16.h[0]
+; NONEON-NOSVE-NEXT: mov v7.h[5], v18.h[0]
+; NONEON-NOSVE-NEXT: fmadd s3, s3, s4, s5
+; NONEON-NOSVE-NEXT: fcvt h4, s19
+; NONEON-NOSVE-NEXT: fcvt h5, s17
+; NONEON-NOSVE-NEXT: fmadd s0, s0, s1, s2
+; NONEON-NOSVE-NEXT: mov v7.h[6], v4.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s3
+; NONEON-NOSVE-NEXT: mov v6.h[6], v5.h[0]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v7.h[7], v1.h[0]
+; NONEON-NOSVE-NEXT: mov v6.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: stp q7, q6, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%op3 = load <16 x half>, ptr %c
@@ -352,6 +819,12 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmla v2.2s, v1.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov d0, d2
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.fma.v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3)
ret <2 x float> %res
}
@@ -366,6 +839,12 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmla v2.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.fma.v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3)
ret <4 x float> %res
}
@@ -382,6 +861,16 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q4, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q5, [x2]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fmla v1.4s, v0.4s, v2.4s
+; NONEON-NOSVE-NEXT: fmla v5.4s, v4.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q1, q5, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%op3 = load <8 x float>, ptr %c
@@ -400,6 +889,12 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmla v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.fma.v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3)
ret <2 x double> %res
}
@@ -416,6 +911,16 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q4, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q5, [x2]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fmla v1.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: fmla v5.2d, v4.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q1, q5, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%op3 = load <4 x double>, ptr %c
@@ -437,6 +942,14 @@ define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fmul <2 x half> %op1, %op2
ret <2 x half> %res
}
@@ -450,6 +963,14 @@ define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fmul <4 x half> %op1, %op2
ret <4 x half> %res
}
@@ -463,6 +984,18 @@ define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fmul v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: fmul v1.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fmul <8 x half> %op1, %op2
ret <8 x half> %res
}
@@ -478,6 +1011,29 @@ define void @fmul_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fmul v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: fmul v5.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: fmul v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmul v2.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fmul <16 x half> %op1, %op2
@@ -494,6 +1050,11 @@ define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmul v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = fmul <2 x float> %op1, %op2
ret <2 x float> %res
}
@@ -507,6 +1068,11 @@ define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fmul <4 x float> %op1, %op2
ret <4 x float> %res
}
@@ -522,6 +1088,15 @@ define void @fmul_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmul v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmul v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fmul <8 x float> %op1, %op2
@@ -538,6 +1113,11 @@ define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmul v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = fmul <2 x double> %op1, %op2
ret <2 x double> %res
}
@@ -553,6 +1133,15 @@ define void @fmul_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmul_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmul v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fmul v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fmul <4 x double> %op1, %op2
@@ -572,6 +1161,12 @@ define <2 x half> @fneg_v2f16(<2 x half> %op) {
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.4h, #128, lsl #8
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = fneg <2 x half> %op
ret <2 x half> %res
}
@@ -584,6 +1179,12 @@ define <4 x half> @fneg_v4f16(<4 x half> %op) {
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.4h, #128, lsl #8
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = fneg <4 x half> %op
ret <4 x half> %res
}
@@ -596,6 +1197,12 @@ define <8 x half> @fneg_v8f16(<8 x half> %op) {
; CHECK-NEXT: fneg z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v1.8h, #128, lsl #8
+; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = fneg <8 x half> %op
ret <8 x half> %res
}
@@ -609,6 +1216,15 @@ define void @fneg_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fneg z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.8h, #128, lsl #8
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = fneg <16 x half> %op
store <16 x half> %res, ptr %a
@@ -623,6 +1239,11 @@ define <2 x float> @fneg_v2f32(<2 x float> %op) {
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fneg v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = fneg <2 x float> %op
ret <2 x float> %res
}
@@ -635,6 +1256,11 @@ define <4 x float> @fneg_v4f32(<4 x float> %op) {
; CHECK-NEXT: fneg z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fneg <4 x float> %op
ret <4 x float> %res
}
@@ -648,6 +1274,14 @@ define void @fneg_v8f32(ptr %a) {
; CHECK-NEXT: fneg z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fneg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fneg v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = fneg <8 x float> %op
store <8 x float> %res, ptr %a
@@ -662,6 +1296,11 @@ define <2 x double> @fneg_v2f64(<2 x double> %op) {
; CHECK-NEXT: fneg z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fneg <2 x double> %op
ret <2 x double> %res
}
@@ -675,6 +1314,14 @@ define void @fneg_v4f64(ptr %a) {
; CHECK-NEXT: fneg z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fneg_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fneg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fneg v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = fneg <4 x double> %op
store <4 x double> %res, ptr %a
@@ -693,6 +1340,30 @@ define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: mov h3, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fsqrt s2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fsqrt s1, s1
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fsqrt s3, s3
+; NONEON-NOSVE-NEXT: fsqrt s4, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s2
+; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s3
+; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s4
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -705,6 +1376,30 @@ define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: mov h3, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fsqrt s2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fsqrt s1, s1
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fsqrt s3, s3
+; NONEON-NOSVE-NEXT: fsqrt s4, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s2
+; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s3
+; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s4
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -717,6 +1412,48 @@ define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: mov h3, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[4]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fsqrt s2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h0
+; NONEON-NOSVE-NEXT: fcvt h0, s2
+; NONEON-NOSVE-NEXT: fsqrt s1, s1
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s3, s3
+; NONEON-NOSVE-NEXT: fcvt h1, s3
+; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s4, s4
+; NONEON-NOSVE-NEXT: fcvt h1, s4
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s5, s5
+; NONEON-NOSVE-NEXT: fcvt h1, s5
+; NONEON-NOSVE-NEXT: mov v0.h[4], v1.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s6, s6
+; NONEON-NOSVE-NEXT: fcvt h1, s6
+; NONEON-NOSVE-NEXT: mov v0.h[5], v1.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s7, s7
+; NONEON-NOSVE-NEXT: fcvt h1, s7
+; NONEON-NOSVE-NEXT: mov v0.h[6], v1.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s2, s16
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: mov v0.h[7], v1.h[0]
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -730,6 +1467,89 @@ define void @fsqrt_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fsqrt z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q16, [x0]
+; NONEON-NOSVE-NEXT: mov h0, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h17, v16.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s18, h16
+; NONEON-NOSVE-NEXT: mov h19, v16.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[3]
+; NONEON-NOSVE-NEXT: mov h20, v16.h[3]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h21, v16.h[4]
+; NONEON-NOSVE-NEXT: mov h6, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h22, v16.h[5]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fsqrt s2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s19, h19
+; NONEON-NOSVE-NEXT: mov h7, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s20, h20
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s21, h21
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s22, h22
+; NONEON-NOSVE-NEXT: mov h23, v16.h[6]
+; NONEON-NOSVE-NEXT: mov h16, v16.h[7]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s23, h23
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fsqrt s0, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v2.h[1], v0.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s17, s17
+; NONEON-NOSVE-NEXT: fcvt h17, s17
+; NONEON-NOSVE-NEXT: fsqrt s18, s18
+; NONEON-NOSVE-NEXT: fcvt h18, s18
+; NONEON-NOSVE-NEXT: mov v18.h[1], v17.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s3, s3
+; NONEON-NOSVE-NEXT: fcvt h0, s3
+; NONEON-NOSVE-NEXT: mov v2.h[2], v0.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s19, s19
+; NONEON-NOSVE-NEXT: fcvt h17, s19
+; NONEON-NOSVE-NEXT: mov v18.h[2], v17.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s4, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s4
+; NONEON-NOSVE-NEXT: mov v2.h[3], v0.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s20, s20
+; NONEON-NOSVE-NEXT: fcvt h3, s20
+; NONEON-NOSVE-NEXT: mov v18.h[3], v3.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s5, s5
+; NONEON-NOSVE-NEXT: fcvt h0, s5
+; NONEON-NOSVE-NEXT: mov v2.h[4], v0.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s21, s21
+; NONEON-NOSVE-NEXT: fcvt h3, s21
+; NONEON-NOSVE-NEXT: mov v18.h[4], v3.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s6, s6
+; NONEON-NOSVE-NEXT: fcvt h0, s6
+; NONEON-NOSVE-NEXT: mov v2.h[5], v0.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s22, s22
+; NONEON-NOSVE-NEXT: fcvt h3, s22
+; NONEON-NOSVE-NEXT: mov v18.h[5], v3.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s7, s7
+; NONEON-NOSVE-NEXT: fcvt h0, s7
+; NONEON-NOSVE-NEXT: mov v2.h[6], v0.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s23, s23
+; NONEON-NOSVE-NEXT: fcvt h3, s23
+; NONEON-NOSVE-NEXT: mov v18.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s16, s16
+; NONEON-NOSVE-NEXT: fcvt h3, s16
+; NONEON-NOSVE-NEXT: mov v18.h[7], v3.h[0]
+; NONEON-NOSVE-NEXT: fsqrt s1, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: stp q18, q2, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -744,6 +1564,11 @@ define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fsqrt v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -756,6 +1581,11 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fsqrt v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -769,6 +1599,14 @@ define void @fsqrt_v8f32(ptr %a) {
; CHECK-NEXT: fsqrt z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fsqrt v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fsqrt v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -783,6 +1621,11 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fsqrt v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -796,6 +1639,14 @@ define void @fsqrt_v4f64(ptr %a) {
; CHECK-NEXT: fsqrt z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsqrt_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fsqrt v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fsqrt v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
@@ -815,6 +1666,14 @@ define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fsub v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fsub <2 x half> %op1, %op2
ret <2 x half> %res
}
@@ -828,6 +1687,14 @@ define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fsub v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fsub <4 x half> %op1, %op2
ret <4 x half> %res
}
@@ -841,6 +1708,18 @@ define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fsub v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: fsub v1.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fsub <8 x half> %op1, %op2
ret <8 x half> %res
}
@@ -856,6 +1735,29 @@ define void @fsub_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fsub z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fsub v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: fsub v5.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: fsub v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fsub v2.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fsub <16 x half> %op1, %op2
@@ -872,6 +1774,11 @@ define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fsub v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = fsub <2 x float> %op1, %op2
ret <2 x float> %res
}
@@ -885,6 +1792,11 @@ define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fsub v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = fsub <4 x float> %op1, %op2
ret <4 x float> %res
}
@@ -900,6 +1812,15 @@ define void @fsub_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fsub z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fsub v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fsub v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fsub <8 x float> %op1, %op2
@@ -916,6 +1837,11 @@ define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fsub v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = fsub <2 x double> %op1, %op2
ret <2 x double> %res
}
@@ -931,6 +1857,15 @@ define void @fsub_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fsub z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fsub_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fsub v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fsub v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fsub <4 x double> %op1, %op2
@@ -950,6 +1885,11 @@ define <2 x half> @fabs_v2f16(<2 x half> %op) {
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: bic v0.4h, #128, lsl #8
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.fabs.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -962,6 +1902,11 @@ define <4 x half> @fabs_v4f16(<4 x half> %op) {
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: bic v0.4h, #128, lsl #8
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -974,6 +1919,11 @@ define <8 x half> @fabs_v8f16(<8 x half> %op) {
; CHECK-NEXT: fabs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: bic v0.8h, #128, lsl #8
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -987,6 +1937,14 @@ define void @fabs_v16f16(ptr %a) {
; CHECK-NEXT: fabs z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: bic v0.8h, #128, lsl #8
+; NONEON-NOSVE-NEXT: bic v1.8h, #128, lsl #8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -1001,6 +1959,11 @@ define <2 x float> @fabs_v2f32(<2 x float> %op) {
; CHECK-NEXT: fabs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fabs v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -1013,6 +1976,11 @@ define <4 x float> @fabs_v4f32(<4 x float> %op) {
; CHECK-NEXT: fabs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fabs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -1026,6 +1994,14 @@ define void @fabs_v8f32(ptr %a) {
; CHECK-NEXT: fabs z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fabs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fabs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -1040,6 +2016,11 @@ define <2 x double> @fabs_v2f64(<2 x double> %op) {
; CHECK-NEXT: fabs z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fabs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -1053,6 +2034,14 @@ define void @fabs_v4f64(ptr %a) {
; CHECK-NEXT: fabs z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fabs_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fabs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fabs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index cbd0ad66fba76..c5ed70c8a5f2f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -19,6 +20,14 @@ define <2 x i16> @fcmp_oeq_v2f16(<2 x half> %op1, <2 x half> %op2) {
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <2 x half> %op1, %op2
%sext = sext <2 x i1> %cmp to <2 x i16>
ret <2 x i16> %sext
@@ -34,6 +43,14 @@ define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <4 x half> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i16>
ret <4 x i16> %sext
@@ -49,6 +66,65 @@ define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcmp s3, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h6
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: mov h4, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov h6, v0.h[4]
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: fcmp s2, s5
+; NONEON-NOSVE-NEXT: fmov s2, w9
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h5, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v2.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: fcvt s3, h5
+; NONEON-NOSVE-NEXT: fcvt s4, h6
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <8 x half> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
@@ -66,6 +142,123 @@ define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, eq
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, eq
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, eq
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, eq
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp oeq <16 x half> %op1, %op2
@@ -84,6 +277,11 @@ define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcmeq v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <2 x float> %op1, %op2
%sext = sext <2 x i1> %cmp to <2 x i32>
ret <2 x i32> %sext
@@ -99,6 +297,11 @@ define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <4 x float> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
@@ -116,6 +319,15 @@ define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fcmeq v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcmeq v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%cmp = fcmp oeq <8 x float> %op1, %op2
@@ -132,6 +344,11 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcmeq d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <1 x double> %op1, %op2
%sext = sext <1 x i1> %cmp to <1 x i64>
ret <1 x i64> %sext
@@ -147,6 +364,11 @@ define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcmeq v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%cmp = fcmp oeq <2 x double> %op1, %op2
%sext = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %sext
@@ -164,6 +386,15 @@ define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oeq_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fcmeq v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcmeq v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%cmp = fcmp oeq <4 x double> %op1, %op2
@@ -192,6 +423,139 @@ define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ueq_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h2
+; NONEON-NOSVE-NEXT: mov h5, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h1
+; NONEON-NOSVE-NEXT: mov h7, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s6, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h6, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: csinv w12, w9, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s7, s5
+; NONEON-NOSVE-NEXT: mov h5, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: csinv w10, w9, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: csinv w11, w9, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s6, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s6, h16
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: csinv w9, w9, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s7, s5
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w13, eq
+; NONEON-NOSVE-NEXT: csinv w13, w13, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s6, s3
+; NONEON-NOSVE-NEXT: fcvt s3, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h7
+; NONEON-NOSVE-NEXT: mov h6, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[2]
+; NONEON-NOSVE-NEXT: csetm w14, eq
+; NONEON-NOSVE-NEXT: csinv w14, w14, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s4, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w15, eq
+; NONEON-NOSVE-NEXT: csinv w15, w15, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s5, s3
+; NONEON-NOSVE-NEXT: mov h3, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w16, eq
+; NONEON-NOSVE-NEXT: csinv w16, w16, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s4, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h3
+; NONEON-NOSVE-NEXT: fmov s2, w12
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w17, eq
+; NONEON-NOSVE-NEXT: csinv w17, w17, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[4]
+; NONEON-NOSVE-NEXT: fmov s3, w17
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: mov v3.h[1], w16
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v0.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov v2.h[2], w10
+; NONEON-NOSVE-NEXT: mov v3.h[2], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w11
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v3.h[3], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: fcvt s4, h6
+; NONEON-NOSVE-NEXT: fcvt s5, h7
+; NONEON-NOSVE-NEXT: mov v2.h[4], w9
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v3.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov v2.h[5], w13
+; NONEON-NOSVE-NEXT: mov v3.h[5], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc
+; NONEON-NOSVE-NEXT: fcmp s1, s0
+; NONEON-NOSVE-NEXT: mov v2.h[6], w14
+; NONEON-NOSVE-NEXT: mov v3.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, vc
+; NONEON-NOSVE-NEXT: mov v2.h[7], w15
+; NONEON-NOSVE-NEXT: mov v3.h[7], w8
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp ueq <16 x half> %op1, %op2
@@ -220,6 +584,139 @@ define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_one_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h2
+; NONEON-NOSVE-NEXT: mov h5, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h1
+; NONEON-NOSVE-NEXT: mov h7, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s6, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h6, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w9, mi
+; NONEON-NOSVE-NEXT: csinv w12, w9, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s7, s5
+; NONEON-NOSVE-NEXT: mov h5, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w9, mi
+; NONEON-NOSVE-NEXT: csinv w10, w9, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x1]
+; NONEON-NOSVE-NEXT: csetm w9, mi
+; NONEON-NOSVE-NEXT: csinv w11, w9, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s6, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s6, h16
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w9, mi
+; NONEON-NOSVE-NEXT: csinv w9, w9, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s7, s5
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w13, mi
+; NONEON-NOSVE-NEXT: csinv w13, w13, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s6, s3
+; NONEON-NOSVE-NEXT: fcvt s3, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h7
+; NONEON-NOSVE-NEXT: mov h6, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[2]
+; NONEON-NOSVE-NEXT: csetm w14, mi
+; NONEON-NOSVE-NEXT: csinv w14, w14, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s4, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w15, mi
+; NONEON-NOSVE-NEXT: csinv w15, w15, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s5, s3
+; NONEON-NOSVE-NEXT: mov h3, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w16, mi
+; NONEON-NOSVE-NEXT: csinv w16, w16, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s4, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h3
+; NONEON-NOSVE-NEXT: fmov s2, w12
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w17, mi
+; NONEON-NOSVE-NEXT: csinv w17, w17, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[4]
+; NONEON-NOSVE-NEXT: fmov s3, w17
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: mov v3.h[1], w16
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v0.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov v2.h[2], w10
+; NONEON-NOSVE-NEXT: mov v3.h[2], w8
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w11
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v3.h[3], w8
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: fcvt s4, h6
+; NONEON-NOSVE-NEXT: fcvt s5, h7
+; NONEON-NOSVE-NEXT: mov v2.h[4], w9
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v3.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov v2.h[5], w13
+; NONEON-NOSVE-NEXT: mov v3.h[5], w8
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le
+; NONEON-NOSVE-NEXT: fcmp s1, s0
+; NONEON-NOSVE-NEXT: mov v2.h[6], w14
+; NONEON-NOSVE-NEXT: mov v3.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: csinv w8, w8, wzr, le
+; NONEON-NOSVE-NEXT: mov v2.h[7], w15
+; NONEON-NOSVE-NEXT: mov v3.h[7], w8
+; NONEON-NOSVE-NEXT: stp q3, q2, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp one <16 x half> %op1, %op2
@@ -244,6 +741,123 @@ define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_une_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, ne
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, ne
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, ne
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, ne
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, ne
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, ne
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, ne
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, ne
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, ne
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp une <16 x half> %op1, %op2
@@ -268,6 +882,123 @@ define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ogt_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, gt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, gt
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, gt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, gt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, gt
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, gt
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, gt
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, gt
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, gt
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp ogt <16 x half> %op1, %op2
@@ -295,6 +1026,123 @@ define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: eor z0.d, z2.d, z0.d
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ugt_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, hi
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, hi
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, hi
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, hi
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, hi
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, hi
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, hi
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, hi
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, hi
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, hi
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, hi
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, hi
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, hi
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, hi
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, hi
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, hi
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp ugt <16 x half> %op1, %op2
@@ -319,6 +1167,123 @@ define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_olt_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, mi
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, mi
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, mi
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, mi
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, mi
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, mi
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, mi
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, mi
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, mi
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, mi
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp olt <16 x half> %op1, %op2
@@ -346,6 +1311,123 @@ define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: eor z0.d, z2.d, z0.d
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ult_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, lt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, lt
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, lt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, lt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, lt
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, lt
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, lt
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, lt
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, lt
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp ult <16 x half> %op1, %op2
@@ -370,6 +1452,123 @@ define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_oge_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, ge
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, ge
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, ge
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, ge
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, ge
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, ge
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, ge
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, ge
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, ge
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp oge <16 x half> %op1, %op2
@@ -397,6 +1596,123 @@ define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: eor z0.d, z2.d, z0.d
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_uge_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, pl
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, pl
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, pl
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, pl
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, pl
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, pl
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, pl
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, pl
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, pl
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, pl
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, pl
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, pl
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, pl
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, pl
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, pl
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, pl
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp uge <16 x half> %op1, %op2
@@ -421,6 +1737,123 @@ define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ole_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, ls
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, ls
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, ls
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, ls
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, ls
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, ls
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, ls
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, ls
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, ls
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, ls
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, ls
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, ls
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, ls
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, ls
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, ls
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, ls
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp ole <16 x half> %op1, %op2
@@ -448,6 +1881,123 @@ define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: eor z0.d, z2.d, z0.d
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ule_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, le
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, le
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, le
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, le
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, le
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, le
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, le
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, le
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, le
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp ule <16 x half> %op1, %op2
@@ -472,6 +2022,123 @@ define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_uno_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, vs
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, vs
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, vs
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, vs
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, vs
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, vs
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, vs
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, vs
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, vs
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, vs
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, vs
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, vs
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, vs
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, vs
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, vs
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, vs
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp uno <16 x half> %op1, %op2
@@ -499,6 +2166,123 @@ define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: eor z0.d, z2.d, z0.d
; CHECK-NEXT: stp q1, q0, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ord_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, vc
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, vc
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, vc
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, vc
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, vc
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, vc
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, vc
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, vc
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, vc
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, vc
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, vc
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, vc
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, vc
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, vc
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, vc
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, vc
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp ord <16 x half> %op1, %op2
@@ -523,6 +2307,123 @@ define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_eq_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, eq
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, eq
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, eq
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, eq
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp fast oeq <16 x half> %op1, %op2
@@ -547,6 +2448,123 @@ define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ne_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, ne
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, ne
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, ne
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, ne
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, ne
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, ne
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, ne
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, ne
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, ne
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp fast one <16 x half> %op1, %op2
@@ -571,6 +2589,123 @@ define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_gt_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, gt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, gt
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, gt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, gt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, gt
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, gt
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, gt
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, gt
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, gt
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, gt
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp fast ogt <16 x half> %op1, %op2
@@ -595,6 +2730,123 @@ define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_lt_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, lt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, lt
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, lt
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, lt
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, lt
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, lt
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, lt
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, lt
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, lt
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, lt
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp fast olt <16 x half> %op1, %op2
@@ -619,6 +2871,123 @@ define void @fcmp_ge_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_ge_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, ge
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, ge
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, ge
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, ge
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, ge
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, ge
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, ge
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, ge
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, ge
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, ge
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp fast oge <16 x half> %op1, %op2
@@ -643,6 +3012,123 @@ define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x2]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcmp_le_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x1, #16]
+; NONEON-NOSVE-NEXT: mov h0, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h2
+; NONEON-NOSVE-NEXT: fcvt s7, h1
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h0, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w12, le
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w11, le
+; NONEON-NOSVE-NEXT: fcmp s3, s0
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w9, le
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: csetm w10, le
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: csetm w13, le
+; NONEON-NOSVE-NEXT: fcmp s7, s3
+; NONEON-NOSVE-NEXT: fmov s7, w12
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: csetm w14, le
+; NONEON-NOSVE-NEXT: fcmp s6, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: mov v7.h[1], w8
+; NONEON-NOSVE-NEXT: csetm w15, le
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: mov v7.h[2], w11
+; NONEON-NOSVE-NEXT: csetm w16, le
+; NONEON-NOSVE-NEXT: fcmp s5, s2
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: csetm w17, le
+; NONEON-NOSVE-NEXT: mov v7.h[3], w9
+; NONEON-NOSVE-NEXT: fmov s2, w17
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w16
+; NONEON-NOSVE-NEXT: mov v7.h[4], w10
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: mov h5, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h6, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: mov v7.h[5], w13
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: mov h4, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v7.h[6], w14
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s6, s5
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v7.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: fcmp s4, s3
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: fcmp s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: csetm w8, le
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: stp q2, q7, [x2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%cmp = fcmp fast ole <16 x half> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
index 57d072a7bcd68..055af194be211 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,17 @@ define void @fp_convert_combine_crash(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fp_convert_combine_crash:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov v0.4s, #8.00000000
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmul v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmul v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%f = load <8 x float>, ptr %a
%mul.i = fmul <8 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00,
float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index 6a2dc3c718252..ce8902cfa16c3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,12 @@ define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v2f16_to_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%res = fpext <2 x half> %a to <2 x float>
store <2 x float> %res, ptr %b
ret void
@@ -31,6 +38,12 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v4f16_to_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%res = fpext <4 x half> %a to <4 x float>
store <4 x float> %res, ptr %b
ret void
@@ -48,6 +61,17 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v8f16_to_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = fpext <8 x half> %a to <8 x float>
store <8 x float> %res, ptr %b
ret void
@@ -72,6 +96,21 @@ define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
; CHECK-NEXT: stp q3, q0, [x0]
; CHECK-NEXT: stp q2, q1, [x0, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: stp q0, q3, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%res = fpext <16 x half> %a to <16 x float>
store <16 x float> %res, ptr %b
ret void
@@ -90,6 +129,13 @@ define void @fcvt_v2f16_v2f32(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x half>, ptr %a
%res = fpext <2 x half> %op1 to <2 x float>
store <2 x float> %res, ptr %b
@@ -104,6 +150,13 @@ define void @fcvt_v4f16_v4f32(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x half>, ptr %a
%res = fpext <4 x half> %op1 to <4 x float>
store <4 x float> %res, ptr %b
@@ -121,6 +174,18 @@ define void @fcvt_v8f16_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fpext <8 x half> %op1 to <8 x float>
store <8 x float> %res, ptr %b
@@ -145,6 +210,22 @@ define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fpext <16 x half> %op1 to <16 x float>
store <16 x float> %res, ptr %b
@@ -162,6 +243,13 @@ define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt d0, h0
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v1f16_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: fcvt d0, h0
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <1 x half>, ptr %a
%res = fpext <1 x half> %op1 to <1 x double>
store <1 x double> %res, ptr %b
@@ -176,6 +264,14 @@ define void @fcvt_v2f16_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.d, p0/m, z0.h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v2f16_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x half>, ptr %a
%res = fpext <2 x half> %op1 to <2 x double>
store <2 x double> %res, ptr %b
@@ -193,6 +289,19 @@ define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z1.d, p0/m, z1.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v4f16_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x half>, ptr %a
%res = fpext <4 x half> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -217,6 +326,26 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v8f16_v8f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s
+; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fpext <8 x half> %op1 to <8 x double>
store <8 x double> %res, ptr %b
@@ -258,6 +387,38 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q4, q0, [x1, #32]
; CHECK-NEXT: stp q1, q2, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v16f16_v16f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s
+; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #72]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #40]
+; NONEON-NOSVE-NEXT: fcvtl v5.2d, v5.2s
+; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s
+; NONEON-NOSVE-NEXT: fcvtl v4.2d, v4.2s
+; NONEON-NOSVE-NEXT: stp q0, q5, [x1]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v7.2s
+; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v6.2s
+; NONEON-NOSVE-NEXT: stp q2, q0, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fpext <16 x half> %op1 to <16 x double>
store <16 x double> %res, ptr %b
@@ -275,6 +436,13 @@ define void @fcvt_v1f32_v1f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v1f32_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <1 x float>, ptr %a
%res = fpext <1 x float> %op1 to <1 x double>
store <1 x double> %res, ptr %b
@@ -289,6 +457,13 @@ define void @fcvt_v2f32_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.d, p0/m, z0.s
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x float>, ptr %a
%res = fpext <2 x float> %op1 to <2 x double>
store <2 x double> %res, ptr %b
@@ -306,6 +481,18 @@ define void @fcvt_v4f32_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z1.d, p0/m, z1.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x float>, ptr %a
%res = fpext <4 x float> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -330,6 +517,22 @@ define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s
+; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fpext <8 x float> %op1 to <8 x double>
store <8 x double> %res, ptr %b
@@ -348,6 +551,13 @@ define void @fcvt_v2f32_v2f16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v2f32_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: str s0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x float>, ptr %a
%res = fptrunc <2 x float> %op1 to <2 x half>
store <2 x half> %res, ptr %b
@@ -362,6 +572,13 @@ define void @fcvt_v4f32_v4f16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v4f32_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x float>, ptr %a
%res = fptrunc <4 x float> %op1 to <4 x half>
store <4 x half> %res, ptr %b
@@ -379,6 +596,14 @@ define void @fcvt_v8f32_v8f16(ptr %a, ptr %b) {
; CHECK-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1]
; CHECK-NEXT: st1h { z1.s }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v8f32_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fptrunc <8 x float> %op1 to <8 x half>
store <8 x half> %res, ptr %b
@@ -397,6 +622,13 @@ define void @fcvt_v1f64_v1f16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: fcvt h0, d0
+; NONEON-NOSVE-NEXT: str h0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <1 x double>, ptr %a
%res = fptrunc <1 x double> %op1 to <1 x half>
store <1 x half> %res, ptr %b
@@ -411,6 +643,16 @@ define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvt z0.h, p0/m, z0.d
; CHECK-NEXT: st1h { z0.d }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: mov d1, v0.d[1]
+; NONEON-NOSVE-NEXT: fcvt h0, d0
+; NONEON-NOSVE-NEXT: fcvt h1, d1
+; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0]
+; NONEON-NOSVE-NEXT: str s0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x double>, ptr %a
%res = fptrunc <2 x double> %op1 to <2 x half>
store <2 x half> %res, ptr %b
@@ -428,6 +670,21 @@ define void @fcvt_v4f64_v4f16(ptr %a, ptr %b) {
; CHECK-NEXT: st1h { z0.d }, p0, [x1, x8, lsl #1]
; CHECK-NEXT: st1h { z1.d }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: mov d1, v0.d[1]
+; NONEON-NOSVE-NEXT: fcvt h0, d0
+; NONEON-NOSVE-NEXT: fcvt h1, d1
+; NONEON-NOSVE-NEXT: mov v0.h[1], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, d2
+; NONEON-NOSVE-NEXT: mov d2, v2.d[1]
+; NONEON-NOSVE-NEXT: mov v0.h[2], v1.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, d2
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptrunc <4 x double> %op1 to <4 x half>
store <4 x half> %res, ptr %b
@@ -446,6 +703,13 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) {
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v1f64_v1f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: str s0, [x0]
+; NONEON-NOSVE-NEXT: ret
%res = fptrunc <1 x double> %op1 to <1 x float>
store <1 x float> %res, ptr %b
ret void
@@ -459,6 +723,12 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) {
; CHECK-NEXT: fcvt z0.s, p0/m, z0.d
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v2f64_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%res = fptrunc <2 x double> %op1 to <2 x float>
store <2 x float> %res, ptr %b
ret void
@@ -475,6 +745,14 @@ define void @fcvt_v4f64_v4f32(ptr %a, ptr %b) {
; CHECK-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2]
; CHECK-NEXT: st1w { z1.d }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvt_v4f64_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptrunc <4 x double> %op1 to <4 x float>
store <4 x float> %res, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
index 153a04f486571..9d2b55903f314 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,18 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%mul = fmul contract <4 x half> %op1, %op2
%res = fadd contract <4 x half> %mul, %op3
ret <4 x half> %res
@@ -32,6 +45,26 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fmul v3.4s, v4.4s, v3.4s
+; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v3.4s
+; NONEON-NOSVE-NEXT: fadd v1.4s, v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%mul = fmul contract <8 x half> %op1, %op2
%res = fadd contract <8 x half> %mul, %op3
ret <8 x half> %res
@@ -49,6 +82,46 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: fcvtl v5.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fmul v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: fmul v5.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: fmul v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fmul v2.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x2]
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v6.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v7.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h
+; NONEON-NOSVE-NEXT: fadd v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: fadd v5.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fadd v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%op3 = load <16 x half>, ptr %c
@@ -68,6 +141,12 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmla v2.2s, v1.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov d0, d2
+; NONEON-NOSVE-NEXT: ret
%mul = fmul contract <2 x float> %op1, %op2
%res = fadd contract <2 x float> %mul, %op3
ret <2 x float> %res
@@ -83,6 +162,12 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmla v2.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%mul = fmul contract <4 x float> %op1, %op2
%res = fadd contract <4 x float> %mul, %op3
ret <4 x float> %res
@@ -100,6 +185,16 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q4, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q5, [x2]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fmla v1.4s, v0.4s, v2.4s
+; NONEON-NOSVE-NEXT: fmla v5.4s, v4.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q1, q5, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%op3 = load <8 x float>, ptr %c
@@ -114,6 +209,11 @@ define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double
; CHECK: // %bb.0:
; CHECK-NEXT: fmadd d0, d0, d1, d2
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmadd d0, d0, d1, d2
+; NONEON-NOSVE-NEXT: ret
%mul = fmul contract <1 x double> %op1, %op2
%res = fadd contract <1 x double> %mul, %op3
ret <1 x double> %res
@@ -129,6 +229,12 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmla v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%mul = fmul contract <2 x double> %op1, %op2
%res = fadd contract <2 x double> %mul, %op3
ret <2 x double> %res
@@ -146,6 +252,16 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fma_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q4, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q5, [x2]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fmla v1.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: fmla v5.2d, v4.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q1, q5, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%op3 = load <4 x double>, ptr %c
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index 6945a6102c055..a96adfec2ad10 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,38 @@ define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h1
+; NONEON-NOSVE-NEXT: fcvt s7, h0
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fmaxnm s5, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: fmaxnm s3, s4, s3
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s5
+; NONEON-NOSVE-NEXT: fcvt s4, h6
+; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h2, s3
+; NONEON-NOSVE-NEXT: fmaxnm s1, s4, s1
+; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %op1, <4 x half> %op2)
ret <4 x half> %res
}
@@ -30,6 +63,64 @@ define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fmaxnm s3, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s4
+; NONEON-NOSVE-NEXT: fmaxnm s4, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fmaxnm s5, s5, s16
+; NONEON-NOSVE-NEXT: mov h16, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt s3, h6
+; NONEON-NOSVE-NEXT: fcvt s6, h7
+; NONEON-NOSVE-NEXT: mov h7, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h5, s5
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fmaxnm s3, s6, s3
+; NONEON-NOSVE-NEXT: mov h6, v0.h[6]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h6
+; NONEON-NOSVE-NEXT: fmaxnm s6, s16, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0]
+; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h3, s6
+; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.maxnum.v8f16(<8 x half> %op1, <8 x half> %op2)
ret <8 x half> %res
}
@@ -45,6 +136,119 @@ define void @fmaxnm_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h16, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h18, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h17, v3.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s19, h0
+; NONEON-NOSVE-NEXT: fcvt s20, h3
+; NONEON-NOSVE-NEXT: fcvt s21, h2
+; NONEON-NOSVE-NEXT: mov h22, v3.h[2]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fmaxnm s4, s19, s4
+; NONEON-NOSVE-NEXT: mov h19, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h24, v3.h[3]
+; NONEON-NOSVE-NEXT: fmaxnm s20, s21, s20
+; NONEON-NOSVE-NEXT: fcvt s21, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov h23, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h25, v2.h[6]
+; NONEON-NOSVE-NEXT: fmaxnm s5, s7, s5
+; NONEON-NOSVE-NEXT: mov h7, v1.h[3]
+; NONEON-NOSVE-NEXT: fmaxnm s6, s16, s6
+; NONEON-NOSVE-NEXT: fmaxnm s16, s18, s17
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s18, h19
+; NONEON-NOSVE-NEXT: fcvt s19, h24
+; NONEON-NOSVE-NEXT: mov h24, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h17, s5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt h5, s20
+; NONEON-NOSVE-NEXT: fmaxnm s20, s22, s21
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt s21, h23
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: mov h22, v0.h[4]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[4]
+; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0]
+; NONEON-NOSVE-NEXT: mov h17, v1.h[4]
+; NONEON-NOSVE-NEXT: fmaxnm s7, s18, s7
+; NONEON-NOSVE-NEXT: mov h18, v3.h[4]
+; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s20
+; NONEON-NOSVE-NEXT: fmaxnm s19, s21, s19
+; NONEON-NOSVE-NEXT: fcvt s20, h23
+; NONEON-NOSVE-NEXT: mov h21, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0]
+; NONEON-NOSVE-NEXT: fcvt s6, h17
+; NONEON-NOSVE-NEXT: fcvt s17, h22
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: mov h22, v3.h[5]
+; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s19
+; NONEON-NOSVE-NEXT: mov h19, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmaxnm s6, s17, s6
+; NONEON-NOSVE-NEXT: mov h17, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fmaxnm s18, s20, s18
+; NONEON-NOSVE-NEXT: mov h20, v3.h[6]
+; NONEON-NOSVE-NEXT: mov v4.h[3], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt s7, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt s16, h21
+; NONEON-NOSVE-NEXT: fcvt s21, h24
+; NONEON-NOSVE-NEXT: fcvt s19, h19
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fcvt s23, h25
+; NONEON-NOSVE-NEXT: fcvt h18, s18
+; NONEON-NOSVE-NEXT: fcvt s20, h20
+; NONEON-NOSVE-NEXT: mov h3, v3.h[7]
+; NONEON-NOSVE-NEXT: fmaxnm s7, s22, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fmaxnm s16, s21, s16
+; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0]
+; NONEON-NOSVE-NEXT: fmaxnm s6, s19, s17
+; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0]
+; NONEON-NOSVE-NEXT: fmaxnm s17, s23, s20
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s17
+; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: mov v5.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0]
+; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: stp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.maxnum.v16f16(<16 x half> %op1, <16 x half> %op2)
@@ -61,6 +265,11 @@ define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxnm v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %op1, <2 x float> %op2)
ret <2 x float> %res
}
@@ -74,6 +283,11 @@ define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %op1, <4 x float> %op2)
ret <4 x float> %res
}
@@ -89,6 +303,15 @@ define void @fmaxnm_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmaxnm v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmaxnm v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %op1, <8 x float> %op2)
@@ -101,6 +324,11 @@ define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxnm d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2)
ret <1 x double> %res
}
@@ -114,6 +342,11 @@ define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %op1, <2 x double> %op2)
ret <2 x double> %res
}
@@ -129,6 +362,15 @@ define void @fmaxnm_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxnm_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmaxnm v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fmaxnm v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %op1, <4 x double> %op2)
@@ -149,6 +391,38 @@ define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h1
+; NONEON-NOSVE-NEXT: fcvt s7, h0
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s2, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fminnm s5, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: fminnm s3, s4, s3
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s5
+; NONEON-NOSVE-NEXT: fcvt s4, h6
+; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h2, s3
+; NONEON-NOSVE-NEXT: fminnm s1, s4, s1
+; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.minnum.v4f16(<4 x half> %op1, <4 x half> %op2)
ret <4 x half> %res
}
@@ -162,6 +436,64 @@ define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fminnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fminnm s3, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s4
+; NONEON-NOSVE-NEXT: fminnm s4, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fminnm s5, s5, s16
+; NONEON-NOSVE-NEXT: mov h16, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt s3, h6
+; NONEON-NOSVE-NEXT: fcvt s6, h7
+; NONEON-NOSVE-NEXT: mov h7, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h5, s5
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fminnm s3, s6, s3
+; NONEON-NOSVE-NEXT: mov h6, v0.h[6]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h6
+; NONEON-NOSVE-NEXT: fminnm s6, s16, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0]
+; NONEON-NOSVE-NEXT: fminnm s4, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h3, s6
+; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.minnum.v8f16(<8 x half> %op1, <8 x half> %op2)
ret <8 x half> %res
}
@@ -177,6 +509,119 @@ define void @fminnm_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h16, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h18, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h17, v3.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s19, h0
+; NONEON-NOSVE-NEXT: fcvt s20, h3
+; NONEON-NOSVE-NEXT: fcvt s21, h2
+; NONEON-NOSVE-NEXT: mov h22, v3.h[2]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fminnm s4, s19, s4
+; NONEON-NOSVE-NEXT: mov h19, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h24, v3.h[3]
+; NONEON-NOSVE-NEXT: fminnm s20, s21, s20
+; NONEON-NOSVE-NEXT: fcvt s21, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov h23, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h25, v2.h[6]
+; NONEON-NOSVE-NEXT: fminnm s5, s7, s5
+; NONEON-NOSVE-NEXT: mov h7, v1.h[3]
+; NONEON-NOSVE-NEXT: fminnm s6, s16, s6
+; NONEON-NOSVE-NEXT: fminnm s16, s18, s17
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s18, h19
+; NONEON-NOSVE-NEXT: fcvt s19, h24
+; NONEON-NOSVE-NEXT: mov h24, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h17, s5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt h5, s20
+; NONEON-NOSVE-NEXT: fminnm s20, s22, s21
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt s21, h23
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: mov h22, v0.h[4]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[4]
+; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0]
+; NONEON-NOSVE-NEXT: mov h17, v1.h[4]
+; NONEON-NOSVE-NEXT: fminnm s7, s18, s7
+; NONEON-NOSVE-NEXT: mov h18, v3.h[4]
+; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s20
+; NONEON-NOSVE-NEXT: fminnm s19, s21, s19
+; NONEON-NOSVE-NEXT: fcvt s20, h23
+; NONEON-NOSVE-NEXT: mov h21, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0]
+; NONEON-NOSVE-NEXT: fcvt s6, h17
+; NONEON-NOSVE-NEXT: fcvt s17, h22
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: mov h22, v3.h[5]
+; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s19
+; NONEON-NOSVE-NEXT: mov h19, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fminnm s6, s17, s6
+; NONEON-NOSVE-NEXT: mov h17, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fminnm s18, s20, s18
+; NONEON-NOSVE-NEXT: mov h20, v3.h[6]
+; NONEON-NOSVE-NEXT: mov v4.h[3], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt s7, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt s16, h21
+; NONEON-NOSVE-NEXT: fcvt s21, h24
+; NONEON-NOSVE-NEXT: fcvt s19, h19
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fcvt s23, h25
+; NONEON-NOSVE-NEXT: fcvt h18, s18
+; NONEON-NOSVE-NEXT: fcvt s20, h20
+; NONEON-NOSVE-NEXT: mov h3, v3.h[7]
+; NONEON-NOSVE-NEXT: fminnm s7, s22, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fminnm s16, s21, s16
+; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0]
+; NONEON-NOSVE-NEXT: fminnm s6, s19, s17
+; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0]
+; NONEON-NOSVE-NEXT: fminnm s17, s23, s20
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fminnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s17
+; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: mov v5.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0]
+; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: stp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.minnum.v16f16(<16 x half> %op1, <16 x half> %op2)
@@ -193,6 +638,11 @@ define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminnm v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.minnum.v2f32(<2 x float> %op1, <2 x float> %op2)
ret <2 x float> %res
}
@@ -206,6 +656,11 @@ define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.minnum.v4f32(<4 x float> %op1, <4 x float> %op2)
ret <4 x float> %res
}
@@ -221,6 +676,15 @@ define void @fminnm_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fminnm v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fminnm v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.minnum.v8f32(<8 x float> %op1, <8 x float> %op2)
@@ -233,6 +697,11 @@ define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK: // %bb.0:
; CHECK-NEXT: fminnm d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminnm d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2)
ret <1 x double> %res
}
@@ -246,6 +715,11 @@ define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminnm v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.minnum.v2f64(<2 x double> %op1, <2 x double> %op2)
ret <2 x double> %res
}
@@ -261,6 +735,15 @@ define void @fminnm_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminnm_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fminnm v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fminnm v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.minnum.v4f64(<4 x double> %op1, <4 x double> %op2)
@@ -281,6 +764,38 @@ define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h1
+; NONEON-NOSVE-NEXT: fcvt s7, h0
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s2, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fmax s5, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: fmax s3, s4, s3
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s5
+; NONEON-NOSVE-NEXT: fcvt s4, h6
+; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h2, s3
+; NONEON-NOSVE-NEXT: fmax s1, s4, s1
+; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.maximum.v4f16(<4 x half> %op1, <4 x half> %op2)
ret <4 x half> %res
}
@@ -294,6 +809,64 @@ define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmax s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fmax s3, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s4
+; NONEON-NOSVE-NEXT: fmax s4, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fmax s5, s5, s16
+; NONEON-NOSVE-NEXT: mov h16, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt s3, h6
+; NONEON-NOSVE-NEXT: fcvt s6, h7
+; NONEON-NOSVE-NEXT: mov h7, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h5, s5
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fmax s3, s6, s3
+; NONEON-NOSVE-NEXT: mov h6, v0.h[6]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h6
+; NONEON-NOSVE-NEXT: fmax s6, s16, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0]
+; NONEON-NOSVE-NEXT: fmax s4, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h3, s6
+; NONEON-NOSVE-NEXT: fmax s0, s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.maximum.v8f16(<8 x half> %op1, <8 x half> %op2)
ret <8 x half> %res
}
@@ -309,6 +882,119 @@ define void @fmax_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h16, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h18, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h17, v3.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s19, h0
+; NONEON-NOSVE-NEXT: fcvt s20, h3
+; NONEON-NOSVE-NEXT: fcvt s21, h2
+; NONEON-NOSVE-NEXT: mov h22, v3.h[2]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fmax s4, s19, s4
+; NONEON-NOSVE-NEXT: mov h19, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h24, v3.h[3]
+; NONEON-NOSVE-NEXT: fmax s20, s21, s20
+; NONEON-NOSVE-NEXT: fcvt s21, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov h23, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h25, v2.h[6]
+; NONEON-NOSVE-NEXT: fmax s5, s7, s5
+; NONEON-NOSVE-NEXT: mov h7, v1.h[3]
+; NONEON-NOSVE-NEXT: fmax s6, s16, s6
+; NONEON-NOSVE-NEXT: fmax s16, s18, s17
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s18, h19
+; NONEON-NOSVE-NEXT: fcvt s19, h24
+; NONEON-NOSVE-NEXT: mov h24, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h17, s5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt h5, s20
+; NONEON-NOSVE-NEXT: fmax s20, s22, s21
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt s21, h23
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: mov h22, v0.h[4]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[4]
+; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0]
+; NONEON-NOSVE-NEXT: mov h17, v1.h[4]
+; NONEON-NOSVE-NEXT: fmax s7, s18, s7
+; NONEON-NOSVE-NEXT: mov h18, v3.h[4]
+; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s20
+; NONEON-NOSVE-NEXT: fmax s19, s21, s19
+; NONEON-NOSVE-NEXT: fcvt s20, h23
+; NONEON-NOSVE-NEXT: mov h21, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0]
+; NONEON-NOSVE-NEXT: fcvt s6, h17
+; NONEON-NOSVE-NEXT: fcvt s17, h22
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: mov h22, v3.h[5]
+; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s19
+; NONEON-NOSVE-NEXT: mov h19, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmax s6, s17, s6
+; NONEON-NOSVE-NEXT: mov h17, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fmax s18, s20, s18
+; NONEON-NOSVE-NEXT: mov h20, v3.h[6]
+; NONEON-NOSVE-NEXT: mov v4.h[3], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt s7, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt s16, h21
+; NONEON-NOSVE-NEXT: fcvt s21, h24
+; NONEON-NOSVE-NEXT: fcvt s19, h19
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fcvt s23, h25
+; NONEON-NOSVE-NEXT: fcvt h18, s18
+; NONEON-NOSVE-NEXT: fcvt s20, h20
+; NONEON-NOSVE-NEXT: mov h3, v3.h[7]
+; NONEON-NOSVE-NEXT: fmax s7, s22, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fmax s16, s21, s16
+; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0]
+; NONEON-NOSVE-NEXT: fmax s6, s19, s17
+; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0]
+; NONEON-NOSVE-NEXT: fmax s17, s23, s20
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fmax s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fmax s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s17
+; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: mov v5.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0]
+; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: stp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.maximum.v16f16(<16 x half> %op1, <16 x half> %op2)
@@ -325,6 +1011,11 @@ define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmax v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.maximum.v2f32(<2 x float> %op1, <2 x float> %op2)
ret <2 x float> %res
}
@@ -338,6 +1029,11 @@ define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmax v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.maximum.v4f32(<4 x float> %op1, <4 x float> %op2)
ret <4 x float> %res
}
@@ -353,6 +1049,15 @@ define void @fmax_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmax v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmax v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.maximum.v8f32(<8 x float> %op1, <8 x float> %op2)
@@ -365,6 +1070,11 @@ define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmax d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmax d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2)
ret <1 x double> %res
}
@@ -378,6 +1088,11 @@ define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmax v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.maximum.v2f64(<2 x double> %op1, <2 x double> %op2)
ret <2 x double> %res
}
@@ -393,6 +1108,15 @@ define void @fmax_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmax_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmax v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fmax v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.maximum.v4f64(<4 x double> %op1, <4 x double> %op2)
@@ -413,6 +1137,38 @@ define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h1
+; NONEON-NOSVE-NEXT: fcvt s7, h0
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s2, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h4
+; NONEON-NOSVE-NEXT: fcvt s4, h5
+; NONEON-NOSVE-NEXT: fmin s5, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v0.h[3]
+; NONEON-NOSVE-NEXT: fmin s3, s4, s3
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s5
+; NONEON-NOSVE-NEXT: fcvt s4, h6
+; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h2, s3
+; NONEON-NOSVE-NEXT: fmin s1, s4, s1
+; NONEON-NOSVE-NEXT: mov v0.h[2], v2.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: mov v0.h[3], v1.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.minimum.v4f16(<4 x half> %op1, <4 x half> %op2)
ret <4 x half> %res
}
@@ -426,6 +1182,64 @@ define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmin s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fmin s3, s3, s2
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s4
+; NONEON-NOSVE-NEXT: fmin s4, s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fmin s5, s5, s16
+; NONEON-NOSVE-NEXT: mov h16, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: mov v2.h[1], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt s3, h6
+; NONEON-NOSVE-NEXT: fcvt s6, h7
+; NONEON-NOSVE-NEXT: mov h7, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h5, s5
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: mov v2.h[2], v4.h[0]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fmin s3, s6, s3
+; NONEON-NOSVE-NEXT: mov h6, v0.h[6]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[3], v5.h[0]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h6
+; NONEON-NOSVE-NEXT: fmin s6, s16, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov v2.h[4], v3.h[0]
+; NONEON-NOSVE-NEXT: fmin s4, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h3, s6
+; NONEON-NOSVE-NEXT: fmin s0, s0, s1
+; NONEON-NOSVE-NEXT: mov v2.h[5], v3.h[0]
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v2.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v2.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.minimum.v8f16(<8 x half> %op1, <8 x half> %op2)
ret <8 x half> %res
}
@@ -441,6 +1255,119 @@ define void @fmin_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h16, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h18, v2.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h6, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h17, v3.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s19, h0
+; NONEON-NOSVE-NEXT: fcvt s20, h3
+; NONEON-NOSVE-NEXT: fcvt s21, h2
+; NONEON-NOSVE-NEXT: mov h22, v3.h[2]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fmin s4, s19, s4
+; NONEON-NOSVE-NEXT: mov h19, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h24, v3.h[3]
+; NONEON-NOSVE-NEXT: fmin s20, s21, s20
+; NONEON-NOSVE-NEXT: fcvt s21, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov h23, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h25, v2.h[6]
+; NONEON-NOSVE-NEXT: fmin s5, s7, s5
+; NONEON-NOSVE-NEXT: mov h7, v1.h[3]
+; NONEON-NOSVE-NEXT: fmin s6, s16, s6
+; NONEON-NOSVE-NEXT: fmin s16, s18, s17
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s18, h19
+; NONEON-NOSVE-NEXT: fcvt s19, h24
+; NONEON-NOSVE-NEXT: mov h24, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h17, s5
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcvt h5, s20
+; NONEON-NOSVE-NEXT: fmin s20, s22, s21
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt s21, h23
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: mov h22, v0.h[4]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[4]
+; NONEON-NOSVE-NEXT: mov v4.h[1], v17.h[0]
+; NONEON-NOSVE-NEXT: mov h17, v1.h[4]
+; NONEON-NOSVE-NEXT: fmin s7, s18, s7
+; NONEON-NOSVE-NEXT: mov h18, v3.h[4]
+; NONEON-NOSVE-NEXT: mov v5.h[1], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s20
+; NONEON-NOSVE-NEXT: fmin s19, s21, s19
+; NONEON-NOSVE-NEXT: fcvt s20, h23
+; NONEON-NOSVE-NEXT: mov h21, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h23, v2.h[5]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: mov v4.h[2], v6.h[0]
+; NONEON-NOSVE-NEXT: fcvt s6, h17
+; NONEON-NOSVE-NEXT: fcvt s17, h22
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fcvt s18, h18
+; NONEON-NOSVE-NEXT: mov h22, v3.h[5]
+; NONEON-NOSVE-NEXT: mov v5.h[2], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h16, s19
+; NONEON-NOSVE-NEXT: mov h19, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmin s6, s17, s6
+; NONEON-NOSVE-NEXT: mov h17, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fmin s18, s20, s18
+; NONEON-NOSVE-NEXT: mov h20, v3.h[6]
+; NONEON-NOSVE-NEXT: mov v4.h[3], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt s7, h22
+; NONEON-NOSVE-NEXT: fcvt s22, h23
+; NONEON-NOSVE-NEXT: mov v5.h[3], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt s16, h21
+; NONEON-NOSVE-NEXT: fcvt s21, h24
+; NONEON-NOSVE-NEXT: fcvt s19, h19
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fcvt s23, h25
+; NONEON-NOSVE-NEXT: fcvt h18, s18
+; NONEON-NOSVE-NEXT: fcvt s20, h20
+; NONEON-NOSVE-NEXT: mov h3, v3.h[7]
+; NONEON-NOSVE-NEXT: fmin s7, s22, s7
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fmin s16, s21, s16
+; NONEON-NOSVE-NEXT: mov v4.h[4], v6.h[0]
+; NONEON-NOSVE-NEXT: fmin s6, s19, s17
+; NONEON-NOSVE-NEXT: mov v5.h[4], v18.h[0]
+; NONEON-NOSVE-NEXT: fmin s17, s23, s20
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt h7, s7
+; NONEON-NOSVE-NEXT: fmin s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h16, s16
+; NONEON-NOSVE-NEXT: fcvt h6, s6
+; NONEON-NOSVE-NEXT: fmin s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s17
+; NONEON-NOSVE-NEXT: mov v5.h[5], v7.h[0]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: mov v4.h[5], v16.h[0]
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: mov v5.h[6], v3.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[6], v6.h[0]
+; NONEON-NOSVE-NEXT: mov v5.h[7], v1.h[0]
+; NONEON-NOSVE-NEXT: mov v4.h[7], v0.h[0]
+; NONEON-NOSVE-NEXT: stp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = call <16 x half> @llvm.minimum.v16f16(<16 x half> %op1, <16 x half> %op2)
@@ -457,6 +1384,11 @@ define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) {
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmin v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.minimum.v2f32(<2 x float> %op1, <2 x float> %op2)
ret <2 x float> %res
}
@@ -470,6 +1402,11 @@ define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) {
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmin v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.minimum.v4f32(<4 x float> %op1, <4 x float> %op2)
ret <4 x float> %res
}
@@ -485,6 +1422,15 @@ define void @fmin_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmin v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmin v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = call <8 x float> @llvm.minimum.v8f32(<8 x float> %op1, <8 x float> %op2)
@@ -497,6 +1443,11 @@ define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmin d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmin d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2)
ret <1 x double> %res
}
@@ -510,6 +1461,11 @@ define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) {
; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmin v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.minimum.v2f64(<2 x double> %op1, <2 x double> %op2)
ret <2 x double> %res
}
@@ -525,6 +1481,15 @@ define void @fmin_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmin_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmin v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fmin v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = call <4 x double> @llvm.minimum.v4f64(<4 x double> %op1, <4 x double> %op2)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
index e239ff5e35fd3..f1561011e2181 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible < %s | FileCheck %s -check-prefix=FA64
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -check-prefix=NO-FA64
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -26,6 +27,30 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
; NO-FA64-NEXT: fadd h0, h0, h2
; NO-FA64-NEXT: fadd h0, h0, h1
; NO-FA64-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
ret half %res
}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
index 78ae7bb6cf30a..a0a7dad835662 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -19,6 +20,30 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
; CHECK-NEXT: fadd h0, h0, h2
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
ret half %res
}
@@ -43,6 +68,49 @@ define half @fadda_v8f16(half %start, <8 x half> %a) {
; CHECK-NEXT: fadd h0, h0, h2
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
ret half %res
}
@@ -83,6 +151,90 @@ define half @fadda_v16f16(half %start, ptr %a) {
; CHECK-NEXT: fadd h0, h0, h2
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: fcvt s2, h1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
ret half %res
@@ -96,6 +248,14 @@ define float @fadda_v2f32(float %start, <2 x float> %a) {
; CHECK-NEXT: mov z1.s, z1.s[1]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov s2, v1.s[1]
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
ret float %res
}
@@ -112,6 +272,17 @@ define float @fadda_v4f32(float %start, <4 x float> %a) {
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov s2, v1.s[1]
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: mov s3, v1.s[2]
+; NONEON-NOSVE-NEXT: mov s1, v1.s[3]
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fadd s0, s0, s3
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
ret float %res
}
@@ -136,6 +307,26 @@ define float @fadda_v8f32(float %start, ptr %a) {
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: mov s2, v1.s[1]
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: mov s3, v1.s[2]
+; NONEON-NOSVE-NEXT: mov s1, v1.s[3]
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fadd s0, s0, s3
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: mov s2, v1.s[1]
+; NONEON-NOSVE-NEXT: mov s3, v1.s[2]
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: mov s1, v1.s[3]
+; NONEON-NOSVE-NEXT: fadd s0, s0, s2
+; NONEON-NOSVE-NEXT: fadd s0, s0, s3
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
ret float %res
@@ -146,6 +337,11 @@ define double @fadda_v1f64(double %start, <1 x double> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fadd d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
ret double %res
}
@@ -158,6 +354,13 @@ define double @fadda_v2f64(double %start, <2 x double> %a) {
; CHECK-NEXT: mov z1.d, z1.d[1]
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov d2, v1.d[1]
+; NONEON-NOSVE-NEXT: fadd d0, d0, d1
+; NONEON-NOSVE-NEXT: fadd d0, d0, d2
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
ret double %res
}
@@ -174,6 +377,17 @@ define double @fadda_v4f64(double %start, ptr %a) {
; CHECK-NEXT: mov z1.d, z1.d[1]
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadda_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x0]
+; NONEON-NOSVE-NEXT: mov d2, v3.d[1]
+; NONEON-NOSVE-NEXT: fadd d0, d0, d3
+; NONEON-NOSVE-NEXT: fadd d0, d0, d2
+; NONEON-NOSVE-NEXT: mov d2, v1.d[1]
+; NONEON-NOSVE-NEXT: fadd d0, d0, d1
+; NONEON-NOSVE-NEXT: fadd d0, d0, d2
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
ret double %res
@@ -191,6 +405,30 @@ define half @faddv_v4f16(half %start, <4 x half> %a) {
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s3, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s3, s2
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s1, s2, s1
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
ret half %res
}
@@ -203,6 +441,49 @@ define half @faddv_v8f16(half %start, <8 x half> %a) {
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s3, h1
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s3, s2
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fadd s1, s2, s1
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
ret half %res
}
@@ -216,6 +497,58 @@ define half @faddv_v16f16(half %start, ptr %a) {
; CHECK-NEXT: faddv h1, p0, z1.h
; CHECK-NEXT: fadd h0, h0, h1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fadd v3.4s, v4.4s, v3.4s
+; NONEON-NOSVE-NEXT: fadd v1.4s, v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v1.4s
+; NONEON-NOSVE-NEXT: mov h1, v2.h[1]
+; NONEON-NOSVE-NEXT: fcvt s3, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s1, s3, s1
+; NONEON-NOSVE-NEXT: mov h3, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s1, s1, s3
+; NONEON-NOSVE-NEXT: mov h3, v2.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s1, s1, s3
+; NONEON-NOSVE-NEXT: mov h3, v2.h[4]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s1, s1, s3
+; NONEON-NOSVE-NEXT: mov h3, v2.h[5]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s1, s1, s3
+; NONEON-NOSVE-NEXT: mov h3, v2.h[6]
+; NONEON-NOSVE-NEXT: mov h2, v2.h[7]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s1, s1, s3
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
ret half %res
@@ -229,6 +562,12 @@ define float @faddv_v2f32(float %start, <2 x float> %a) {
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: faddp s1, v1.2s
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: ret
%res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
ret float %res
}
@@ -241,6 +580,13 @@ define float @faddv_v4f32(float %start, <4 x float> %a) {
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: faddp v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: faddp s1, v1.2s
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: ret
%res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
ret float %res
}
@@ -254,6 +600,15 @@ define float @faddv_v8f32(float %start, ptr %a) {
; CHECK-NEXT: faddv s1, p0, z1.s
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
+; NONEON-NOSVE-NEXT: fadd v1.4s, v2.4s, v1.4s
+; NONEON-NOSVE-NEXT: faddp v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: faddp s1, v1.2s
+; NONEON-NOSVE-NEXT: fadd s0, s0, s1
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
ret float %res
@@ -264,6 +619,11 @@ define double @faddv_v1f64(double %start, <1 x double> %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fadd d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
ret double %res
}
@@ -276,6 +636,12 @@ define double @faddv_v2f64(double %start, <2 x double> %a) {
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: faddp d1, v1.2d
+; NONEON-NOSVE-NEXT: fadd d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
ret double %res
}
@@ -289,6 +655,14 @@ define double @faddv_v4f64(double %start, ptr %a) {
; CHECK-NEXT: faddv d1, p0, z1.d
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: faddv_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
+; NONEON-NOSVE-NEXT: fadd v1.2d, v2.2d, v1.2d
+; NONEON-NOSVE-NEXT: faddp d1, v1.2d
+; NONEON-NOSVE-NEXT: fadd d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
ret double %res
@@ -306,6 +680,26 @@ define half @fmaxv_v4f16(<4 x half> %a) {
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
ret half %res
}
@@ -318,6 +712,45 @@ define half @fmaxv_v8f16(<8 x half> %a) {
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
ret half %res
}
@@ -331,6 +764,85 @@ define half @fmaxv_v16f16(ptr %a) {
; CHECK-NEXT: fmaxnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmaxnm s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fmaxnm s2, s4, s2
+; NONEON-NOSVE-NEXT: mov h4, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[4]
+; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmaxnm s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[5]
+; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmaxnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
+; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fmaxnm s3, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
ret half %res
@@ -344,6 +856,11 @@ define float @fmaxv_v2f32(<2 x float> %a) {
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxnmp s0, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
ret float %res
}
@@ -356,6 +873,11 @@ define float @fmaxv_v4f32(<4 x float> %a) {
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxnmv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
ret float %res
}
@@ -369,6 +891,13 @@ define float @fmaxv_v8f32(ptr %a) {
; CHECK-NEXT: fmaxnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fmaxnm v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmaxnmv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
ret float %res
@@ -378,6 +907,10 @@ define double @fmaxv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fmaxv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
ret double %res
}
@@ -390,6 +923,11 @@ define double @fmaxv_v2f64(<2 x double> %a) {
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxnmp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
ret double %res
}
@@ -403,6 +941,13 @@ define double @fmaxv_v4f64(ptr %a) {
; CHECK-NEXT: fmaxnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaxv_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fmaxnm v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fmaxnmp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
ret double %res
@@ -420,6 +965,26 @@ define half @fminv_v4f16(<4 x half> %a) {
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
ret half %res
}
@@ -432,6 +997,45 @@ define half @fminv_v8f16(<8 x half> %a) {
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a)
ret half %res
}
@@ -445,6 +1049,85 @@ define half @fminv_v16f16(ptr %a) {
; CHECK-NEXT: fminnmv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fminnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fminnm s2, s3, s2
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fminnm s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fminnm s2, s4, s2
+; NONEON-NOSVE-NEXT: mov h4, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fminnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[4]
+; NONEON-NOSVE-NEXT: fminnm s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fminnm s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[5]
+; NONEON-NOSVE-NEXT: fminnm s2, s2, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fminnm s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fminnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
+; NONEON-NOSVE-NEXT: fminnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fminnm s3, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fminnm s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
ret half %res
@@ -458,6 +1141,11 @@ define float @fminv_v2f32(<2 x float> %a) {
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminnmp s0, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
ret float %res
}
@@ -470,6 +1158,11 @@ define float @fminv_v4f32(<4 x float> %a) {
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminnmv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
ret float %res
}
@@ -483,6 +1176,13 @@ define float @fminv_v8f32(ptr %a) {
; CHECK-NEXT: fminnmv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fminnm v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fminnmv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
ret float %res
@@ -492,6 +1192,10 @@ define double @fminv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fminv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
ret double %res
}
@@ -504,6 +1208,11 @@ define double @fminv_v2f64(<2 x double> %a) {
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminnmp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
ret double %res
}
@@ -517,6 +1226,13 @@ define double @fminv_v4f64(ptr %a) {
; CHECK-NEXT: fminnmv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminv_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fminnm v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fminnmp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
ret double %res
@@ -534,6 +1250,26 @@ define half @fmaximumv_v4f16(<4 x half> %a) {
; CHECK-NEXT: fmaxv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
ret half %res
}
@@ -546,6 +1282,45 @@ define half @fmaximumv_v8f16(<8 x half> %a) {
; CHECK-NEXT: fmaxv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a)
ret half %res
}
@@ -559,6 +1334,85 @@ define half @fmaximumv_v16f16(ptr %a) {
; CHECK-NEXT: fmaxv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmax s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fmax s2, s3, s2
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmax s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fmax s2, s4, s2
+; NONEON-NOSVE-NEXT: mov h4, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmax s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[4]
+; NONEON-NOSVE-NEXT: fmax s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmax s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[5]
+; NONEON-NOSVE-NEXT: fmax s2, s2, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmax s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fmax s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmax s0, s0, s1
+; NONEON-NOSVE-NEXT: fmax s2, s2, s3
+; NONEON-NOSVE-NEXT: fmax s3, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmax s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmax s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op)
ret half %res
@@ -572,6 +1426,11 @@ define float @fmaximumv_v2f32(<2 x float> %a) {
; CHECK-NEXT: fmaxv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxp s0, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a)
ret float %res
}
@@ -584,6 +1443,11 @@ define float @fmaximumv_v4f32(<4 x float> %a) {
; CHECK-NEXT: fmaxv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
ret float %res
}
@@ -597,6 +1461,13 @@ define float @fmaximumv_v8f32(ptr %a) {
; CHECK-NEXT: fmaxv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fmax v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fmaxv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op)
ret float %res
@@ -606,6 +1477,10 @@ define double @fmaximumv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fmaximumv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
ret double %res
}
@@ -618,6 +1493,11 @@ define double @fmaximumv_v2f64(<2 x double> %a) {
; CHECK-NEXT: fmaxv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmaxp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
ret double %res
}
@@ -631,6 +1511,13 @@ define double @fmaximumv_v4f64(ptr %a) {
; CHECK-NEXT: fmaxv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fmaximumv_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fmax v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fmaxp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op)
ret double %res
@@ -648,6 +1535,26 @@ define half @fminimumv_v4f16(<4 x half> %a) {
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a)
ret half %res
}
@@ -660,6 +1567,45 @@ define half @fminimumv_v8f16(<8 x half> %a) {
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s2, s1
+; NONEON-NOSVE-NEXT: mov h2, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s1, s2
+; NONEON-NOSVE-NEXT: mov h2, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s1, s1, s2
+; NONEON-NOSVE-NEXT: fcvt h1, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a)
ret half %res
}
@@ -673,6 +1619,85 @@ define half @fminimumv_v16f16(ptr %a) {
; CHECK-NEXT: fminv h0, p0, z0.h
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s4, h1
+; NONEON-NOSVE-NEXT: fcvt s5, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmin s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fmin s2, s3, s2
+; NONEON-NOSVE-NEXT: mov h3, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmin s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[3]
+; NONEON-NOSVE-NEXT: fmin s2, s4, s2
+; NONEON-NOSVE-NEXT: mov h4, v1.h[3]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmin s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[4]
+; NONEON-NOSVE-NEXT: fmin s2, s2, s3
+; NONEON-NOSVE-NEXT: mov h3, v1.h[4]
+; NONEON-NOSVE-NEXT: fcvt h4, s4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmin s3, s5, s3
+; NONEON-NOSVE-NEXT: mov h5, v0.h[5]
+; NONEON-NOSVE-NEXT: fmin s2, s2, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[5]
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmin s4, s5, s4
+; NONEON-NOSVE-NEXT: mov h5, v0.h[6]
+; NONEON-NOSVE-NEXT: mov h0, v0.h[7]
+; NONEON-NOSVE-NEXT: fmin s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h3, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[6]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: mov h1, v1.h[7]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fmin s0, s0, s1
+; NONEON-NOSVE-NEXT: fmin s2, s2, s3
+; NONEON-NOSVE-NEXT: fmin s3, s5, s4
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: fcvt h2, s2
+; NONEON-NOSVE-NEXT: fcvt h3, s3
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fmin s2, s2, s3
+; NONEON-NOSVE-NEXT: fcvt h1, s2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fmin s0, s1, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
ret half %res
@@ -686,6 +1711,11 @@ define float @fminimumv_v2f32(<2 x float> %a) {
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminp s0, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a)
ret float %res
}
@@ -698,6 +1728,11 @@ define float @fminimumv_v4f32(<4 x float> %a) {
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
ret float %res
}
@@ -711,6 +1746,13 @@ define float @fminimumv_v8f32(ptr %a) {
; CHECK-NEXT: fminv s0, p0, z0.s
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fmin v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fminv s0, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
ret float %res
@@ -720,6 +1762,10 @@ define double @fminimumv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fminimumv_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
ret double %res
}
@@ -732,6 +1778,11 @@ define double @fminimumv_v2f64(<2 x double> %a) {
; CHECK-NEXT: fminv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fminp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
ret double %res
}
@@ -745,6 +1796,13 @@ define double @fminimumv_v4f64(ptr %a) {
; CHECK-NEXT: fminv d0, p0, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fminimumv_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: fmin v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fminp d0, v0.2d
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
ret double %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 412c27cb82f1d..6af2b885ace08 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -16,6 +17,13 @@ define <2 x half> @frintp_v2f16(<2 x half> %op) {
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.ceil.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -28,6 +36,13 @@ define <4 x half> @frintp_v4f16(<4 x half> %op) {
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -40,6 +55,16 @@ define <8 x half> @frintp_v8f16(<8 x half> %op) {
; CHECK-NEXT: frintp z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h
+; NONEON-NOSVE-NEXT: frintp v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s
+; NONEON-NOSVE-NEXT: frintp v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -53,6 +78,24 @@ define void @frintp_v16f16(ptr %a) {
; CHECK-NEXT: frintp z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: frintp v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: frintp v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintp v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -67,6 +110,11 @@ define <2 x float> @frintp_v2f32(<2 x float> %op) {
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintp v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -79,6 +127,11 @@ define <4 x float> @frintp_v4f32(<4 x float> %op) {
; CHECK-NEXT: frintp z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -92,6 +145,14 @@ define void @frintp_v8f32(ptr %a) {
; CHECK-NEXT: frintp z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintp v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintp v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -103,6 +164,11 @@ define <1 x double> @frintp_v1f64(<1 x double> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: frintp d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintp d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
ret <1 x double> %res
}
@@ -115,6 +181,11 @@ define <2 x double> @frintp_v2f64(<2 x double> %op) {
; CHECK-NEXT: frintp z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintp v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -128,6 +199,14 @@ define void @frintp_v4f64(ptr %a) {
; CHECK-NEXT: frintp z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintp_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintp v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: frintp v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
@@ -146,6 +225,13 @@ define <2 x half> @frintm_v2f16(<2 x half> %op) {
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.floor.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -158,6 +244,13 @@ define <4 x half> @frintm_v4f16(<4 x half> %op) {
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -170,6 +263,16 @@ define <8 x half> @frintm_v8f16(<8 x half> %op) {
; CHECK-NEXT: frintm z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h
+; NONEON-NOSVE-NEXT: frintm v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s
+; NONEON-NOSVE-NEXT: frintm v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -183,6 +286,24 @@ define void @frintm_v16f16(ptr %a) {
; CHECK-NEXT: frintm z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: frintm v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: frintm v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintm v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -197,6 +318,11 @@ define <2 x float> @frintm_v2f32(<2 x float> %op) {
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintm v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -209,6 +335,11 @@ define <4 x float> @frintm_v4f32(<4 x float> %op) {
; CHECK-NEXT: frintm z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -222,6 +353,14 @@ define void @frintm_v8f32(ptr %a) {
; CHECK-NEXT: frintm z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintm v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintm v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -233,6 +372,11 @@ define <1 x double> @frintm_v1f64(<1 x double> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: frintm d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintm d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
ret <1 x double> %res
}
@@ -245,6 +389,11 @@ define <2 x double> @frintm_v2f64(<2 x double> %op) {
; CHECK-NEXT: frintm z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintm v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -258,6 +407,14 @@ define void @frintm_v4f64(ptr %a) {
; CHECK-NEXT: frintm z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintm_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintm v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: frintm v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
@@ -276,6 +433,13 @@ define <2 x half> @frinti_v2f16(<2 x half> %op) {
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.nearbyint.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -288,6 +452,13 @@ define <4 x half> @frinti_v4f16(<4 x half> %op) {
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -300,6 +471,16 @@ define <8 x half> @frinti_v8f16(<8 x half> %op) {
; CHECK-NEXT: frinti z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h
+; NONEON-NOSVE-NEXT: frinti v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s
+; NONEON-NOSVE-NEXT: frinti v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -313,6 +494,24 @@ define void @frinti_v16f16(ptr %a) {
; CHECK-NEXT: frinti z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: frinti v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: frinti v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frinti v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -327,6 +526,11 @@ define <2 x float> @frinti_v2f32(<2 x float> %op) {
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinti v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -339,6 +543,11 @@ define <4 x float> @frinti_v4f32(<4 x float> %op) {
; CHECK-NEXT: frinti z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -352,6 +561,14 @@ define void @frinti_v8f32(ptr %a) {
; CHECK-NEXT: frinti z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frinti v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frinti v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -363,6 +580,11 @@ define <1 x double> @frinti_v1f64(<1 x double> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: frinti d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinti d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
ret <1 x double> %res
}
@@ -375,6 +597,11 @@ define <2 x double> @frinti_v2f64(<2 x double> %op) {
; CHECK-NEXT: frinti z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinti v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -388,6 +615,14 @@ define void @frinti_v4f64(ptr %a) {
; CHECK-NEXT: frinti z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinti_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frinti v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: frinti v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
@@ -406,6 +641,13 @@ define <2 x half> @frintx_v2f16(<2 x half> %op) {
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.rint.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -418,6 +660,13 @@ define <4 x half> @frintx_v4f16(<4 x half> %op) {
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -430,6 +679,16 @@ define <8 x half> @frintx_v8f16(<8 x half> %op) {
; CHECK-NEXT: frintx z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h
+; NONEON-NOSVE-NEXT: frintx v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s
+; NONEON-NOSVE-NEXT: frintx v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -443,6 +702,24 @@ define void @frintx_v16f16(ptr %a) {
; CHECK-NEXT: frintx z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: frintx v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: frintx v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintx v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -457,6 +734,11 @@ define <2 x float> @frintx_v2f32(<2 x float> %op) {
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintx v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -469,6 +751,11 @@ define <4 x float> @frintx_v4f32(<4 x float> %op) {
; CHECK-NEXT: frintx z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -482,6 +769,14 @@ define void @frintx_v8f32(ptr %a) {
; CHECK-NEXT: frintx z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintx v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintx v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.rint.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -493,6 +788,11 @@ define <1 x double> @frintx_v1f64(<1 x double> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: frintx d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintx d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
ret <1 x double> %res
}
@@ -505,6 +805,11 @@ define <2 x double> @frintx_v2f64(<2 x double> %op) {
; CHECK-NEXT: frintx z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintx v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -518,6 +823,14 @@ define void @frintx_v4f64(ptr %a) {
; CHECK-NEXT: frintx z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintx_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintx v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: frintx v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.rint.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
@@ -536,6 +849,13 @@ define <2 x half> @frinta_v2f16(<2 x half> %op) {
; CHECK-NEXT: frinta z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.round.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -548,6 +868,13 @@ define <4 x half> @frinta_v4f16(<4 x half> %op) {
; CHECK-NEXT: frinta z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -560,6 +887,16 @@ define <8 x half> @frinta_v8f16(<8 x half> %op) {
; CHECK-NEXT: frinta z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h
+; NONEON-NOSVE-NEXT: frinta v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s
+; NONEON-NOSVE-NEXT: frinta v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -573,6 +910,24 @@ define void @frinta_v16f16(ptr %a) {
; CHECK-NEXT: frinta z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: frinta v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: frinta v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frinta v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.round.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -587,6 +942,11 @@ define <2 x float> @frinta_v2f32(<2 x float> %op) {
; CHECK-NEXT: frinta z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinta v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -599,6 +959,11 @@ define <4 x float> @frinta_v4f32(<4 x float> %op) {
; CHECK-NEXT: frinta z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -612,6 +977,14 @@ define void @frinta_v8f32(ptr %a) {
; CHECK-NEXT: frinta z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frinta v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frinta v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.round.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -623,6 +996,11 @@ define <1 x double> @frinta_v1f64(<1 x double> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: frinta d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinta d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
ret <1 x double> %res
}
@@ -635,6 +1013,11 @@ define <2 x double> @frinta_v2f64(<2 x double> %op) {
; CHECK-NEXT: frinta z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frinta v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -648,6 +1031,14 @@ define void @frinta_v4f64(ptr %a) {
; CHECK-NEXT: frinta z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frinta_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frinta v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: frinta v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.round.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
@@ -666,6 +1057,13 @@ define <2 x half> @frintn_v2f16(<2 x half> %op) {
; CHECK-NEXT: frintn z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -678,6 +1076,13 @@ define <4 x half> @frintn_v4f16(<4 x half> %op) {
; CHECK-NEXT: frintn z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -690,6 +1095,16 @@ define <8 x half> @frintn_v8f16(<8 x half> %op) {
; CHECK-NEXT: frintn z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h
+; NONEON-NOSVE-NEXT: frintn v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s
+; NONEON-NOSVE-NEXT: frintn v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -703,6 +1118,24 @@ define void @frintn_v16f16(ptr %a) {
; CHECK-NEXT: frintn z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: frintn v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: frintn v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintn v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -717,6 +1150,11 @@ define <2 x float> @frintn_v2f32(<2 x float> %op) {
; CHECK-NEXT: frintn z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintn v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -729,6 +1167,11 @@ define <4 x float> @frintn_v4f32(<4 x float> %op) {
; CHECK-NEXT: frintn z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -742,6 +1185,14 @@ define void @frintn_v8f32(ptr %a) {
; CHECK-NEXT: frintn z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintn v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintn v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -753,6 +1204,11 @@ define <1 x double> @frintn_v1f64(<1 x double> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: frintn d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintn d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
ret <1 x double> %res
}
@@ -765,6 +1221,11 @@ define <2 x double> @frintn_v2f64(<2 x double> %op) {
; CHECK-NEXT: frintn z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintn v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -778,6 +1239,14 @@ define void @frintn_v4f64(ptr %a) {
; CHECK-NEXT: frintn z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintn_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintn v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: frintn v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
@@ -796,6 +1265,13 @@ define <2 x half> @frintz_v2f16(<2 x half> %op) {
; CHECK-NEXT: frintz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x half> @llvm.trunc.v2f16(<2 x half> %op)
ret <2 x half> %res
}
@@ -808,6 +1284,13 @@ define <4 x half> @frintz_v4f16(<4 x half> %op) {
; CHECK-NEXT: frintz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
ret <4 x half> %res
}
@@ -820,6 +1303,16 @@ define <8 x half> @frintz_v8f16(<8 x half> %op) {
; CHECK-NEXT: frintz z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v0.8h
+; NONEON-NOSVE-NEXT: frintz v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v1.4s
+; NONEON-NOSVE-NEXT: frintz v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
ret <8 x half> %res
}
@@ -833,6 +1326,24 @@ define void @frintz_v16f16(ptr %a) {
; CHECK-NEXT: frintz z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: frintz v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: frintz v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintz v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v1.4s
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x half>, ptr %a
%res = call <16 x half> @llvm.trunc.v16f16(<16 x half> %op)
store <16 x half> %res, ptr %a
@@ -847,6 +1358,11 @@ define <2 x float> @frintz_v2f32(<2 x float> %op) {
; CHECK-NEXT: frintz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintz v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
ret <2 x float> %res
}
@@ -859,6 +1375,11 @@ define <4 x float> @frintz_v4f32(<4 x float> %op) {
; CHECK-NEXT: frintz z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
ret <4 x float> %res
}
@@ -872,6 +1393,14 @@ define void @frintz_v8f32(ptr %a) {
; CHECK-NEXT: frintz z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintz v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: frintz v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x float>, ptr %a
%res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
store <8 x float> %res, ptr %a
@@ -883,6 +1412,11 @@ define <1 x double> @frintz_v1f64(<1 x double> %op) {
; CHECK: // %bb.0:
; CHECK-NEXT: frintz d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintz d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
ret <1 x double> %res
}
@@ -895,6 +1429,11 @@ define <2 x double> @frintz_v2f64(<2 x double> %op) {
; CHECK-NEXT: frintz z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: frintz v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
ret <2 x double> %res
}
@@ -908,6 +1447,14 @@ define void @frintz_v4f64(ptr %a) {
; CHECK-NEXT: frintz z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: frintz_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: frintz v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: frintz v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x double>, ptr %a
%res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
store <4 x double> %res, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index 89697cde848b5..824419b31a5a8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -16,6 +17,14 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.4h, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <2 x half> %op1, <2 x half> %op2
ret <2 x half> %sel
}
@@ -32,6 +41,14 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.4h, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <4 x half> %op1, <4 x half> %op2
ret <4 x half> %sel
}
@@ -48,6 +65,14 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.8h, w8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <8 x half> %op1, <8 x half> %op2
ret <8 x half> %sel
}
@@ -67,6 +92,20 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w2, #0x1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q4, [x1, #16]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load volatile <16 x half>, ptr %a
%op2 = load volatile <16 x half>, ptr %b
%sel = select i1 %mask, <16 x half> %op1, <16 x half> %op2
@@ -86,6 +125,14 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.2s, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <2 x float> %op1, <2 x float> %op2
ret <2 x float> %sel
}
@@ -102,6 +149,14 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.4s, w8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <4 x float> %op1, <4 x float> %op2
ret <4 x float> %sel
}
@@ -121,6 +176,20 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w2, #0x1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q4, [x1, #16]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load volatile <8 x float>, ptr %a
%op2 = load volatile <8 x float>, ptr %b
%sel = select i1 %mask, <8 x float> %op1, <8 x float> %op2
@@ -134,6 +203,14 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: fcsel d0, d0, d1, ne
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: fmov d2, x8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2
ret <1 x double> %sel
}
@@ -151,6 +228,14 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: dup v2.2d, x8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <2 x double> %op1, <2 x double> %op2
ret <2 x double> %sel
}
@@ -171,6 +256,20 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w2, #0x1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q4, [x1, #16]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load volatile <4 x double>, ptr %a
%op2 = load volatile <4 x double>, ptr %b
%sel = select i1 %mask, <4 x double> %op1, <4 x double> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 5840ffb20994c..c853bdc5af8db 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -15,6 +16,13 @@ define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) {
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <4 x half> %op1 to <4 x i16>
ret <4 x i16> %res
}
@@ -27,6 +35,21 @@ define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fptoui <8 x half> %op1 to <8 x i16>
store <8 x i16> %res, ptr %b
@@ -42,6 +65,27 @@ define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzu z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzu v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtzu v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fptoui <16 x half> %op1 to <16 x i16>
store <16 x i16> %res, ptr %b
@@ -61,6 +105,13 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) {
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x half> %op1 to <2 x i32>
ret <2 x i32> %res
}
@@ -74,6 +125,12 @@ define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) {
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <4 x half> %op1 to <4 x i32>
ret <4 x i32> %res
}
@@ -90,6 +147,20 @@ define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fptoui <8 x half> %op1 to <8 x i32>
store <8 x i32> %res, ptr %b
@@ -114,6 +185,26 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzu v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtzu v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fptoui <16 x half> %op1 to <16 x i32>
store <16 x i32> %res, ptr %b
@@ -130,6 +221,13 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
; CHECK-NEXT: fcvtzu x8, h0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v1f16_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzu x8, s0
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <1 x half> %op1 to <1 x i64>
ret <1 x i64> %res
}
@@ -145,6 +243,18 @@ define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvtzu x8, s0
+; NONEON-NOSVE-NEXT: fcvtzu x9, s1
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x half> %op1 to <2 x i64>
ret <2 x i64> %res
}
@@ -167,6 +277,27 @@ define void @fcvtzu_v4f16_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: mov h1, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h2, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvtzu x9, s0
+; NONEON-NOSVE-NEXT: fcvtzu x8, s1
+; NONEON-NOSVE-NEXT: fcvtzu x10, s2
+; NONEON-NOSVE-NEXT: fcvtzu x11, s3
+; NONEON-NOSVE-NEXT: fmov d1, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: mov v0.d[1], x10
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x half>, ptr %a
%res = fptoui <4 x half> %op1 to <4 x i64>
store <4 x i64> %res, ptr %b
@@ -204,6 +335,47 @@ define void @fcvtzu_v8f16_v8i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x1, #32]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f16_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: mov h1, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov h5, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h6, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h7, v2.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvtzu x9, s0
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvtzu x13, s2
+; NONEON-NOSVE-NEXT: fcvtzu x8, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h7
+; NONEON-NOSVE-NEXT: fcvtzu x10, s3
+; NONEON-NOSVE-NEXT: fcvtzu x11, s4
+; NONEON-NOSVE-NEXT: fcvtzu x12, s5
+; NONEON-NOSVE-NEXT: fcvtzu x14, s6
+; NONEON-NOSVE-NEXT: fmov d3, x13
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: fcvtzu x8, s1
+; NONEON-NOSVE-NEXT: fmov d1, x9
+; NONEON-NOSVE-NEXT: fmov d2, x12
+; NONEON-NOSVE-NEXT: mov v0.d[1], x10
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: mov v3.d[1], x8
+; NONEON-NOSVE-NEXT: mov v2.d[1], x14
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fptoui <8 x half> %op1 to <8 x i64>
store <8 x i64> %res, ptr %b
@@ -264,6 +436,80 @@ define void @fcvtzu_v16f16_v16i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q5, q2, [x1, #96]
; CHECK-NEXT: add sp, sp, #128
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v16f16_v16i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: mov h2, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s3, h1
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #24]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s6, h0
+; NONEON-NOSVE-NEXT: mov h0, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s17, h4
+; NONEON-NOSVE-NEXT: mov h18, v4.h[2]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvtzu x8, s3
+; NONEON-NOSVE-NEXT: fcvt s3, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h7
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: mov h16, v4.h[3]
+; NONEON-NOSVE-NEXT: fcvtzu x9, s6
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: mov h4, v4.h[1]
+; NONEON-NOSVE-NEXT: fcvtzu x11, s2
+; NONEON-NOSVE-NEXT: mov h2, v6.h[2]
+; NONEON-NOSVE-NEXT: fcvtzu x10, s17
+; NONEON-NOSVE-NEXT: fcvtzu x13, s5
+; NONEON-NOSVE-NEXT: fcvtzu x12, s3
+; NONEON-NOSVE-NEXT: mov h3, v6.h[3]
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: mov h5, v6.h[1]
+; NONEON-NOSVE-NEXT: fcvt s17, h18
+; NONEON-NOSVE-NEXT: fcvtzu x14, s7
+; NONEON-NOSVE-NEXT: fmov d7, x8
+; NONEON-NOSVE-NEXT: fcvtzu x8, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fmov d0, x11
+; NONEON-NOSVE-NEXT: fcvtzu x11, s1
+; NONEON-NOSVE-NEXT: fmov d1, x13
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvtzu x13, s16
+; NONEON-NOSVE-NEXT: fmov d16, x9
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvtzu x15, s17
+; NONEON-NOSVE-NEXT: mov v0.d[1], x12
+; NONEON-NOSVE-NEXT: mov v1.d[1], x14
+; NONEON-NOSVE-NEXT: fcvtzu x9, s2
+; NONEON-NOSVE-NEXT: mov v16.d[1], x8
+; NONEON-NOSVE-NEXT: fcvtzu x8, s6
+; NONEON-NOSVE-NEXT: fcvtzu x14, s4
+; NONEON-NOSVE-NEXT: fcvtzu x12, s3
+; NONEON-NOSVE-NEXT: mov v7.d[1], x11
+; NONEON-NOSVE-NEXT: fmov d3, x10
+; NONEON-NOSVE-NEXT: fcvtzu x11, s5
+; NONEON-NOSVE-NEXT: fmov d2, x15
+; NONEON-NOSVE-NEXT: stp q16, q1, [x1, #64]
+; NONEON-NOSVE-NEXT: fmov d1, x9
+; NONEON-NOSVE-NEXT: fmov d4, x8
+; NONEON-NOSVE-NEXT: stp q7, q0, [x1]
+; NONEON-NOSVE-NEXT: mov v2.d[1], x13
+; NONEON-NOSVE-NEXT: mov v3.d[1], x14
+; NONEON-NOSVE-NEXT: mov v1.d[1], x12
+; NONEON-NOSVE-NEXT: mov v4.d[1], x11
+; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #96]
+; NONEON-NOSVE-NEXT: stp q4, q1, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fptoui <16 x half> %op1 to <16 x i64>
store <16 x i64> %res, ptr %b
@@ -282,6 +528,11 @@ define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x float> %op1 to <2 x i16>
ret <2 x i16> %res
}
@@ -295,6 +546,12 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <4 x float> %op1 to <4 x i16>
ret <4 x i16> %res
}
@@ -312,6 +569,14 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fptoui <8 x float> %op1 to <8 x i16>
ret <8 x i16> %res
@@ -336,6 +601,19 @@ define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
; CHECK-NEXT: stp q2, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v16f32_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzu v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtzu v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x float>, ptr %a
%res = fptoui <16 x float> %op1 to <16 x i16>
store <16 x i16> %res, ptr %b
@@ -354,6 +632,11 @@ define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) {
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzu v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x float> %op1 to <2 x i32>
ret <2 x i32> %res
}
@@ -366,6 +649,11 @@ define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) {
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <4 x float> %op1 to <4 x i32>
ret <4 x i32> %res
}
@@ -379,6 +667,14 @@ define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzu v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzu v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fptoui <8 x float> %op1 to <8 x i32>
store <8 x i32> %res, ptr %b
@@ -398,6 +694,13 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) {
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v1f32_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <1 x float> %op1 to <1 x i64>
ret <1 x i64> %res
}
@@ -411,6 +714,12 @@ define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) {
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x float> %op1 to <2 x i64>
ret <2 x i64> %res
}
@@ -427,6 +736,20 @@ define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x float>, ptr %a
%res = fptoui <4 x float> %op1 to <4 x i64>
store <4 x i64> %res, ptr %b
@@ -451,6 +774,26 @@ define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s
+; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s
+; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzu v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: fcvtzu v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fptoui <8 x float> %op1 to <8 x i64>
store <8 x i64> %res, ptr %b
@@ -468,6 +811,12 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs w8, d0
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
}
@@ -481,6 +830,12 @@ define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x double> %op1 to <2 x i16>
ret <2 x i16> %res
}
@@ -509,6 +864,15 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptoui <4 x double> %op1 to <4 x i16>
ret <4 x i16> %res
@@ -552,6 +916,23 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) {
; CHECK-NEXT: strh w8, [sp, #2]
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI26_0
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: xtn v7.2s, v0.2d
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI26_0]
+; NONEON-NOSVE-NEXT: xtn v6.2s, v1.2d
+; NONEON-NOSVE-NEXT: xtn v5.2s, v2.2d
+; NONEON-NOSVE-NEXT: xtn v4.2s, v3.2d
+; NONEON-NOSVE-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x double>, ptr %a
%res = fptoui <8 x double> %op1 to <8 x i16>
ret <8 x i16> %res
@@ -628,6 +1009,35 @@ define void @fcvtzu_v16f64_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v16f64_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96]
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI27_0
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: fcvtzs v5.2d, v5.2d
+; NONEON-NOSVE-NEXT: fcvtzs v4.2d, v4.2d
+; NONEON-NOSVE-NEXT: fcvtzs v6.2d, v6.2d
+; NONEON-NOSVE-NEXT: fcvtzs v7.2d, v7.2d
+; NONEON-NOSVE-NEXT: xtn v19.2s, v0.2d
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI27_0]
+; NONEON-NOSVE-NEXT: xtn v23.2s, v3.2d
+; NONEON-NOSVE-NEXT: xtn v18.2s, v1.2d
+; NONEON-NOSVE-NEXT: xtn v22.2s, v2.2d
+; NONEON-NOSVE-NEXT: xtn v17.2s, v5.2d
+; NONEON-NOSVE-NEXT: xtn v21.2s, v6.2d
+; NONEON-NOSVE-NEXT: xtn v16.2s, v4.2d
+; NONEON-NOSVE-NEXT: xtn v20.2s, v7.2d
+; NONEON-NOSVE-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
+; NONEON-NOSVE-NEXT: tbl v0.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x double>, ptr %a
%res = fptoui <16 x double> %op1 to <16 x i16>
store <16 x i16> %res, ptr %b
@@ -647,6 +1057,13 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i32>
ret <1 x i32> %res
}
@@ -660,6 +1077,12 @@ define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x double> %op1 to <2 x i32>
ret <2 x i32> %res
}
@@ -677,6 +1100,14 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) {
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptoui <4 x double> %op1 to <4 x i32>
ret <4 x i32> %res
@@ -701,6 +1132,19 @@ define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
; CHECK-NEXT: stp q2, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzu v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: fcvtzu v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x double>, ptr %a
%res = fptoui <8 x double> %op1 to <8 x i32>
store <8 x i32> %res, ptr %b
@@ -719,6 +1163,12 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) {
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzu x8, d0
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <1 x double> %op1 to <1 x i64>
ret <1 x i64> %res
}
@@ -731,6 +1181,11 @@ define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) {
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptoui <2 x double> %op1 to <2 x i64>
ret <2 x i64> %res
}
@@ -744,6 +1199,14 @@ define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzu v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzu v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptoui <4 x double> %op1 to <4 x i64>
store <4 x i64> %res, ptr %b
@@ -762,6 +1225,13 @@ define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) {
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <4 x half> %op1 to <4 x i16>
ret <4 x i16> %res
}
@@ -774,6 +1244,21 @@ define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fptosi <8 x half> %op1 to <8 x i16>
store <8 x i16> %res, ptr %b
@@ -789,6 +1274,27 @@ define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzs z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzs v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtzs v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v1.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fptosi <16 x half> %op1 to <16 x i16>
store <16 x i16> %res, ptr %b
@@ -808,6 +1314,13 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x half> %op1 to <2 x i32>
ret <2 x i32> %res
}
@@ -821,6 +1334,12 @@ define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <4 x half> %op1 to <4 x i32>
ret <4 x i32> %res
}
@@ -837,6 +1356,20 @@ define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fptosi <8 x half> %op1 to <8 x i32>
store <8 x i32> %res, ptr %b
@@ -861,6 +1394,26 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzs v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: fcvtzs v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fptosi <16 x half> %op1 to <16 x i32>
store <16 x i32> %res, ptr %b
@@ -877,6 +1430,13 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
; CHECK-NEXT: fcvtzs x8, h0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v1f16_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvtzs x8, s0
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <1 x half> %op1 to <1 x i64>
ret <1 x i64> %res
}
@@ -893,6 +1453,18 @@ define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov h1, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvtzs x8, s0
+; NONEON-NOSVE-NEXT: fcvtzs x9, s1
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x half> %op1 to <2 x i64>
ret <2 x i64> %res
}
@@ -915,6 +1487,27 @@ define void @fcvtzs_v4f16_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: mov h1, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h2, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvtzs x9, s0
+; NONEON-NOSVE-NEXT: fcvtzs x8, s1
+; NONEON-NOSVE-NEXT: fcvtzs x10, s2
+; NONEON-NOSVE-NEXT: fcvtzs x11, s3
+; NONEON-NOSVE-NEXT: fmov d1, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: mov v0.d[1], x10
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x half>, ptr %a
%res = fptosi <4 x half> %op1 to <4 x i64>
store <4 x i64> %res, ptr %b
@@ -952,6 +1545,47 @@ define void @fcvtzs_v8f16_v8i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x1, #32]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f16_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: mov h1, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[3]
+; NONEON-NOSVE-NEXT: mov h4, v0.h[1]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: mov h5, v2.h[2]
+; NONEON-NOSVE-NEXT: mov h6, v2.h[3]
+; NONEON-NOSVE-NEXT: mov h7, v2.h[1]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvtzs x9, s0
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvtzs x13, s2
+; NONEON-NOSVE-NEXT: fcvtzs x8, s1
+; NONEON-NOSVE-NEXT: fcvt s1, h7
+; NONEON-NOSVE-NEXT: fcvtzs x10, s3
+; NONEON-NOSVE-NEXT: fcvtzs x11, s4
+; NONEON-NOSVE-NEXT: fcvtzs x12, s5
+; NONEON-NOSVE-NEXT: fcvtzs x14, s6
+; NONEON-NOSVE-NEXT: fmov d3, x13
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: fcvtzs x8, s1
+; NONEON-NOSVE-NEXT: fmov d1, x9
+; NONEON-NOSVE-NEXT: fmov d2, x12
+; NONEON-NOSVE-NEXT: mov v0.d[1], x10
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: mov v3.d[1], x8
+; NONEON-NOSVE-NEXT: mov v2.d[1], x14
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%res = fptosi <8 x half> %op1 to <8 x i64>
store <8 x i64> %res, ptr %b
@@ -1012,6 +1646,80 @@ define void @fcvtzs_v16f16_v16i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q5, q2, [x1, #96]
; CHECK-NEXT: add sp, sp, #128
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v16f16_v16i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: mov h2, v1.h[2]
+; NONEON-NOSVE-NEXT: fcvt s3, h1
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #24]
+; NONEON-NOSVE-NEXT: mov h5, v1.h[3]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[2]
+; NONEON-NOSVE-NEXT: mov h16, v0.h[3]
+; NONEON-NOSVE-NEXT: fcvt s6, h0
+; NONEON-NOSVE-NEXT: mov h0, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h1, v1.h[1]
+; NONEON-NOSVE-NEXT: fcvt s17, h4
+; NONEON-NOSVE-NEXT: mov h18, v4.h[2]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvtzs x8, s3
+; NONEON-NOSVE-NEXT: fcvt s3, h5
+; NONEON-NOSVE-NEXT: fcvt s5, h7
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: mov h16, v4.h[3]
+; NONEON-NOSVE-NEXT: fcvtzs x9, s6
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvt s0, h0
+; NONEON-NOSVE-NEXT: fcvt s1, h1
+; NONEON-NOSVE-NEXT: mov h4, v4.h[1]
+; NONEON-NOSVE-NEXT: fcvtzs x11, s2
+; NONEON-NOSVE-NEXT: mov h2, v6.h[2]
+; NONEON-NOSVE-NEXT: fcvtzs x10, s17
+; NONEON-NOSVE-NEXT: fcvtzs x13, s5
+; NONEON-NOSVE-NEXT: fcvtzs x12, s3
+; NONEON-NOSVE-NEXT: mov h3, v6.h[3]
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: mov h5, v6.h[1]
+; NONEON-NOSVE-NEXT: fcvt s17, h18
+; NONEON-NOSVE-NEXT: fcvtzs x14, s7
+; NONEON-NOSVE-NEXT: fmov d7, x8
+; NONEON-NOSVE-NEXT: fcvtzs x8, s0
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fmov d0, x11
+; NONEON-NOSVE-NEXT: fcvtzs x11, s1
+; NONEON-NOSVE-NEXT: fmov d1, x13
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvtzs x13, s16
+; NONEON-NOSVE-NEXT: fmov d16, x9
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvtzs x15, s17
+; NONEON-NOSVE-NEXT: mov v0.d[1], x12
+; NONEON-NOSVE-NEXT: mov v1.d[1], x14
+; NONEON-NOSVE-NEXT: fcvtzs x9, s2
+; NONEON-NOSVE-NEXT: mov v16.d[1], x8
+; NONEON-NOSVE-NEXT: fcvtzs x8, s6
+; NONEON-NOSVE-NEXT: fcvtzs x14, s4
+; NONEON-NOSVE-NEXT: fcvtzs x12, s3
+; NONEON-NOSVE-NEXT: mov v7.d[1], x11
+; NONEON-NOSVE-NEXT: fmov d3, x10
+; NONEON-NOSVE-NEXT: fcvtzs x11, s5
+; NONEON-NOSVE-NEXT: fmov d2, x15
+; NONEON-NOSVE-NEXT: stp q16, q1, [x1, #64]
+; NONEON-NOSVE-NEXT: fmov d1, x9
+; NONEON-NOSVE-NEXT: fmov d4, x8
+; NONEON-NOSVE-NEXT: stp q7, q0, [x1]
+; NONEON-NOSVE-NEXT: mov v2.d[1], x13
+; NONEON-NOSVE-NEXT: mov v3.d[1], x14
+; NONEON-NOSVE-NEXT: mov v1.d[1], x12
+; NONEON-NOSVE-NEXT: mov v4.d[1], x11
+; NONEON-NOSVE-NEXT: stp q3, q2, [x1, #96]
+; NONEON-NOSVE-NEXT: stp q4, q1, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%res = fptosi <16 x half> %op1 to <16 x i64>
store <16 x i64> %res, ptr %b
@@ -1030,6 +1738,11 @@ define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x float> %op1 to <2 x i16>
ret <2 x i16> %res
}
@@ -1043,6 +1756,12 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <4 x float> %op1 to <4 x i16>
ret <4 x i16> %res
}
@@ -1060,6 +1779,14 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fptosi <8 x float> %op1 to <8 x i16>
ret <8 x i16> %res
@@ -1084,6 +1811,19 @@ define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
; CHECK-NEXT: stp q2, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v16f32_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzs v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtzs v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x float>, ptr %a
%res = fptosi <16 x float> %op1 to <16 x i16>
store <16 x i16> %res, ptr %b
@@ -1102,6 +1842,11 @@ define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x float> %op1 to <2 x i32>
ret <2 x i32> %res
}
@@ -1114,6 +1859,11 @@ define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) {
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <4 x float> %op1 to <4 x i32>
ret <4 x i32> %res
}
@@ -1127,6 +1877,14 @@ define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtzs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fptosi <8 x float> %op1 to <8 x i32>
store <8 x i32> %res, ptr %b
@@ -1146,6 +1904,13 @@ define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) {
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v1f32_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <1 x float> %op1 to <1 x i64>
ret <1 x i64> %res
}
@@ -1159,6 +1924,12 @@ define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) {
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x float> %op1 to <2 x i64>
ret <2 x i64> %res
}
@@ -1175,6 +1946,20 @@ define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x float>, ptr %a
%res = fptosi <4 x float> %op1 to <4 x i64>
store <4 x i64> %res, ptr %b
@@ -1199,6 +1984,26 @@ define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: fcvtl v1.2d, v1.2s
+; NONEON-NOSVE-NEXT: fcvtl v0.2d, v0.2s
+; NONEON-NOSVE-NEXT: fcvtl v2.2d, v2.2s
+; NONEON-NOSVE-NEXT: fcvtl v3.2d, v3.2s
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%res = fptosi <8 x float> %op1 to <8 x i64>
store <8 x i64> %res, ptr %b
@@ -1218,6 +2023,12 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs w8, d0
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i16>
ret <1 x i16> %res
}
@@ -1231,6 +2042,12 @@ define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x double> %op1 to <2 x i16>
ret <2 x i16> %res
}
@@ -1259,6 +2076,15 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptosi <4 x double> %op1 to <4 x i16>
ret <4 x i16> %res
@@ -1302,6 +2128,23 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) {
; CHECK-NEXT: strh w8, [sp, #2]
; CHECK-NEXT: ldr q0, [sp], #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI61_0
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: xtn v7.2s, v0.2d
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI61_0]
+; NONEON-NOSVE-NEXT: xtn v6.2s, v1.2d
+; NONEON-NOSVE-NEXT: xtn v5.2s, v2.2d
+; NONEON-NOSVE-NEXT: xtn v4.2s, v3.2d
+; NONEON-NOSVE-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x double>, ptr %a
%res = fptosi <8 x double> %op1 to <8 x i16>
ret <8 x i16> %res
@@ -1378,6 +2221,35 @@ define void @fcvtzs_v16f64_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v16f64_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #96]
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI62_0
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q4, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: fcvtzs v5.2d, v5.2d
+; NONEON-NOSVE-NEXT: fcvtzs v4.2d, v4.2d
+; NONEON-NOSVE-NEXT: fcvtzs v6.2d, v6.2d
+; NONEON-NOSVE-NEXT: fcvtzs v7.2d, v7.2d
+; NONEON-NOSVE-NEXT: xtn v19.2s, v0.2d
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI62_0]
+; NONEON-NOSVE-NEXT: xtn v23.2s, v3.2d
+; NONEON-NOSVE-NEXT: xtn v18.2s, v1.2d
+; NONEON-NOSVE-NEXT: xtn v22.2s, v2.2d
+; NONEON-NOSVE-NEXT: xtn v17.2s, v5.2d
+; NONEON-NOSVE-NEXT: xtn v21.2s, v6.2d
+; NONEON-NOSVE-NEXT: xtn v16.2s, v4.2d
+; NONEON-NOSVE-NEXT: xtn v20.2s, v7.2d
+; NONEON-NOSVE-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
+; NONEON-NOSVE-NEXT: tbl v0.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x double>, ptr %a
%res = fptosi <16 x double> %op1 to <16 x i16>
store <16 x i16> %res, ptr %b
@@ -1397,6 +2269,13 @@ define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i32>
ret <1 x i32> %res
}
@@ -1410,6 +2289,12 @@ define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x double> %op1 to <2 x i32>
ret <2 x i32> %res
}
@@ -1427,6 +2312,14 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) {
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptosi <4 x double> %op1 to <4 x i32>
ret <4 x i32> %res
@@ -1451,6 +2344,19 @@ define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
; CHECK-NEXT: stp q2, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: fcvtzs v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x double>, ptr %a
%res = fptosi <8 x double> %op1 to <8 x i32>
store <8 x i32> %res, ptr %b
@@ -1469,6 +2375,12 @@ define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) {
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs x8, d0
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <1 x double> %op1 to <1 x i64>
ret <1 x i64> %res
}
@@ -1481,6 +2393,11 @@ define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) {
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = fptosi <2 x double> %op1 to <2 x i64>
ret <2 x i64> %res
}
@@ -1494,6 +2411,14 @@ define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fcvtzs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtzs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%res = fptosi <4 x double> %op1 to <4 x i64>
store <4 x i64> %res, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index c1c7b5c05f5d5..d3b0937467655 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -27,6 +28,14 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uzp1 v2.4h, v2.4h, v0.4h
+; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15
+; NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <2 x i1> %mask, <2 x half> %op1, <2 x half> %op2
ret <2 x half> %sel
}
@@ -45,6 +54,13 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15
+; NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
ret <4 x half> %sel
}
@@ -64,6 +80,14 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: shl v2.8h, v2.8h, #15
+; NONEON-NOSVE-NEXT: cmlt v2.8h, v2.8h, #0
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
ret <8 x half> %sel
}
@@ -80,6 +104,126 @@ define void @select_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: mov h2, v1.h[1]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[1]
+; NONEON-NOSVE-NEXT: mov h4, v1.h[2]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[2]
+; NONEON-NOSVE-NEXT: fcvt s6, h1
+; NONEON-NOSVE-NEXT: fcvt s7, h0
+; NONEON-NOSVE-NEXT: mov h16, v1.h[6]
+; NONEON-NOSVE-NEXT: mov h17, v0.h[6]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fcmp s3, s2
+; NONEON-NOSVE-NEXT: mov h2, v1.h[3]
+; NONEON-NOSVE-NEXT: mov h3, v0.h[3]
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[4]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[4]
+; NONEON-NOSVE-NEXT: fcvt s2, h2
+; NONEON-NOSVE-NEXT: fcvt s3, h3
+; NONEON-NOSVE-NEXT: csetm w14, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v1.h[5]
+; NONEON-NOSVE-NEXT: mov h5, v0.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w12, eq
+; NONEON-NOSVE-NEXT: fcmp s3, s2
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: ldr q3, [x1, #16]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w11, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v1.h[7]
+; NONEON-NOSVE-NEXT: mov h7, v0.h[7]
+; NONEON-NOSVE-NEXT: mov h18, v3.h[3]
+; NONEON-NOSVE-NEXT: csetm w13, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: mov h4, v3.h[1]
+; NONEON-NOSVE-NEXT: mov h5, v2.h[1]
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: csetm w9, eq
+; NONEON-NOSVE-NEXT: fcmp s17, s16
+; NONEON-NOSVE-NEXT: mov h16, v3.h[2]
+; NONEON-NOSVE-NEXT: fcvt s4, h4
+; NONEON-NOSVE-NEXT: mov h17, v2.h[2]
+; NONEON-NOSVE-NEXT: fcvt s5, h5
+; NONEON-NOSVE-NEXT: csetm w10, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: fcvt s6, h3
+; NONEON-NOSVE-NEXT: fcvt s7, h2
+; NONEON-NOSVE-NEXT: csetm w15, eq
+; NONEON-NOSVE-NEXT: fcmp s5, s4
+; NONEON-NOSVE-NEXT: fmov s4, w14
+; NONEON-NOSVE-NEXT: csetm w16, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v2.h[3]
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: fcvt s16, h17
+; NONEON-NOSVE-NEXT: mov v4.h[1], w8
+; NONEON-NOSVE-NEXT: fcvt s17, h18
+; NONEON-NOSVE-NEXT: csetm w14, eq
+; NONEON-NOSVE-NEXT: fmov s5, w14
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcmp s16, s7
+; NONEON-NOSVE-NEXT: mov h7, v3.h[4]
+; NONEON-NOSVE-NEXT: mov h16, v2.h[4]
+; NONEON-NOSVE-NEXT: mov v4.h[2], w12
+; NONEON-NOSVE-NEXT: mov v5.h[1], w16
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s6, s17
+; NONEON-NOSVE-NEXT: mov h17, v2.h[5]
+; NONEON-NOSVE-NEXT: fcvt s6, h7
+; NONEON-NOSVE-NEXT: fcvt s7, h16
+; NONEON-NOSVE-NEXT: mov h16, v3.h[5]
+; NONEON-NOSVE-NEXT: mov v4.h[3], w11
+; NONEON-NOSVE-NEXT: mov v5.h[2], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcvt s17, h17
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov h6, v3.h[6]
+; NONEON-NOSVE-NEXT: mov h7, v2.h[6]
+; NONEON-NOSVE-NEXT: fcvt s16, h16
+; NONEON-NOSVE-NEXT: mov v4.h[4], w13
+; NONEON-NOSVE-NEXT: mov v5.h[3], w8
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcvt s6, h6
+; NONEON-NOSVE-NEXT: fcvt s7, h7
+; NONEON-NOSVE-NEXT: fcmp s17, s16
+; NONEON-NOSVE-NEXT: mov h16, v3.h[7]
+; NONEON-NOSVE-NEXT: mov h17, v2.h[7]
+; NONEON-NOSVE-NEXT: mov v5.h[4], w8
+; NONEON-NOSVE-NEXT: mov v4.h[5], w9
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: fcvt s6, h16
+; NONEON-NOSVE-NEXT: fcvt s7, h17
+; NONEON-NOSVE-NEXT: mov v5.h[5], w8
+; NONEON-NOSVE-NEXT: mov v4.h[6], w10
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: fcmp s7, s6
+; NONEON-NOSVE-NEXT: mov v5.h[6], w8
+; NONEON-NOSVE-NEXT: mov v4.h[7], w15
+; NONEON-NOSVE-NEXT: csetm w8, eq
+; NONEON-NOSVE-NEXT: mov v5.h[7], w8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%mask = fcmp oeq <16 x half> %op1, %op2
@@ -102,6 +246,13 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.2s, v2.2s, #31
+; NONEON-NOSVE-NEXT: cmlt v2.2s, v2.2s, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
ret <2 x float> %sel
}
@@ -121,6 +272,14 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: shl v2.4s, v2.4s, #31
+; NONEON-NOSVE-NEXT: cmlt v2.4s, v2.4s, #0
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
ret <4 x float> %sel
}
@@ -137,6 +296,18 @@ define void @select_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: fcmeq v4.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcmeq v5.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%mask = fcmp oeq <8 x float> %op1, %op2
@@ -151,6 +322,14 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
; CHECK-NEXT: tst w0, #0x1
; CHECK-NEXT: fcsel d0, d0, d1, ne
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: fmov d2, x8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
ret <1 x double> %sel
}
@@ -170,6 +349,14 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: shl v2.2d, v2.2d, #63
+; NONEON-NOSVE-NEXT: cmlt v2.2d, v2.2d, #0
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2
ret <2 x double> %sel
}
@@ -186,6 +373,18 @@ define void @select_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: fcmeq v4.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcmeq v5.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%mask = fcmp oeq <4 x double> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index ff38db8c10c04..ae97a266c6ff0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -21,6 +22,14 @@ define <4 x i8> @insertelement_v4i8(<4 x i8> %op1) {
; CHECK-NEXT: mov z0.h, p0/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <4 x i8> %op1, i8 5, i64 3
ret <4 x i8> %r
}
@@ -38,6 +47,14 @@ define <8 x i8> @insertelement_v8i8(<8 x i8> %op1) {
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.b[7], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <8 x i8> %op1, i8 5, i64 7
ret <8 x i8> %r
}
@@ -55,6 +72,12 @@ define <16 x i8> @insertelement_v16i8(<16 x i8> %op1) {
; CHECK-NEXT: mov z0.b, p0/m, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.b[15], w8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <16 x i8> %op1, i8 5, i64 15
ret <16 x i8> %r
}
@@ -72,6 +95,12 @@ define <32 x i8> @insertelement_v32i8(<32 x i8> %op1) {
; CHECK-NEXT: mov z1.b, p0/m, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v1.b[15], w8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <32 x i8> %op1, i8 5, i64 31
ret <32 x i8> %r
}
@@ -90,6 +119,14 @@ define <2 x i16> @insertelement_v2i16(<2 x i16> %op1) {
; CHECK-NEXT: mov z0.s, p0/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.s[1], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <2 x i16> %op1, i16 5, i64 1
ret <2 x i16> %r
}
@@ -107,6 +144,14 @@ define <4 x i16> @insertelement_v4i16(<4 x i16> %op1) {
; CHECK-NEXT: mov z0.h, p0/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <4 x i16> %op1, i16 5, i64 3
ret <4 x i16> %r
}
@@ -124,6 +169,12 @@ define <8 x i16> @insertelement_v8i16(<8 x i16> %op1) {
; CHECK-NEXT: mov z0.h, p0/m, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.h[7], w8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <8 x i16> %op1, i16 5, i64 7
ret <8 x i16> %r
}
@@ -141,6 +192,12 @@ define <16 x i16> @insertelement_v16i16(<16 x i16> %op1) {
; CHECK-NEXT: mov z1.h, p0/m, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v1.h[7], w8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <16 x i16> %op1, i16 5, i64 15
ret <16 x i16> %r
}
@@ -159,6 +216,14 @@ define <2 x i32> @insertelement_v2i32(<2 x i32> %op1) {
; CHECK-NEXT: mov z0.s, p0/m, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.s[1], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <2 x i32> %op1, i32 5, i64 1
ret <2 x i32> %r
}
@@ -176,6 +241,12 @@ define <4 x i32> @insertelement_v4i32(<4 x i32> %op1) {
; CHECK-NEXT: mov z0.s, p0/m, w8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.s[3], w8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <4 x i32> %op1, i32 5, i64 3
ret <4 x i32> %r
}
@@ -193,6 +264,13 @@ define <8 x i32> @insertelement_v8i32(ptr %a) {
; CHECK-NEXT: mov z1.s, p0/m, w8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v1.s[3], w8
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%r = insertelement <8 x i32> %op1, i32 5, i64 7
ret <8 x i32> %r
@@ -205,6 +283,12 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
; CHECK-NEXT: mov z0.d, #5 // =0x5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <1 x i64> %op1, i64 5, i64 0
ret <1 x i64> %r
}
@@ -222,6 +306,12 @@ define <2 x i64> @insertelement_v2i64(<2 x i64> %op1) {
; CHECK-NEXT: mov z0.d, p0/m, x8
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v0.d[1], x8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <2 x i64> %op1, i64 5, i64 1
ret <2 x i64> %r
}
@@ -239,6 +329,13 @@ define <4 x i64> @insertelement_v4i64(ptr %a) {
; CHECK-NEXT: mov z1.d, p0/m, x8
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: mov w8, #5 // =0x5
+; NONEON-NOSVE-NEXT: mov v1.d[1], x8
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%r = insertelement <4 x i64> %op1, i64 5, i64 3
ret <4 x i64> %r
@@ -257,6 +354,16 @@ define <2 x half> @insertelement_v2f16(<2 x half> %op1) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI14_0
+; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI14_0
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: ld1r { v1.4h }, [x8]
+; NONEON-NOSVE-NEXT: mov v1.h[0], v0.h[0]
+; NONEON-NOSVE-NEXT: fmov d0, d1
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <2 x half> %op1, half 5.0, i64 1
ret <2 x half> %r
}
@@ -274,6 +381,15 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) {
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI15_0
+; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI15_0
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x8]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <4 x half> %op1, half 5.0, i64 3
ret <4 x half> %r
}
@@ -291,6 +407,13 @@ define <8 x half> @insertelement_v8f16(<8 x half> %op1) {
; CHECK-NEXT: mov z0.h, p0/m, h1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI16_0
+; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI16_0
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[7], [x8]
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <8 x half> %op1, half 5.0, i64 7
ret <8 x half> %r
}
@@ -308,6 +431,14 @@ define <16 x half> @insertelement_v16f16(ptr %a) {
; CHECK-NEXT: mov z1.h, p0/m, h2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI17_0
+; NONEON-NOSVE-NEXT: add x8, x8, :lo12:.LCPI17_0
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[7], [x8]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%r = insertelement <16 x half> %op1, half 5.0, i64 15
ret <16 x half> %r
@@ -327,6 +458,14 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) {
; CHECK-NEXT: mov z0.s, p0/m, s1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov s1, #5.00000000
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: mov v0.s[1], v1.s[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <2 x float> %op1, float 5.0, i64 1
ret <2 x float> %r
}
@@ -344,6 +483,12 @@ define <4 x float> @insertelement_v4f32(<4 x float> %op1) {
; CHECK-NEXT: mov z0.s, p0/m, s1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov s1, #5.00000000
+; NONEON-NOSVE-NEXT: mov v0.s[3], v1.s[0]
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <4 x float> %op1, float 5.0, i64 3
ret <4 x float> %r
}
@@ -361,6 +506,13 @@ define <8 x float> @insertelement_v8f32(ptr %a) {
; CHECK-NEXT: mov z1.s, p0/m, s2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov s2, #5.00000000
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: mov v1.s[3], v2.s[0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%r = insertelement <8 x float> %op1, float 5.0, i64 7
ret <8 x float> %r
@@ -372,6 +524,12 @@ define <1 x double> @insertelement_v1f64(<1 x double> %op1) {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, #5.00000000
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <1 x double> %op1, double 5.0, i64 0
ret <1 x double> %r
}
@@ -389,6 +547,12 @@ define <2 x double> @insertelement_v2f64(<2 x double> %op1) {
; CHECK-NEXT: mov z0.d, p0/m, d1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov d1, #5.00000000
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%r = insertelement <2 x double> %op1, double 5.0, i64 1
ret <2 x double> %r
}
@@ -406,6 +570,14 @@ define <4 x double> @insertelement_v4f64(ptr %a) {
; CHECK-NEXT: mov z1.d, p0/m, d2
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: insertelement_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov d0, #5.00000000
+; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
+; NONEON-NOSVE-NEXT: mov v1.d[1], v0.d[0]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%r = insertelement <4 x double> %op1, double 5.0, i64 3
ret <4 x double> %r
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index ee1706bc7c354..1b438559e0538 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -2,6 +2,7 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -16,6 +17,11 @@ define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = add <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -28,6 +34,11 @@ define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = add <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -40,6 +51,11 @@ define <16 x i8> @add_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = add <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -53,6 +69,15 @@ define void @add_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.b, z2.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = add <32 x i8> %op1, %op2
@@ -68,6 +93,11 @@ define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = add <2 x i16> %op1, %op2
ret <2 x i16> %res
}
@@ -80,6 +110,11 @@ define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = add <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -92,6 +127,11 @@ define <8 x i16> @add_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = add <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -105,6 +145,15 @@ define void @add_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.h, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = add <16 x i16> %op1, %op2
@@ -120,6 +169,11 @@ define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = add <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -132,6 +186,11 @@ define <4 x i32> @add_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = add <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -145,6 +204,15 @@ define void @add_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.s, z2.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: add v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = add <8 x i32> %op1, %op2
@@ -160,6 +228,11 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = add <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -172,6 +245,11 @@ define <2 x i64> @add_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: add v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = add <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -185,6 +263,15 @@ define void @add_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: add v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: add v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = add <4 x i64> %op1, %op2
@@ -213,6 +300,11 @@ define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; SVE2-NEXT: mul z0.h, z0.h, z1.h
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = mul <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -234,6 +326,11 @@ define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; SVE2-NEXT: mul z0.b, z0.b, z1.b
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = mul <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -255,6 +352,11 @@ define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; SVE2-NEXT: mul z0.b, z0.b, z1.b
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = mul <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -279,6 +381,15 @@ define void @mul_v32i8(ptr %a, ptr %b) {
; SVE2-NEXT: mul z1.b, z2.b, z3.b
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: mul v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: mul v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = mul <32 x i8> %op1, %op2
@@ -303,6 +414,11 @@ define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; SVE2-NEXT: mul z0.s, z0.s, z1.s
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = mul <2 x i16> %op1, %op2
ret <2 x i16> %res
}
@@ -324,6 +440,11 @@ define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; SVE2-NEXT: mul z0.h, z0.h, z1.h
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = mul <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -345,6 +466,11 @@ define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; SVE2-NEXT: mul z0.h, z0.h, z1.h
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = mul <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -369,6 +495,15 @@ define void @mul_v16i16(ptr %a, ptr %b) {
; SVE2-NEXT: mul z1.h, z2.h, z3.h
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: mul v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: mul v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = mul <16 x i16> %op1, %op2
@@ -393,6 +528,11 @@ define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; SVE2-NEXT: mul z0.s, z0.s, z1.s
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = mul <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -414,6 +554,11 @@ define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; SVE2-NEXT: mul z0.s, z0.s, z1.s
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mul v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = mul <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -438,6 +583,15 @@ define void @mul_v8i32(ptr %a, ptr %b) {
; SVE2-NEXT: mul z1.s, z2.s, z3.s
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: mul v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: mul v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = mul <8 x i32> %op1, %op2
@@ -462,6 +616,16 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; SVE2-NEXT: mul z0.d, z0.d, z1.d
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mul x8, x9, x8
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = mul <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -483,6 +647,18 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; SVE2-NEXT: mul z0.d, z0.d, z1.d
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x10, d1
+; NONEON-NOSVE-NEXT: fmov x11, d0
+; NONEON-NOSVE-NEXT: mov x8, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x9, v0.d[1]
+; NONEON-NOSVE-NEXT: mul x10, x11, x10
+; NONEON-NOSVE-NEXT: mul x8, x9, x8
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: mov v0.d[1], x8
+; NONEON-NOSVE-NEXT: ret
%res = mul <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -507,6 +683,29 @@ define void @mul_v4i64(ptr %a, ptr %b) {
; SVE2-NEXT: mul z1.d, z2.d, z3.d
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: fmov x12, d2
+; NONEON-NOSVE-NEXT: mov x11, v2.d[1]
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: mov x10, v3.d[1]
+; NONEON-NOSVE-NEXT: mov x13, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x14, v0.d[1]
+; NONEON-NOSVE-NEXT: mul x8, x9, x8
+; NONEON-NOSVE-NEXT: fmov x9, d3
+; NONEON-NOSVE-NEXT: mul x10, x11, x10
+; NONEON-NOSVE-NEXT: mul x9, x12, x9
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: mul x11, x14, x13
+; NONEON-NOSVE-NEXT: fmov d0, x9
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: mov v0.d[1], x10
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = mul <4 x i64> %op1, %op2
@@ -526,6 +725,11 @@ define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = sub <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -538,6 +742,11 @@ define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = sub <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -550,6 +759,11 @@ define <16 x i8> @sub_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = sub <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -563,6 +777,15 @@ define void @sub_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sub z1.b, z2.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: sub v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: sub v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = sub <32 x i8> %op1, %op2
@@ -578,6 +801,11 @@ define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = sub <2 x i16> %op1, %op2
ret <2 x i16> %res
}
@@ -590,6 +818,11 @@ define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = sub <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -602,6 +835,11 @@ define <8 x i16> @sub_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = sub <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -615,6 +853,15 @@ define void @sub_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: sub z1.h, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: sub v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: sub v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = sub <16 x i16> %op1, %op2
@@ -630,6 +877,11 @@ define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = sub <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -642,6 +894,11 @@ define <4 x i32> @sub_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = sub <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -655,6 +912,15 @@ define void @sub_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: sub z1.s, z2.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: sub v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: sub v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = sub <8 x i32> %op1, %op2
@@ -670,6 +936,11 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = sub <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -682,6 +953,11 @@ define <2 x i64> @sub_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = sub <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -695,6 +971,15 @@ define void @sub_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: sub z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: sub v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: sub v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = sub <4 x i64> %op1, %op2
@@ -715,6 +1000,13 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) {
; CHECK-NEXT: abs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: abs v0.4h, v0.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %op1, i1 false)
ret <4 x i8> %res
}
@@ -727,6 +1019,11 @@ define <8 x i8> @abs_v8i8(<8 x i8> %op1) {
; CHECK-NEXT: abs z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %op1, i1 false)
ret <8 x i8> %res
}
@@ -739,6 +1036,11 @@ define <16 x i8> @abs_v16i8(<16 x i8> %op1) {
; CHECK-NEXT: abs z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %op1, i1 false)
ret <16 x i8> %res
}
@@ -752,6 +1054,14 @@ define void @abs_v32i8(ptr %a) {
; CHECK-NEXT: abs z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: abs v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: abs v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %op1, i1 false)
store <32 x i8> %res, ptr %a
@@ -767,6 +1077,13 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) {
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: abs v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %op1, i1 false)
ret <2 x i16> %res
}
@@ -779,6 +1096,11 @@ define <4 x i16> @abs_v4i16(<4 x i16> %op1) {
; CHECK-NEXT: abs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs v0.4h, v0.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %op1, i1 false)
ret <4 x i16> %res
}
@@ -791,6 +1113,11 @@ define <8 x i16> @abs_v8i16(<8 x i16> %op1) {
; CHECK-NEXT: abs z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %op1, i1 false)
ret <8 x i16> %res
}
@@ -804,6 +1131,14 @@ define void @abs_v16i16(ptr %a) {
; CHECK-NEXT: abs z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: abs v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: abs v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %op1, i1 false)
store <16 x i16> %res, ptr %a
@@ -818,6 +1153,11 @@ define <2 x i32> @abs_v2i32(<2 x i32> %op1) {
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false)
ret <2 x i32> %res
}
@@ -830,6 +1170,11 @@ define <4 x i32> @abs_v4i32(<4 x i32> %op1) {
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false)
ret <4 x i32> %res
}
@@ -843,6 +1188,14 @@ define void @abs_v8i32(ptr %a) {
; CHECK-NEXT: abs z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: abs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false)
store <8 x i32> %res, ptr %a
@@ -857,6 +1210,11 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs d0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.abs.v1i64(<1 x i64> %op1, i1 false)
ret <1 x i64> %res
}
@@ -869,6 +1227,11 @@ define <2 x i64> @abs_v2i64(<2 x i64> %op1) {
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false)
ret <2 x i64> %res
}
@@ -882,6 +1245,14 @@ define void @abs_v4i64(ptr %a) {
; CHECK-NEXT: abs z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: abs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false)
store <4 x i64> %res, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index c2f3bbfb51dd5..ee0ca0e60b5e5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -18,6 +19,11 @@ define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <8 x i8> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i8>
ret <8 x i8> %sext
@@ -33,6 +39,11 @@ define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <16 x i8> %op1, %op2
%sext = sext <16 x i1> %cmp to <16 x i8>
ret <16 x i8> %sext
@@ -50,6 +61,15 @@ define void @icmp_eq_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmeq v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: cmeq v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%cmp = icmp eq <32 x i8> %op1, %op2
@@ -68,6 +88,11 @@ define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <4 x i16> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i16>
ret <4 x i16> %sext
@@ -83,6 +108,11 @@ define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <8 x i16> %op1, %op2
%sext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %sext
@@ -100,6 +130,15 @@ define void @icmp_eq_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmeq v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: cmeq v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%cmp = icmp eq <16 x i16> %op1, %op2
@@ -118,6 +157,11 @@ define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <2 x i32> %op1, %op2
%sext = sext <2 x i1> %cmp to <2 x i32>
ret <2 x i32> %sext
@@ -133,6 +177,11 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <4 x i32> %op1, %op2
%sext = sext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %sext
@@ -150,6 +199,15 @@ define void @icmp_eq_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmeq v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: cmeq v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%cmp = icmp eq <8 x i32> %op1, %op2
@@ -168,6 +226,11 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <1 x i64> %op1, %op2
%sext = sext <1 x i1> %cmp to <1 x i64>
ret <1 x i64> %sext
@@ -183,6 +246,11 @@ define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmeq v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%cmp = icmp eq <2 x i64> %op1, %op2
%sext = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %sext
@@ -200,6 +268,15 @@ define void @icmp_eq_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmeq v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: cmeq v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%cmp = icmp eq <4 x i64> %op1, %op2
@@ -224,6 +301,17 @@ define void @icmp_ne_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_ne_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmeq v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: cmeq v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: mvn v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%cmp = icmp ne <32 x i8> %op1, %op2
@@ -246,6 +334,14 @@ define void @icmp_sge_v8i16(ptr %a, ptr %b) {
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_sge_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: cmge v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%op2 = load <8 x i16>, ptr %b
%cmp = icmp sge <8 x i16> %op1, %op2
@@ -270,6 +366,15 @@ define void @icmp_sgt_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_sgt_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmgt v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: cmgt v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%cmp = icmp sgt <16 x i16> %op1, %op2
@@ -292,6 +397,14 @@ define void @icmp_sle_v4i32(ptr %a, ptr %b) {
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_sle_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: cmge v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i32>, ptr %a
%op2 = load <4 x i32>, ptr %b
%cmp = icmp sle <4 x i32> %op1, %op2
@@ -316,6 +429,15 @@ define void @icmp_slt_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_slt_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmgt v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: cmgt v1.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%cmp = icmp slt <8 x i32> %op1, %op2
@@ -338,6 +460,14 @@ define void @icmp_uge_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_uge_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: cmhs v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i64>, ptr %a
%op2 = load <2 x i64>, ptr %b
%cmp = icmp uge <2 x i64> %op1, %op2
@@ -360,6 +490,14 @@ define void @icmp_ugt_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_ugt_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: cmhi v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i64>, ptr %a
%op2 = load <2 x i64>, ptr %b
%cmp = icmp ugt <2 x i64> %op1, %op2
@@ -382,6 +520,14 @@ define void @icmp_ule_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_ule_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: cmhs v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i64>, ptr %a
%op2 = load <2 x i64>, ptr %b
%cmp = icmp ule <2 x i64> %op1, %op2
@@ -404,6 +550,14 @@ define void @icmp_ult_v2i64(ptr %a, ptr %b) {
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_ult_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: cmhi v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i64>, ptr %a
%op2 = load <2 x i64>, ptr %b
%cmp = icmp ult <2 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index e6fd775b4cfb9..d79d6c18ed5a6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -2,6 +2,7 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -24,6 +25,31 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.h[2]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: smov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s0, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: sdiv w8, w12, w11
+; NONEON-NOSVE-NEXT: mov v0.h[2], w10
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -51,6 +77,45 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: smov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: smov w10, v0.b[0]
+; NONEON-NOSVE-NEXT: smov w11, v0.b[2]
+; NONEON-NOSVE-NEXT: smov w12, v0.b[3]
+; NONEON-NOSVE-NEXT: smov w13, v0.b[4]
+; NONEON-NOSVE-NEXT: smov w14, v0.b[5]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v1.b[0]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.b[2]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: smov w11, v1.b[3]
+; NONEON-NOSVE-NEXT: fmov s2, w9
+; NONEON-NOSVE-NEXT: smov w9, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: sdiv w11, w12, w11
+; NONEON-NOSVE-NEXT: smov w12, v1.b[4]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w10
+; NONEON-NOSVE-NEXT: smov w10, v0.b[6]
+; NONEON-NOSVE-NEXT: sdiv w12, w13, w12
+; NONEON-NOSVE-NEXT: smov w13, v1.b[5]
+; NONEON-NOSVE-NEXT: mov v2.b[3], w11
+; NONEON-NOSVE-NEXT: smov w11, v0.b[7]
+; NONEON-NOSVE-NEXT: sdiv w8, w14, w13
+; NONEON-NOSVE-NEXT: mov v2.b[4], w12
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.b[7]
+; NONEON-NOSVE-NEXT: mov v2.b[5], w8
+; NONEON-NOSVE-NEXT: sdiv w8, w11, w10
+; NONEON-NOSVE-NEXT: mov v2.b[6], w9
+; NONEON-NOSVE-NEXT: mov v2.b[7], w8
+; NONEON-NOSVE-NEXT: fmov d0, d2
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -98,6 +163,75 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: smov w10, v0.b[0]
+; NONEON-NOSVE-NEXT: smov w11, v0.b[2]
+; NONEON-NOSVE-NEXT: smov w12, v0.b[3]
+; NONEON-NOSVE-NEXT: smov w13, v0.b[4]
+; NONEON-NOSVE-NEXT: smov w14, v0.b[5]
+; NONEON-NOSVE-NEXT: smov w15, v0.b[6]
+; NONEON-NOSVE-NEXT: smov w16, v0.b[7]
+; NONEON-NOSVE-NEXT: smov w17, v0.b[8]
+; NONEON-NOSVE-NEXT: smov w18, v0.b[9]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v1.b[0]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.b[2]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: smov w11, v1.b[3]
+; NONEON-NOSVE-NEXT: fmov s2, w9
+; NONEON-NOSVE-NEXT: smov w9, v1.b[10]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: sdiv w11, w12, w11
+; NONEON-NOSVE-NEXT: smov w12, v1.b[4]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w10
+; NONEON-NOSVE-NEXT: smov w10, v0.b[10]
+; NONEON-NOSVE-NEXT: sdiv w12, w13, w12
+; NONEON-NOSVE-NEXT: smov w13, v1.b[5]
+; NONEON-NOSVE-NEXT: mov v2.b[3], w11
+; NONEON-NOSVE-NEXT: smov w11, v0.b[11]
+; NONEON-NOSVE-NEXT: sdiv w13, w14, w13
+; NONEON-NOSVE-NEXT: smov w14, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v2.b[4], w12
+; NONEON-NOSVE-NEXT: smov w12, v0.b[12]
+; NONEON-NOSVE-NEXT: sdiv w14, w15, w14
+; NONEON-NOSVE-NEXT: smov w15, v1.b[7]
+; NONEON-NOSVE-NEXT: mov v2.b[5], w13
+; NONEON-NOSVE-NEXT: smov w13, v0.b[13]
+; NONEON-NOSVE-NEXT: sdiv w15, w16, w15
+; NONEON-NOSVE-NEXT: smov w16, v1.b[8]
+; NONEON-NOSVE-NEXT: mov v2.b[6], w14
+; NONEON-NOSVE-NEXT: sdiv w16, w17, w16
+; NONEON-NOSVE-NEXT: smov w17, v1.b[9]
+; NONEON-NOSVE-NEXT: mov v2.b[7], w15
+; NONEON-NOSVE-NEXT: sdiv w8, w18, w17
+; NONEON-NOSVE-NEXT: mov v2.b[8], w16
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.b[11]
+; NONEON-NOSVE-NEXT: mov v2.b[9], w8
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: smov w11, v1.b[12]
+; NONEON-NOSVE-NEXT: mov v2.b[10], w9
+; NONEON-NOSVE-NEXT: smov w9, v1.b[14]
+; NONEON-NOSVE-NEXT: sdiv w11, w12, w11
+; NONEON-NOSVE-NEXT: smov w12, v1.b[13]
+; NONEON-NOSVE-NEXT: mov v2.b[11], w10
+; NONEON-NOSVE-NEXT: smov w10, v1.b[15]
+; NONEON-NOSVE-NEXT: sdiv w8, w13, w12
+; NONEON-NOSVE-NEXT: smov w12, v0.b[14]
+; NONEON-NOSVE-NEXT: mov v2.b[12], w11
+; NONEON-NOSVE-NEXT: smov w11, v0.b[15]
+; NONEON-NOSVE-NEXT: sdiv w9, w12, w9
+; NONEON-NOSVE-NEXT: mov v2.b[13], w8
+; NONEON-NOSVE-NEXT: sdiv w8, w11, w10
+; NONEON-NOSVE-NEXT: mov v2.b[14], w9
+; NONEON-NOSVE-NEXT: mov v2.b[15], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -178,6 +312,163 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b
; CHECK-NEXT: stp q3, q2, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str x27, [sp, #-80]! // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -80
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: smov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: smov w10, v0.b[0]
+; NONEON-NOSVE-NEXT: smov w11, v0.b[2]
+; NONEON-NOSVE-NEXT: smov w12, v0.b[3]
+; NONEON-NOSVE-NEXT: smov w13, v0.b[4]
+; NONEON-NOSVE-NEXT: smov w14, v0.b[5]
+; NONEON-NOSVE-NEXT: smov w15, v0.b[6]
+; NONEON-NOSVE-NEXT: smov w17, v0.b[8]
+; NONEON-NOSVE-NEXT: smov w2, v0.b[10]
+; NONEON-NOSVE-NEXT: smov w3, v0.b[11]
+; NONEON-NOSVE-NEXT: smov w4, v0.b[12]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v1.b[0]
+; NONEON-NOSVE-NEXT: smov w5, v0.b[13]
+; NONEON-NOSVE-NEXT: smov w6, v0.b[14]
+; NONEON-NOSVE-NEXT: smov w1, v3.b[1]
+; NONEON-NOSVE-NEXT: smov w7, v2.b[0]
+; NONEON-NOSVE-NEXT: smov w19, v2.b[2]
+; NONEON-NOSVE-NEXT: smov w20, v2.b[3]
+; NONEON-NOSVE-NEXT: smov w21, v2.b[4]
+; NONEON-NOSVE-NEXT: smov w22, v2.b[5]
+; NONEON-NOSVE-NEXT: smov w23, v2.b[6]
+; NONEON-NOSVE-NEXT: smov w24, v2.b[7]
+; NONEON-NOSVE-NEXT: smov w25, v2.b[8]
+; NONEON-NOSVE-NEXT: smov w26, v2.b[9]
+; NONEON-NOSVE-NEXT: smov w27, v2.b[10]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.b[2]
+; NONEON-NOSVE-NEXT: sdiv w11, w11, w10
+; NONEON-NOSVE-NEXT: smov w10, v1.b[3]
+; NONEON-NOSVE-NEXT: fmov s5, w9
+; NONEON-NOSVE-NEXT: smov w9, v3.b[11]
+; NONEON-NOSVE-NEXT: mov v5.b[1], w8
+; NONEON-NOSVE-NEXT: sdiv w10, w12, w10
+; NONEON-NOSVE-NEXT: smov w12, v1.b[4]
+; NONEON-NOSVE-NEXT: mov v5.b[2], w11
+; NONEON-NOSVE-NEXT: smov w11, v2.b[11]
+; NONEON-NOSVE-NEXT: sdiv w13, w13, w12
+; NONEON-NOSVE-NEXT: smov w12, v1.b[5]
+; NONEON-NOSVE-NEXT: mov v5.b[3], w10
+; NONEON-NOSVE-NEXT: smov w10, v3.b[12]
+; NONEON-NOSVE-NEXT: sdiv w12, w14, w12
+; NONEON-NOSVE-NEXT: smov w14, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v5.b[4], w13
+; NONEON-NOSVE-NEXT: smov w13, v2.b[14]
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: smov w14, v1.b[7]
+; NONEON-NOSVE-NEXT: smov w15, v0.b[7]
+; NONEON-NOSVE-NEXT: mov v5.b[5], w12
+; NONEON-NOSVE-NEXT: smov w12, v2.b[13]
+; NONEON-NOSVE-NEXT: sdiv w14, w15, w14
+; NONEON-NOSVE-NEXT: smov w15, v1.b[8]
+; NONEON-NOSVE-NEXT: mov v5.b[6], w16
+; NONEON-NOSVE-NEXT: sdiv w18, w17, w15
+; NONEON-NOSVE-NEXT: smov w15, v1.b[9]
+; NONEON-NOSVE-NEXT: smov w17, v0.b[9]
+; NONEON-NOSVE-NEXT: mov v5.b[7], w14
+; NONEON-NOSVE-NEXT: sdiv w17, w17, w15
+; NONEON-NOSVE-NEXT: smov w15, v1.b[10]
+; NONEON-NOSVE-NEXT: mov v5.b[8], w18
+; NONEON-NOSVE-NEXT: sdiv w15, w2, w15
+; NONEON-NOSVE-NEXT: smov w2, v1.b[11]
+; NONEON-NOSVE-NEXT: mov v5.b[9], w17
+; NONEON-NOSVE-NEXT: sdiv w2, w3, w2
+; NONEON-NOSVE-NEXT: smov w3, v1.b[12]
+; NONEON-NOSVE-NEXT: mov v5.b[10], w15
+; NONEON-NOSVE-NEXT: sdiv w3, w4, w3
+; NONEON-NOSVE-NEXT: smov w4, v1.b[13]
+; NONEON-NOSVE-NEXT: mov v5.b[11], w2
+; NONEON-NOSVE-NEXT: sdiv w4, w5, w4
+; NONEON-NOSVE-NEXT: smov w5, v1.b[14]
+; NONEON-NOSVE-NEXT: mov v5.b[12], w3
+; NONEON-NOSVE-NEXT: sdiv w5, w6, w5
+; NONEON-NOSVE-NEXT: smov w6, v2.b[1]
+; NONEON-NOSVE-NEXT: mov v5.b[13], w4
+; NONEON-NOSVE-NEXT: sdiv w1, w6, w1
+; NONEON-NOSVE-NEXT: smov w6, v3.b[0]
+; NONEON-NOSVE-NEXT: mov v5.b[14], w5
+; NONEON-NOSVE-NEXT: sdiv w6, w7, w6
+; NONEON-NOSVE-NEXT: smov w7, v3.b[2]
+; NONEON-NOSVE-NEXT: sdiv w7, w19, w7
+; NONEON-NOSVE-NEXT: smov w19, v3.b[3]
+; NONEON-NOSVE-NEXT: fmov s4, w6
+; NONEON-NOSVE-NEXT: mov v4.b[1], w1
+; NONEON-NOSVE-NEXT: sdiv w19, w20, w19
+; NONEON-NOSVE-NEXT: smov w20, v3.b[4]
+; NONEON-NOSVE-NEXT: mov v4.b[2], w7
+; NONEON-NOSVE-NEXT: sdiv w20, w21, w20
+; NONEON-NOSVE-NEXT: smov w21, v3.b[5]
+; NONEON-NOSVE-NEXT: mov v4.b[3], w19
+; NONEON-NOSVE-NEXT: sdiv w21, w22, w21
+; NONEON-NOSVE-NEXT: smov w22, v3.b[6]
+; NONEON-NOSVE-NEXT: mov v4.b[4], w20
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w22, w23, w22
+; NONEON-NOSVE-NEXT: smov w23, v3.b[7]
+; NONEON-NOSVE-NEXT: mov v4.b[5], w21
+; NONEON-NOSVE-NEXT: sdiv w23, w24, w23
+; NONEON-NOSVE-NEXT: smov w24, v3.b[8]
+; NONEON-NOSVE-NEXT: mov v4.b[6], w22
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w24, w25, w24
+; NONEON-NOSVE-NEXT: smov w25, v3.b[9]
+; NONEON-NOSVE-NEXT: mov v4.b[7], w23
+; NONEON-NOSVE-NEXT: sdiv w25, w26, w25
+; NONEON-NOSVE-NEXT: smov w26, v3.b[10]
+; NONEON-NOSVE-NEXT: mov v4.b[8], w24
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w8, w27, w26
+; NONEON-NOSVE-NEXT: mov v4.b[9], w25
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w9, w11, w9
+; NONEON-NOSVE-NEXT: smov w11, v2.b[12]
+; NONEON-NOSVE-NEXT: mov v4.b[10], w8
+; NONEON-NOSVE-NEXT: smov w8, v3.b[15]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: smov w11, v3.b[13]
+; NONEON-NOSVE-NEXT: mov v4.b[11], w9
+; NONEON-NOSVE-NEXT: smov w9, v1.b[15]
+; NONEON-NOSVE-NEXT: sdiv w11, w12, w11
+; NONEON-NOSVE-NEXT: smov w12, v3.b[14]
+; NONEON-NOSVE-NEXT: mov v4.b[12], w10
+; NONEON-NOSVE-NEXT: smov w10, v0.b[15]
+; NONEON-NOSVE-NEXT: sdiv w12, w13, w12
+; NONEON-NOSVE-NEXT: smov w13, v2.b[15]
+; NONEON-NOSVE-NEXT: mov v4.b[13], w11
+; NONEON-NOSVE-NEXT: sdiv w8, w13, w8
+; NONEON-NOSVE-NEXT: mov v4.b[14], w12
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: mov v4.b[15], w8
+; NONEON-NOSVE-NEXT: mov v5.b[15], w9
+; NONEON-NOSVE-NEXT: stp q4, q5, [x0]
+; NONEON-NOSVE-NEXT: ldr x27, [sp], #80 // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = sdiv <32 x i8> %op1, %op2
@@ -196,6 +487,23 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: shl v1.2s, v1.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: mov w10, v0.s[1]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: mov w9, v1.s[1]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: mov v0.s[1], w9
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <2 x i16> %op1, %op2
ret <2 x i16> %res
}
@@ -212,6 +520,29 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.h[2]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: smov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s0, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: sdiv w8, w12, w11
+; NONEON-NOSVE-NEXT: mov v0.h[2], w10
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -238,6 +569,43 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: smov w13, v0.h[4]
+; NONEON-NOSVE-NEXT: smov w14, v0.h[5]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.h[2]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: smov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s2, w9
+; NONEON-NOSVE-NEXT: smov w9, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w8
+; NONEON-NOSVE-NEXT: sdiv w11, w12, w11
+; NONEON-NOSVE-NEXT: smov w12, v1.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[2], w10
+; NONEON-NOSVE-NEXT: smov w10, v0.h[6]
+; NONEON-NOSVE-NEXT: sdiv w12, w13, w12
+; NONEON-NOSVE-NEXT: smov w13, v1.h[5]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w11
+; NONEON-NOSVE-NEXT: smov w11, v0.h[7]
+; NONEON-NOSVE-NEXT: sdiv w8, w14, w13
+; NONEON-NOSVE-NEXT: mov v2.h[4], w12
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: sdiv w8, w11, w10
+; NONEON-NOSVE-NEXT: mov v2.h[6], w9
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -278,6 +646,79 @@ define void @sdiv_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h
; CHECK-NEXT: stp q3, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: smov w13, v0.h[4]
+; NONEON-NOSVE-NEXT: smov w14, v0.h[5]
+; NONEON-NOSVE-NEXT: smov w15, v0.h[6]
+; NONEON-NOSVE-NEXT: smov w16, v2.h[1]
+; NONEON-NOSVE-NEXT: smov w17, v2.h[0]
+; NONEON-NOSVE-NEXT: smov w18, v2.h[2]
+; NONEON-NOSVE-NEXT: smov w1, v2.h[3]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: smov w2, v2.h[4]
+; NONEON-NOSVE-NEXT: smov w3, v2.h[5]
+; NONEON-NOSVE-NEXT: smov w4, v2.h[6]
+; NONEON-NOSVE-NEXT: sdiv w10, w10, w9
+; NONEON-NOSVE-NEXT: smov w9, v1.h[2]
+; NONEON-NOSVE-NEXT: sdiv w9, w11, w9
+; NONEON-NOSVE-NEXT: smov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s5, w10
+; NONEON-NOSVE-NEXT: smov w10, v3.h[7]
+; NONEON-NOSVE-NEXT: mov v5.h[1], w8
+; NONEON-NOSVE-NEXT: sdiv w11, w12, w11
+; NONEON-NOSVE-NEXT: smov w12, v1.h[4]
+; NONEON-NOSVE-NEXT: mov v5.h[2], w9
+; NONEON-NOSVE-NEXT: smov w9, v2.h[7]
+; NONEON-NOSVE-NEXT: sdiv w12, w13, w12
+; NONEON-NOSVE-NEXT: smov w13, v1.h[5]
+; NONEON-NOSVE-NEXT: mov v5.h[3], w11
+; NONEON-NOSVE-NEXT: smov w11, v0.h[7]
+; NONEON-NOSVE-NEXT: sdiv w13, w14, w13
+; NONEON-NOSVE-NEXT: smov w14, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v5.h[4], w12
+; NONEON-NOSVE-NEXT: sdiv w14, w15, w14
+; NONEON-NOSVE-NEXT: smov w15, v3.h[1]
+; NONEON-NOSVE-NEXT: mov v5.h[5], w13
+; NONEON-NOSVE-NEXT: sdiv w15, w16, w15
+; NONEON-NOSVE-NEXT: smov w16, v3.h[0]
+; NONEON-NOSVE-NEXT: mov v5.h[6], w14
+; NONEON-NOSVE-NEXT: sdiv w16, w17, w16
+; NONEON-NOSVE-NEXT: smov w17, v3.h[2]
+; NONEON-NOSVE-NEXT: sdiv w17, w18, w17
+; NONEON-NOSVE-NEXT: smov w18, v3.h[3]
+; NONEON-NOSVE-NEXT: fmov s4, w16
+; NONEON-NOSVE-NEXT: mov v4.h[1], w15
+; NONEON-NOSVE-NEXT: sdiv w18, w1, w18
+; NONEON-NOSVE-NEXT: smov w1, v3.h[4]
+; NONEON-NOSVE-NEXT: mov v4.h[2], w17
+; NONEON-NOSVE-NEXT: sdiv w1, w2, w1
+; NONEON-NOSVE-NEXT: smov w2, v3.h[5]
+; NONEON-NOSVE-NEXT: mov v4.h[3], w18
+; NONEON-NOSVE-NEXT: sdiv w2, w3, w2
+; NONEON-NOSVE-NEXT: smov w3, v3.h[6]
+; NONEON-NOSVE-NEXT: mov v4.h[4], w1
+; NONEON-NOSVE-NEXT: sdiv w8, w4, w3
+; NONEON-NOSVE-NEXT: mov v4.h[5], w2
+; NONEON-NOSVE-NEXT: sdiv w9, w9, w10
+; NONEON-NOSVE-NEXT: smov w10, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v4.h[6], w8
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: mov v4.h[7], w9
+; NONEON-NOSVE-NEXT: mov v5.h[7], w10
+; NONEON-NOSVE-NEXT: stp q4, q5, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = sdiv <16 x i16> %op1, %op2
@@ -294,6 +735,21 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: mov w10, v0.s[1]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: mov w9, v1.s[1]
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: mov v0.s[1], w9
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -307,6 +763,26 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: fmov w10, s0
+; NONEON-NOSVE-NEXT: mov w11, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w12, v0.s[3]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: fmov w9, s1
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: mov w10, v1.s[2]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: mov w11, v1.s[3]
+; NONEON-NOSVE-NEXT: fmov s0, w9
+; NONEON-NOSVE-NEXT: mov v0.s[1], w8
+; NONEON-NOSVE-NEXT: sdiv w8, w12, w11
+; NONEON-NOSVE-NEXT: mov v0.s[2], w10
+; NONEON-NOSVE-NEXT: mov v0.s[3], w8
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -322,6 +798,45 @@ define void @sdiv_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: fmov w10, s0
+; NONEON-NOSVE-NEXT: mov w11, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w12, v2.s[1]
+; NONEON-NOSVE-NEXT: fmov w13, s2
+; NONEON-NOSVE-NEXT: mov w14, v2.s[2]
+; NONEON-NOSVE-NEXT: mov w15, v2.s[3]
+; NONEON-NOSVE-NEXT: mov w16, v0.s[3]
+; NONEON-NOSVE-NEXT: sdiv w8, w9, w8
+; NONEON-NOSVE-NEXT: fmov w9, s1
+; NONEON-NOSVE-NEXT: sdiv w9, w10, w9
+; NONEON-NOSVE-NEXT: mov w10, v1.s[2]
+; NONEON-NOSVE-NEXT: sdiv w10, w11, w10
+; NONEON-NOSVE-NEXT: mov w11, v3.s[1]
+; NONEON-NOSVE-NEXT: sdiv w11, w12, w11
+; NONEON-NOSVE-NEXT: fmov w12, s3
+; NONEON-NOSVE-NEXT: sdiv w12, w13, w12
+; NONEON-NOSVE-NEXT: mov w13, v3.s[2]
+; NONEON-NOSVE-NEXT: sdiv w13, w14, w13
+; NONEON-NOSVE-NEXT: mov w14, v3.s[3]
+; NONEON-NOSVE-NEXT: fmov s0, w12
+; NONEON-NOSVE-NEXT: mov v0.s[1], w11
+; NONEON-NOSVE-NEXT: sdiv w14, w15, w14
+; NONEON-NOSVE-NEXT: mov w15, v1.s[3]
+; NONEON-NOSVE-NEXT: fmov s1, w9
+; NONEON-NOSVE-NEXT: mov v0.s[2], w13
+; NONEON-NOSVE-NEXT: mov v1.s[1], w8
+; NONEON-NOSVE-NEXT: mov v1.s[2], w10
+; NONEON-NOSVE-NEXT: sdiv w8, w16, w15
+; NONEON-NOSVE-NEXT: mov v0.s[3], w14
+; NONEON-NOSVE-NEXT: mov v1.s[3], w8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = sdiv <8 x i32> %op1, %op2
@@ -338,6 +853,16 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: sdiv x8, x9, x8
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -351,6 +876,18 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x10, v0.d[1]
+; NONEON-NOSVE-NEXT: sdiv x8, x9, x8
+; NONEON-NOSVE-NEXT: mov x9, v1.d[1]
+; NONEON-NOSVE-NEXT: sdiv x9, x10, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -366,6 +903,29 @@ define void @sdiv_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: sdiv z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x10, v2.d[1]
+; NONEON-NOSVE-NEXT: fmov x11, d2
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: mov x12, v0.d[1]
+; NONEON-NOSVE-NEXT: sdiv x8, x9, x8
+; NONEON-NOSVE-NEXT: mov x9, v3.d[1]
+; NONEON-NOSVE-NEXT: sdiv x9, x10, x9
+; NONEON-NOSVE-NEXT: fmov x10, d3
+; NONEON-NOSVE-NEXT: sdiv x10, x11, x10
+; NONEON-NOSVE-NEXT: mov x11, v1.d[1]
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: sdiv x11, x12, x11
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = sdiv <4 x i64> %op1, %op2
@@ -391,6 +951,37 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: and w10, w10, #0xff
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: and w11, w11, #0xff
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.h[2]
+; NONEON-NOSVE-NEXT: and w10, w10, #0xff
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: umov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s0, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: and w9, w11, #0xff
+; NONEON-NOSVE-NEXT: and w11, w12, #0xff
+; NONEON-NOSVE-NEXT: udiv w8, w11, w9
+; NONEON-NOSVE-NEXT: mov v0.h[2], w10
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = udiv <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -418,6 +1009,45 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: umov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: umov w10, v0.b[0]
+; NONEON-NOSVE-NEXT: umov w11, v0.b[2]
+; NONEON-NOSVE-NEXT: umov w12, v0.b[3]
+; NONEON-NOSVE-NEXT: umov w13, v0.b[4]
+; NONEON-NOSVE-NEXT: umov w14, v0.b[5]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v1.b[0]
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.b[2]
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: umov w11, v1.b[3]
+; NONEON-NOSVE-NEXT: fmov s2, w9
+; NONEON-NOSVE-NEXT: umov w9, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: udiv w11, w12, w11
+; NONEON-NOSVE-NEXT: umov w12, v1.b[4]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w10
+; NONEON-NOSVE-NEXT: umov w10, v0.b[6]
+; NONEON-NOSVE-NEXT: udiv w12, w13, w12
+; NONEON-NOSVE-NEXT: umov w13, v1.b[5]
+; NONEON-NOSVE-NEXT: mov v2.b[3], w11
+; NONEON-NOSVE-NEXT: umov w11, v0.b[7]
+; NONEON-NOSVE-NEXT: udiv w8, w14, w13
+; NONEON-NOSVE-NEXT: mov v2.b[4], w12
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.b[7]
+; NONEON-NOSVE-NEXT: mov v2.b[5], w8
+; NONEON-NOSVE-NEXT: udiv w8, w11, w10
+; NONEON-NOSVE-NEXT: mov v2.b[6], w9
+; NONEON-NOSVE-NEXT: mov v2.b[7], w8
+; NONEON-NOSVE-NEXT: fmov d0, d2
+; NONEON-NOSVE-NEXT: ret
%res = udiv <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -465,6 +1095,75 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: umov w10, v0.b[0]
+; NONEON-NOSVE-NEXT: umov w11, v0.b[2]
+; NONEON-NOSVE-NEXT: umov w12, v0.b[3]
+; NONEON-NOSVE-NEXT: umov w13, v0.b[4]
+; NONEON-NOSVE-NEXT: umov w14, v0.b[5]
+; NONEON-NOSVE-NEXT: umov w15, v0.b[6]
+; NONEON-NOSVE-NEXT: umov w16, v0.b[7]
+; NONEON-NOSVE-NEXT: umov w17, v0.b[8]
+; NONEON-NOSVE-NEXT: umov w18, v0.b[9]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v1.b[0]
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.b[2]
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: umov w11, v1.b[3]
+; NONEON-NOSVE-NEXT: fmov s2, w9
+; NONEON-NOSVE-NEXT: umov w9, v1.b[10]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: udiv w11, w12, w11
+; NONEON-NOSVE-NEXT: umov w12, v1.b[4]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w10
+; NONEON-NOSVE-NEXT: umov w10, v0.b[10]
+; NONEON-NOSVE-NEXT: udiv w12, w13, w12
+; NONEON-NOSVE-NEXT: umov w13, v1.b[5]
+; NONEON-NOSVE-NEXT: mov v2.b[3], w11
+; NONEON-NOSVE-NEXT: umov w11, v0.b[11]
+; NONEON-NOSVE-NEXT: udiv w13, w14, w13
+; NONEON-NOSVE-NEXT: umov w14, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v2.b[4], w12
+; NONEON-NOSVE-NEXT: umov w12, v0.b[12]
+; NONEON-NOSVE-NEXT: udiv w14, w15, w14
+; NONEON-NOSVE-NEXT: umov w15, v1.b[7]
+; NONEON-NOSVE-NEXT: mov v2.b[5], w13
+; NONEON-NOSVE-NEXT: umov w13, v0.b[13]
+; NONEON-NOSVE-NEXT: udiv w15, w16, w15
+; NONEON-NOSVE-NEXT: umov w16, v1.b[8]
+; NONEON-NOSVE-NEXT: mov v2.b[6], w14
+; NONEON-NOSVE-NEXT: udiv w16, w17, w16
+; NONEON-NOSVE-NEXT: umov w17, v1.b[9]
+; NONEON-NOSVE-NEXT: mov v2.b[7], w15
+; NONEON-NOSVE-NEXT: udiv w8, w18, w17
+; NONEON-NOSVE-NEXT: mov v2.b[8], w16
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.b[11]
+; NONEON-NOSVE-NEXT: mov v2.b[9], w8
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: umov w11, v1.b[12]
+; NONEON-NOSVE-NEXT: mov v2.b[10], w9
+; NONEON-NOSVE-NEXT: umov w9, v1.b[14]
+; NONEON-NOSVE-NEXT: udiv w11, w12, w11
+; NONEON-NOSVE-NEXT: umov w12, v1.b[13]
+; NONEON-NOSVE-NEXT: mov v2.b[11], w10
+; NONEON-NOSVE-NEXT: umov w10, v1.b[15]
+; NONEON-NOSVE-NEXT: udiv w8, w13, w12
+; NONEON-NOSVE-NEXT: umov w12, v0.b[14]
+; NONEON-NOSVE-NEXT: mov v2.b[12], w11
+; NONEON-NOSVE-NEXT: umov w11, v0.b[15]
+; NONEON-NOSVE-NEXT: udiv w9, w12, w9
+; NONEON-NOSVE-NEXT: mov v2.b[13], w8
+; NONEON-NOSVE-NEXT: udiv w8, w11, w10
+; NONEON-NOSVE-NEXT: mov v2.b[14], w9
+; NONEON-NOSVE-NEXT: mov v2.b[15], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = udiv <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -545,6 +1244,163 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b
; CHECK-NEXT: stp q3, q2, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str x27, [sp, #-80]! // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -80
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: umov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: umov w10, v0.b[0]
+; NONEON-NOSVE-NEXT: umov w11, v0.b[2]
+; NONEON-NOSVE-NEXT: umov w12, v0.b[3]
+; NONEON-NOSVE-NEXT: umov w13, v0.b[4]
+; NONEON-NOSVE-NEXT: umov w14, v0.b[5]
+; NONEON-NOSVE-NEXT: umov w15, v0.b[6]
+; NONEON-NOSVE-NEXT: umov w17, v0.b[8]
+; NONEON-NOSVE-NEXT: umov w2, v0.b[10]
+; NONEON-NOSVE-NEXT: umov w3, v0.b[11]
+; NONEON-NOSVE-NEXT: umov w4, v0.b[12]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v1.b[0]
+; NONEON-NOSVE-NEXT: umov w5, v0.b[13]
+; NONEON-NOSVE-NEXT: umov w6, v0.b[14]
+; NONEON-NOSVE-NEXT: umov w1, v3.b[1]
+; NONEON-NOSVE-NEXT: umov w7, v2.b[0]
+; NONEON-NOSVE-NEXT: umov w19, v2.b[2]
+; NONEON-NOSVE-NEXT: umov w20, v2.b[3]
+; NONEON-NOSVE-NEXT: umov w21, v2.b[4]
+; NONEON-NOSVE-NEXT: umov w22, v2.b[5]
+; NONEON-NOSVE-NEXT: umov w23, v2.b[6]
+; NONEON-NOSVE-NEXT: umov w24, v2.b[7]
+; NONEON-NOSVE-NEXT: umov w25, v2.b[8]
+; NONEON-NOSVE-NEXT: umov w26, v2.b[9]
+; NONEON-NOSVE-NEXT: umov w27, v2.b[10]
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.b[2]
+; NONEON-NOSVE-NEXT: udiv w11, w11, w10
+; NONEON-NOSVE-NEXT: umov w10, v1.b[3]
+; NONEON-NOSVE-NEXT: fmov s5, w9
+; NONEON-NOSVE-NEXT: umov w9, v3.b[11]
+; NONEON-NOSVE-NEXT: mov v5.b[1], w8
+; NONEON-NOSVE-NEXT: udiv w10, w12, w10
+; NONEON-NOSVE-NEXT: umov w12, v1.b[4]
+; NONEON-NOSVE-NEXT: mov v5.b[2], w11
+; NONEON-NOSVE-NEXT: umov w11, v2.b[11]
+; NONEON-NOSVE-NEXT: udiv w13, w13, w12
+; NONEON-NOSVE-NEXT: umov w12, v1.b[5]
+; NONEON-NOSVE-NEXT: mov v5.b[3], w10
+; NONEON-NOSVE-NEXT: umov w10, v3.b[12]
+; NONEON-NOSVE-NEXT: udiv w12, w14, w12
+; NONEON-NOSVE-NEXT: umov w14, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v5.b[4], w13
+; NONEON-NOSVE-NEXT: umov w13, v2.b[14]
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: umov w14, v1.b[7]
+; NONEON-NOSVE-NEXT: umov w15, v0.b[7]
+; NONEON-NOSVE-NEXT: mov v5.b[5], w12
+; NONEON-NOSVE-NEXT: umov w12, v2.b[13]
+; NONEON-NOSVE-NEXT: udiv w14, w15, w14
+; NONEON-NOSVE-NEXT: umov w15, v1.b[8]
+; NONEON-NOSVE-NEXT: mov v5.b[6], w16
+; NONEON-NOSVE-NEXT: udiv w18, w17, w15
+; NONEON-NOSVE-NEXT: umov w15, v1.b[9]
+; NONEON-NOSVE-NEXT: umov w17, v0.b[9]
+; NONEON-NOSVE-NEXT: mov v5.b[7], w14
+; NONEON-NOSVE-NEXT: udiv w17, w17, w15
+; NONEON-NOSVE-NEXT: umov w15, v1.b[10]
+; NONEON-NOSVE-NEXT: mov v5.b[8], w18
+; NONEON-NOSVE-NEXT: udiv w15, w2, w15
+; NONEON-NOSVE-NEXT: umov w2, v1.b[11]
+; NONEON-NOSVE-NEXT: mov v5.b[9], w17
+; NONEON-NOSVE-NEXT: udiv w2, w3, w2
+; NONEON-NOSVE-NEXT: umov w3, v1.b[12]
+; NONEON-NOSVE-NEXT: mov v5.b[10], w15
+; NONEON-NOSVE-NEXT: udiv w3, w4, w3
+; NONEON-NOSVE-NEXT: umov w4, v1.b[13]
+; NONEON-NOSVE-NEXT: mov v5.b[11], w2
+; NONEON-NOSVE-NEXT: udiv w4, w5, w4
+; NONEON-NOSVE-NEXT: umov w5, v1.b[14]
+; NONEON-NOSVE-NEXT: mov v5.b[12], w3
+; NONEON-NOSVE-NEXT: udiv w5, w6, w5
+; NONEON-NOSVE-NEXT: umov w6, v2.b[1]
+; NONEON-NOSVE-NEXT: mov v5.b[13], w4
+; NONEON-NOSVE-NEXT: udiv w1, w6, w1
+; NONEON-NOSVE-NEXT: umov w6, v3.b[0]
+; NONEON-NOSVE-NEXT: mov v5.b[14], w5
+; NONEON-NOSVE-NEXT: udiv w6, w7, w6
+; NONEON-NOSVE-NEXT: umov w7, v3.b[2]
+; NONEON-NOSVE-NEXT: udiv w7, w19, w7
+; NONEON-NOSVE-NEXT: umov w19, v3.b[3]
+; NONEON-NOSVE-NEXT: fmov s4, w6
+; NONEON-NOSVE-NEXT: mov v4.b[1], w1
+; NONEON-NOSVE-NEXT: udiv w19, w20, w19
+; NONEON-NOSVE-NEXT: umov w20, v3.b[4]
+; NONEON-NOSVE-NEXT: mov v4.b[2], w7
+; NONEON-NOSVE-NEXT: udiv w20, w21, w20
+; NONEON-NOSVE-NEXT: umov w21, v3.b[5]
+; NONEON-NOSVE-NEXT: mov v4.b[3], w19
+; NONEON-NOSVE-NEXT: udiv w21, w22, w21
+; NONEON-NOSVE-NEXT: umov w22, v3.b[6]
+; NONEON-NOSVE-NEXT: mov v4.b[4], w20
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w22, w23, w22
+; NONEON-NOSVE-NEXT: umov w23, v3.b[7]
+; NONEON-NOSVE-NEXT: mov v4.b[5], w21
+; NONEON-NOSVE-NEXT: udiv w23, w24, w23
+; NONEON-NOSVE-NEXT: umov w24, v3.b[8]
+; NONEON-NOSVE-NEXT: mov v4.b[6], w22
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w24, w25, w24
+; NONEON-NOSVE-NEXT: umov w25, v3.b[9]
+; NONEON-NOSVE-NEXT: mov v4.b[7], w23
+; NONEON-NOSVE-NEXT: udiv w25, w26, w25
+; NONEON-NOSVE-NEXT: umov w26, v3.b[10]
+; NONEON-NOSVE-NEXT: mov v4.b[8], w24
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w8, w27, w26
+; NONEON-NOSVE-NEXT: mov v4.b[9], w25
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w9, w11, w9
+; NONEON-NOSVE-NEXT: umov w11, v2.b[12]
+; NONEON-NOSVE-NEXT: mov v4.b[10], w8
+; NONEON-NOSVE-NEXT: umov w8, v3.b[15]
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: umov w11, v3.b[13]
+; NONEON-NOSVE-NEXT: mov v4.b[11], w9
+; NONEON-NOSVE-NEXT: umov w9, v1.b[15]
+; NONEON-NOSVE-NEXT: udiv w11, w12, w11
+; NONEON-NOSVE-NEXT: umov w12, v3.b[14]
+; NONEON-NOSVE-NEXT: mov v4.b[12], w10
+; NONEON-NOSVE-NEXT: umov w10, v0.b[15]
+; NONEON-NOSVE-NEXT: udiv w12, w13, w12
+; NONEON-NOSVE-NEXT: umov w13, v2.b[15]
+; NONEON-NOSVE-NEXT: mov v4.b[13], w11
+; NONEON-NOSVE-NEXT: udiv w8, w13, w8
+; NONEON-NOSVE-NEXT: mov v4.b[14], w12
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: mov v4.b[15], w8
+; NONEON-NOSVE-NEXT: mov v5.b[15], w9
+; NONEON-NOSVE-NEXT: stp q4, q5, [x0]
+; NONEON-NOSVE-NEXT: ldr x27, [sp], #80 // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = udiv <32 x i8> %op1, %op2
@@ -563,6 +1419,22 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: mov w10, v0.s[1]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: mov w9, v1.s[1]
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: mov v0.s[1], w9
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = udiv <2 x i16> %op1, %op2
ret <2 x i16> %res
}
@@ -579,6 +1451,29 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.h[2]
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: umov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s0, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: udiv w8, w12, w11
+; NONEON-NOSVE-NEXT: mov v0.h[2], w10
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = udiv <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -605,6 +1500,43 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: umov w13, v0.h[4]
+; NONEON-NOSVE-NEXT: umov w14, v0.h[5]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.h[2]
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: umov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s2, w9
+; NONEON-NOSVE-NEXT: umov w9, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w8
+; NONEON-NOSVE-NEXT: udiv w11, w12, w11
+; NONEON-NOSVE-NEXT: umov w12, v1.h[4]
+; NONEON-NOSVE-NEXT: mov v2.h[2], w10
+; NONEON-NOSVE-NEXT: umov w10, v0.h[6]
+; NONEON-NOSVE-NEXT: udiv w12, w13, w12
+; NONEON-NOSVE-NEXT: umov w13, v1.h[5]
+; NONEON-NOSVE-NEXT: mov v2.h[3], w11
+; NONEON-NOSVE-NEXT: umov w11, v0.h[7]
+; NONEON-NOSVE-NEXT: udiv w8, w14, w13
+; NONEON-NOSVE-NEXT: mov v2.h[4], w12
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: udiv w8, w11, w10
+; NONEON-NOSVE-NEXT: mov v2.h[6], w9
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = udiv <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -645,6 +1577,79 @@ define void @udiv_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h
; CHECK-NEXT: stp q3, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w10, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w11, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w12, v0.h[3]
+; NONEON-NOSVE-NEXT: umov w13, v0.h[4]
+; NONEON-NOSVE-NEXT: umov w14, v0.h[5]
+; NONEON-NOSVE-NEXT: umov w15, v0.h[6]
+; NONEON-NOSVE-NEXT: umov w16, v2.h[1]
+; NONEON-NOSVE-NEXT: umov w17, v2.h[0]
+; NONEON-NOSVE-NEXT: umov w18, v2.h[2]
+; NONEON-NOSVE-NEXT: umov w1, v2.h[3]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v1.h[0]
+; NONEON-NOSVE-NEXT: umov w2, v2.h[4]
+; NONEON-NOSVE-NEXT: umov w3, v2.h[5]
+; NONEON-NOSVE-NEXT: umov w4, v2.h[6]
+; NONEON-NOSVE-NEXT: udiv w10, w10, w9
+; NONEON-NOSVE-NEXT: umov w9, v1.h[2]
+; NONEON-NOSVE-NEXT: udiv w9, w11, w9
+; NONEON-NOSVE-NEXT: umov w11, v1.h[3]
+; NONEON-NOSVE-NEXT: fmov s5, w10
+; NONEON-NOSVE-NEXT: umov w10, v3.h[7]
+; NONEON-NOSVE-NEXT: mov v5.h[1], w8
+; NONEON-NOSVE-NEXT: udiv w11, w12, w11
+; NONEON-NOSVE-NEXT: umov w12, v1.h[4]
+; NONEON-NOSVE-NEXT: mov v5.h[2], w9
+; NONEON-NOSVE-NEXT: umov w9, v2.h[7]
+; NONEON-NOSVE-NEXT: udiv w12, w13, w12
+; NONEON-NOSVE-NEXT: umov w13, v1.h[5]
+; NONEON-NOSVE-NEXT: mov v5.h[3], w11
+; NONEON-NOSVE-NEXT: umov w11, v0.h[7]
+; NONEON-NOSVE-NEXT: udiv w13, w14, w13
+; NONEON-NOSVE-NEXT: umov w14, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v5.h[4], w12
+; NONEON-NOSVE-NEXT: udiv w14, w15, w14
+; NONEON-NOSVE-NEXT: umov w15, v3.h[1]
+; NONEON-NOSVE-NEXT: mov v5.h[5], w13
+; NONEON-NOSVE-NEXT: udiv w15, w16, w15
+; NONEON-NOSVE-NEXT: umov w16, v3.h[0]
+; NONEON-NOSVE-NEXT: mov v5.h[6], w14
+; NONEON-NOSVE-NEXT: udiv w16, w17, w16
+; NONEON-NOSVE-NEXT: umov w17, v3.h[2]
+; NONEON-NOSVE-NEXT: udiv w17, w18, w17
+; NONEON-NOSVE-NEXT: umov w18, v3.h[3]
+; NONEON-NOSVE-NEXT: fmov s4, w16
+; NONEON-NOSVE-NEXT: mov v4.h[1], w15
+; NONEON-NOSVE-NEXT: udiv w18, w1, w18
+; NONEON-NOSVE-NEXT: umov w1, v3.h[4]
+; NONEON-NOSVE-NEXT: mov v4.h[2], w17
+; NONEON-NOSVE-NEXT: udiv w1, w2, w1
+; NONEON-NOSVE-NEXT: umov w2, v3.h[5]
+; NONEON-NOSVE-NEXT: mov v4.h[3], w18
+; NONEON-NOSVE-NEXT: udiv w2, w3, w2
+; NONEON-NOSVE-NEXT: umov w3, v3.h[6]
+; NONEON-NOSVE-NEXT: mov v4.h[4], w1
+; NONEON-NOSVE-NEXT: udiv w8, w4, w3
+; NONEON-NOSVE-NEXT: mov v4.h[5], w2
+; NONEON-NOSVE-NEXT: udiv w9, w9, w10
+; NONEON-NOSVE-NEXT: umov w10, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v4.h[6], w8
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: mov v4.h[7], w9
+; NONEON-NOSVE-NEXT: mov v5.h[7], w10
+; NONEON-NOSVE-NEXT: stp q4, q5, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = udiv <16 x i16> %op1, %op2
@@ -661,6 +1666,21 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: mov w10, v0.s[1]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: mov w9, v1.s[1]
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: mov v0.s[1], w9
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = udiv <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -674,6 +1694,26 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: fmov w10, s0
+; NONEON-NOSVE-NEXT: mov w11, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w12, v0.s[3]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: fmov w9, s1
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: mov w10, v1.s[2]
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: mov w11, v1.s[3]
+; NONEON-NOSVE-NEXT: fmov s0, w9
+; NONEON-NOSVE-NEXT: mov v0.s[1], w8
+; NONEON-NOSVE-NEXT: udiv w8, w12, w11
+; NONEON-NOSVE-NEXT: mov v0.s[2], w10
+; NONEON-NOSVE-NEXT: mov v0.s[3], w8
+; NONEON-NOSVE-NEXT: ret
%res = udiv <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -689,6 +1729,45 @@ define void @udiv_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: fmov w10, s0
+; NONEON-NOSVE-NEXT: mov w11, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w12, v2.s[1]
+; NONEON-NOSVE-NEXT: fmov w13, s2
+; NONEON-NOSVE-NEXT: mov w14, v2.s[2]
+; NONEON-NOSVE-NEXT: mov w15, v2.s[3]
+; NONEON-NOSVE-NEXT: mov w16, v0.s[3]
+; NONEON-NOSVE-NEXT: udiv w8, w9, w8
+; NONEON-NOSVE-NEXT: fmov w9, s1
+; NONEON-NOSVE-NEXT: udiv w9, w10, w9
+; NONEON-NOSVE-NEXT: mov w10, v1.s[2]
+; NONEON-NOSVE-NEXT: udiv w10, w11, w10
+; NONEON-NOSVE-NEXT: mov w11, v3.s[1]
+; NONEON-NOSVE-NEXT: udiv w11, w12, w11
+; NONEON-NOSVE-NEXT: fmov w12, s3
+; NONEON-NOSVE-NEXT: udiv w12, w13, w12
+; NONEON-NOSVE-NEXT: mov w13, v3.s[2]
+; NONEON-NOSVE-NEXT: udiv w13, w14, w13
+; NONEON-NOSVE-NEXT: mov w14, v3.s[3]
+; NONEON-NOSVE-NEXT: fmov s0, w12
+; NONEON-NOSVE-NEXT: mov v0.s[1], w11
+; NONEON-NOSVE-NEXT: udiv w14, w15, w14
+; NONEON-NOSVE-NEXT: mov w15, v1.s[3]
+; NONEON-NOSVE-NEXT: fmov s1, w9
+; NONEON-NOSVE-NEXT: mov v0.s[2], w13
+; NONEON-NOSVE-NEXT: mov v1.s[1], w8
+; NONEON-NOSVE-NEXT: mov v1.s[2], w10
+; NONEON-NOSVE-NEXT: udiv w8, w16, w15
+; NONEON-NOSVE-NEXT: mov v0.s[3], w14
+; NONEON-NOSVE-NEXT: mov v1.s[3], w8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = udiv <8 x i32> %op1, %op2
@@ -705,6 +1784,16 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: udiv x8, x9, x8
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = udiv <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -718,6 +1807,18 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x10, v0.d[1]
+; NONEON-NOSVE-NEXT: udiv x8, x9, x8
+; NONEON-NOSVE-NEXT: mov x9, v1.d[1]
+; NONEON-NOSVE-NEXT: udiv x9, x10, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: ret
%res = udiv <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -733,6 +1834,29 @@ define void @udiv_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: udiv z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x10, v2.d[1]
+; NONEON-NOSVE-NEXT: fmov x11, d2
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: mov x12, v0.d[1]
+; NONEON-NOSVE-NEXT: udiv x8, x9, x8
+; NONEON-NOSVE-NEXT: mov x9, v3.d[1]
+; NONEON-NOSVE-NEXT: udiv x9, x10, x9
+; NONEON-NOSVE-NEXT: fmov x10, d3
+; NONEON-NOSVE-NEXT: udiv x10, x11, x10
+; NONEON-NOSVE-NEXT: mov x11, v1.d[1]
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: udiv x11, x12, x11
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = udiv <4 x i64> %op1, %op2
@@ -778,6 +1902,27 @@ define void @udiv_constantsplat_v8i32(ptr %a) {
; SVE2-NEXT: lsr z0.s, z0.s, #6
; SVE2-NEXT: stp q1, q0, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: udiv_constantsplat_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #8969 // =0x2309
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: movk w8, #22765, lsl #16
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: umull2 v3.2d, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: umull v4.2d, v1.2s, v0.2s
+; NONEON-NOSVE-NEXT: umull2 v5.2d, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: umull v0.2d, v2.2s, v0.2s
+; NONEON-NOSVE-NEXT: uzp2 v3.4s, v4.4s, v3.4s
+; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v5.4s
+; NONEON-NOSVE-NEXT: sub v1.4s, v1.4s, v3.4s
+; NONEON-NOSVE-NEXT: sub v2.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: usra v3.4s, v1.4s, #1
+; NONEON-NOSVE-NEXT: usra v0.4s, v2.4s, #1
+; NONEON-NOSVE-NEXT: ushr v1.4s, v3.4s, #6
+; NONEON-NOSVE-NEXT: ushr v0.4s, v0.4s, #6
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = udiv <8 x i32> %op1, <i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95, i32 95>
store <8 x i32> %res, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index e40668a8696ee..9f8511b00c6ed 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -2,6 +2,7 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -26,6 +27,22 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
; CHECK-NEXT: asr z0.s, z0.s, #31
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v8i1_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: shl v0.4s, v0.4s, #31
+; NONEON-NOSVE-NEXT: shl v1.4s, v1.4s, #31
+; NONEON-NOSVE-NEXT: cmlt v0.4s, v0.4s, #0
+; NONEON-NOSVE-NEXT: cmlt v1.4s, v1.4s, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i1> %a to <8 x i32>
store <8 x i32> %b, ptr %out
ret void
@@ -52,6 +69,22 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
; CHECK-NEXT: asr z0.d, z0.d, #61
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v4i3_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: shl v0.2d, v0.2d, #61
+; NONEON-NOSVE-NEXT: shl v1.2d, v1.2d, #61
+; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #61
+; NONEON-NOSVE-NEXT: sshr v1.2d, v1.2d, #61
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <4 x i3> %a to <4 x i64>
store <4 x i64> %b, ptr %out
ret void
@@ -70,6 +103,17 @@ define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v16i8_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <16 x i8> %a to <16 x i16>
store <16 x i16>%b, ptr %out
ret void
@@ -91,6 +135,24 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v32i8_v32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: sshll v2.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: sshll v3.8h, v3.8b, #0
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = sext <32 x i8> %b to <32 x i16>
@@ -112,6 +174,18 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v8i8_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i8> %a to <8 x i32>
store <8 x i32>%b, ptr %out
ret void
@@ -133,6 +207,25 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
; CHECK-NEXT: stp q2, q1, [x0]
; CHECK-NEXT: stp q3, q0, [x0, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v16i8_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%b = sext <16 x i8> %a to <16 x i32>
store <16 x i32> %b, ptr %out
ret void
@@ -167,6 +260,40 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
; CHECK-NEXT: stp q6, q0, [x1, #96]
; CHECK-NEXT: stp q7, q1, [x1, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v32i8_v32i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: sshll v2.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: sshll v3.8h, v3.8b, #0
+; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #72]
+; NONEON-NOSVE-NEXT: sshll v5.4s, v5.4h, #0
+; NONEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q5, [x1]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: sshll v2.4s, v6.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v7.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = sext <32 x i8> %b to <32 x i32>
@@ -194,6 +321,22 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
; CHECK-NEXT: sxtb z0.d, p0/m, z0.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v4i8_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: shl v0.2d, v0.2d, #56
+; NONEON-NOSVE-NEXT: shl v1.2d, v1.2d, #56
+; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #56
+; NONEON-NOSVE-NEXT: sshr v1.2d, v1.2d, #56
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <4 x i8> %a to <4 x i64>
store <4 x i64>%b, ptr %out
ret void
@@ -216,6 +359,26 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
; CHECK-NEXT: stp q2, q1, [x0]
; CHECK-NEXT: stp q3, q0, [x0, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v8i8_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i8> %a to <8 x i64>
store <8 x i64>%b, ptr %out
ret void
@@ -253,6 +416,41 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
; CHECK-NEXT: stp q1, q4, [x0, #32]
; CHECK-NEXT: stp q0, q2, [x0, #96]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v16i8_v16i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-112]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 112
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #40]
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #80]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #72]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #104]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #56]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #88]
+; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: stp q1, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: sshll v2.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q4, [x0]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: stp q0, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #112
+; NONEON-NOSVE-NEXT: ret
%b = sext <16 x i8> %a to <16 x i64>
store <16 x i64> %b, ptr %out
ret void
@@ -321,6 +519,73 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
; CHECK-NEXT: stp q0, q2, [x1, #224]
; CHECK-NEXT: stp q3, q1, [x1, #96]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v32i8_v32i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #224
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 224
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp]
+; NONEON-NOSVE-NEXT: sshll v5.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: sshll v6.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v3.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v4.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: stp q3, q5, [sp, #32]
+; NONEON-NOSVE-NEXT: sshll v5.4s, v5.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #56]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
+; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #64]
+; NONEON-NOSVE-NEXT: sshll v6.4s, v6.4h, #0
+; NONEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #88]
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #72]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v7.4s, v7.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q5, [sp, #128]
+; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ldr d19, [sp, #152]
+; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #96]
+; NONEON-NOSVE-NEXT: ldr d20, [sp, #136]
+; NONEON-NOSVE-NEXT: stp q1, q4, [sp, #160]
+; NONEON-NOSVE-NEXT: ldr d17, [sp, #104]
+; NONEON-NOSVE-NEXT: ldr d21, [sp, #120]
+; NONEON-NOSVE-NEXT: stp q7, q6, [sp, #192]
+; NONEON-NOSVE-NEXT: sshll v6.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: sshll v19.2d, v19.2s, #0
+; NONEON-NOSVE-NEXT: ldr d16, [sp, #216]
+; NONEON-NOSVE-NEXT: ldr d22, [sp, #200]
+; NONEON-NOSVE-NEXT: ldr d23, [sp, #184]
+; NONEON-NOSVE-NEXT: ldr d18, [sp, #168]
+; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: sshll v16.2d, v16.2s, #0
+; NONEON-NOSVE-NEXT: stp q5, q19, [x1]
+; NONEON-NOSVE-NEXT: sshll v5.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: sshll v7.2d, v22.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: stp q6, q16, [x1, #128]
+; NONEON-NOSVE-NEXT: sshll v6.2d, v23.2s, #0
+; NONEON-NOSVE-NEXT: stp q5, q7, [x1, #160]
+; NONEON-NOSVE-NEXT: sshll v5.2d, v20.2s, #0
+; NONEON-NOSVE-NEXT: stp q4, q6, [x1, #192]
+; NONEON-NOSVE-NEXT: sshll v4.2d, v21.2s, #0
+; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #32]
+; NONEON-NOSVE-NEXT: sshll v2.2d, v17.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: sshll v3.2d, v18.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #96]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #224]
+; NONEON-NOSVE-NEXT: add sp, sp, #224
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = sext <32 x i8> %b to <32 x i64>
@@ -341,6 +606,17 @@ define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v8i16_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i16> %a to <8 x i32>
store <8 x i32>%b, ptr %out
ret void
@@ -361,6 +637,24 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v16i16_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = sext <16 x i16> %b to <16 x i32>
@@ -382,6 +676,18 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v4i16_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <4 x i16> %a to <4 x i64>
store <4 x i64>%b, ptr %out
ret void
@@ -403,6 +709,25 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
; CHECK-NEXT: stp q2, q1, [x0]
; CHECK-NEXT: stp q3, q0, [x0, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v8i16_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%b = sext <8 x i16> %a to <8 x i64>
store <8 x i64>%b, ptr %out
ret void
@@ -437,6 +762,40 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
; CHECK-NEXT: stp q6, q0, [x1, #96]
; CHECK-NEXT: stp q7, q1, [x1, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v16i16_v16i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #72]
+; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q5, [x1]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: sshll v2.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = sext <16 x i16> %b to <16 x i64>
@@ -457,6 +816,17 @@ define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
; CHECK-NEXT: sunpklo z0.d, z0.s
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v4i32_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = sext <4 x i32> %a to <4 x i64>
store <4 x i64>%b, ptr %out
ret void
@@ -477,6 +847,24 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sext_v8i32_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i32>, ptr %in
%b = add <8 x i32> %a, %a
%c = sext <8 x i32> %b to <8 x i64>
@@ -497,6 +885,17 @@ define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v16i8_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = zext <16 x i8> %a to <16 x i16>
store <16 x i16>%b, ptr %out
ret void
@@ -518,6 +917,24 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v32i8_v32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: ushll v3.8h, v3.8b, #0
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = zext <32 x i8> %b to <32 x i16>
@@ -539,6 +956,18 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v8i8_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = zext <8 x i8> %a to <8 x i32>
store <8 x i32>%b, ptr %out
ret void
@@ -560,6 +989,25 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
; CHECK-NEXT: stp q2, q1, [x0]
; CHECK-NEXT: stp q3, q0, [x0, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v16i8_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%b = zext <16 x i8> %a to <16 x i32>
store <16 x i32> %b, ptr %out
ret void
@@ -594,6 +1042,40 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
; CHECK-NEXT: stp q6, q0, [x1, #96]
; CHECK-NEXT: stp q7, q1, [x1, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v32i8_v32i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: ushll v3.8h, v3.8b, #0
+; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #72]
+; NONEON-NOSVE-NEXT: ushll v5.4s, v5.4h, #0
+; NONEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q5, [x1]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ushll v2.4s, v6.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: ushll v1.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v7.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = zext <32 x i8> %b to <32 x i32>
@@ -619,6 +1101,20 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v4i8_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = zext <4 x i8> %a to <4 x i64>
store <4 x i64>%b, ptr %out
ret void
@@ -641,6 +1137,26 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
; CHECK-NEXT: stp q2, q1, [x0]
; CHECK-NEXT: stp q3, q0, [x0, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v8i8_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%b = zext <8 x i8> %a to <8 x i64>
store <8 x i64>%b, ptr %out
ret void
@@ -678,6 +1194,41 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
; CHECK-NEXT: stp q1, q4, [x0, #32]
; CHECK-NEXT: stp q0, q2, [x0, #96]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v16i8_v16i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-112]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 112
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #40]
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #48]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q0, [sp, #80]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #72]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #104]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #56]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #88]
+; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: stp q1, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ushll v2.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q4, [x0]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: stp q1, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: stp q0, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #112
+; NONEON-NOSVE-NEXT: ret
%b = zext <16 x i8> %a to <16 x i64>
store <16 x i64> %b, ptr %out
ret void
@@ -746,6 +1297,73 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
; CHECK-NEXT: stp q0, q2, [x1, #224]
; CHECK-NEXT: stp q3, q1, [x1, #96]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v32i8_v32i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #224
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 224
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp]
+; NONEON-NOSVE-NEXT: ushll v5.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: ushll v6.8h, v1.8b, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v3.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v4.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: stp q3, q5, [sp, #32]
+; NONEON-NOSVE-NEXT: ushll v5.4s, v5.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #56]
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #40]
+; NONEON-NOSVE-NEXT: stp q4, q6, [sp, #64]
+; NONEON-NOSVE-NEXT: ushll v6.4s, v6.4h, #0
+; NONEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #88]
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #72]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v7.4s, v7.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q5, [sp, #128]
+; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ldr d19, [sp, #152]
+; NONEON-NOSVE-NEXT: stp q0, q3, [sp, #96]
+; NONEON-NOSVE-NEXT: ldr d20, [sp, #136]
+; NONEON-NOSVE-NEXT: stp q1, q4, [sp, #160]
+; NONEON-NOSVE-NEXT: ldr d17, [sp, #104]
+; NONEON-NOSVE-NEXT: ldr d21, [sp, #120]
+; NONEON-NOSVE-NEXT: stp q7, q6, [sp, #192]
+; NONEON-NOSVE-NEXT: ushll v6.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: ushll v19.2d, v19.2s, #0
+; NONEON-NOSVE-NEXT: ldr d16, [sp, #216]
+; NONEON-NOSVE-NEXT: ldr d22, [sp, #200]
+; NONEON-NOSVE-NEXT: ldr d23, [sp, #184]
+; NONEON-NOSVE-NEXT: ldr d18, [sp, #168]
+; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ushll v16.2d, v16.2s, #0
+; NONEON-NOSVE-NEXT: stp q5, q19, [x1]
+; NONEON-NOSVE-NEXT: ushll v5.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: ushll v7.2d, v22.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: stp q6, q16, [x1, #128]
+; NONEON-NOSVE-NEXT: ushll v6.2d, v23.2s, #0
+; NONEON-NOSVE-NEXT: stp q5, q7, [x1, #160]
+; NONEON-NOSVE-NEXT: ushll v5.2d, v20.2s, #0
+; NONEON-NOSVE-NEXT: stp q4, q6, [x1, #192]
+; NONEON-NOSVE-NEXT: ushll v4.2d, v21.2s, #0
+; NONEON-NOSVE-NEXT: stp q2, q5, [x1, #32]
+; NONEON-NOSVE-NEXT: ushll v2.2d, v17.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: ushll v3.2d, v18.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #96]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #224]
+; NONEON-NOSVE-NEXT: add sp, sp, #224
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
%b = add <32 x i8> %a, %a
%c = zext <32 x i8> %b to <32 x i64>
@@ -766,6 +1384,17 @@ define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v8i16_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = zext <8 x i16> %a to <8 x i32>
store <8 x i32>%b, ptr %out
ret void
@@ -786,6 +1415,24 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v16i16_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = zext <16 x i16> %b to <16 x i32>
@@ -807,6 +1454,18 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v4i16_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = zext <4 x i16> %a to <4 x i64>
store <4 x i64>%b, ptr %out
ret void
@@ -828,6 +1487,25 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
; CHECK-NEXT: stp q2, q1, [x0]
; CHECK-NEXT: stp q3, q0, [x0, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v8i16_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%b = zext <8 x i16> %a to <8 x i64>
store <8 x i64>%b, ptr %out
ret void
@@ -862,6 +1540,40 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
; CHECK-NEXT: stp q6, q0, [x1, #96]
; CHECK-NEXT: stp q7, q1, [x1, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v16i16_v16i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #72]
+; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q5, [x1]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ushll v2.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = add <16 x i16> %a, %a
%c = zext <16 x i16> %b to <16 x i64>
@@ -882,6 +1594,17 @@ define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v4i32_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%b = zext <4 x i32> %a to <4 x i64>
store <4 x i64>%b, ptr %out
ret void
@@ -902,6 +1625,24 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zext_v8i32_v8i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i32>, ptr %in
%b = add <8 x i32> %a, %a
%c = zext <8 x i32> %b to <8 x i64>
@@ -928,6 +1669,21 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
; SVE2-NEXT: mul z0.d, z1.d, z0.d
; SVE2-NEXT: str q0, [x1]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extend_and_mul:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v1.2s, w0
+; NONEON-NOSVE-NEXT: fmov x10, d0
+; NONEON-NOSVE-NEXT: mov x8, v0.d[1]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: fmov x11, d1
+; NONEON-NOSVE-NEXT: mov x9, v1.d[1]
+; NONEON-NOSVE-NEXT: mul x10, x11, x10
+; NONEON-NOSVE-NEXT: mul x8, x9, x8
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: mov v0.d[1], x8
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0
%broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer
%4 = zext <2 x i32> %broadcast.splat3 to <2 x i64>
@@ -943,6 +1699,13 @@ define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) {
; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: extend_no_mul:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: dup v0.2s, w0
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
entry:
%broadcast.splatinsert2 = insertelement <2 x i32> poison, i32 %0, i64 0
%broadcast.splat3 = shufflevector <2 x i32> %broadcast.splatinsert2, <2 x i32> poison, <2 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
index 54276bb4ba01d..ade60b07150ce 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -22,6 +23,15 @@ define void @add_v32i8(ptr %a) {
; CHECK-NEXT: add z1.b, z1.b, #7 // =0x7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i32 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -38,6 +48,16 @@ define void @add_v16i16(ptr %a) {
; CHECK-NEXT: add z1.h, z1.h, #15 // =0xf
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -54,6 +74,16 @@ define void @add_v8i32(ptr %a) {
; CHECK-NEXT: add z1.s, z1.s, #31 // =0x1f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -70,6 +100,16 @@ define void @add_v4i64(ptr %a) {
; CHECK-NEXT: add z1.d, z1.d, #63 // =0x3f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: add v1.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: add v0.2d, v2.2d, v0.2d
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -90,6 +130,15 @@ define void @and_v32i8(ptr %a) {
; CHECK-NEXT: and z1.b, z1.b, #0x7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i32 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -106,6 +155,16 @@ define void @and_v16i16(ptr %a) {
; CHECK-NEXT: and z1.h, z1.h, #0xf
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -122,6 +181,16 @@ define void @and_v8i32(ptr %a) {
; CHECK-NEXT: and z1.s, z1.s, #0x1f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -138,6 +207,16 @@ define void @and_v4i64(ptr %a) {
; CHECK-NEXT: and z1.d, z1.d, #0x3f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -158,6 +237,14 @@ define void @ashr_v32i8(ptr %a) {
; CHECK-NEXT: asr z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: cmlt v1.16b, v1.16b, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i32 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -174,6 +261,14 @@ define void @ashr_v16i16(ptr %a) {
; CHECK-NEXT: asr z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v0.8h, v0.8h, #0
+; NONEON-NOSVE-NEXT: cmlt v1.8h, v1.8h, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -190,6 +285,14 @@ define void @ashr_v8i32(ptr %a) {
; CHECK-NEXT: asr z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v0.4s, v0.4s, #0
+; NONEON-NOSVE-NEXT: cmlt v1.4s, v1.4s, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -206,6 +309,14 @@ define void @ashr_v4i64(ptr %a) {
; CHECK-NEXT: asr z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v0.2d, v0.2d, #0
+; NONEON-NOSVE-NEXT: cmlt v1.2d, v1.2d, #0
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -229,6 +340,15 @@ define void @icmp_eq_v32i8(ptr %a) {
; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_eq_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmeq v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: cmeq v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -249,6 +369,16 @@ define void @icmp_sge_v16i16(ptr %a) {
; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_sge_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: cmge v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: cmge v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -269,6 +399,16 @@ define void @icmp_sgt_v8i32(ptr %a) {
; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_sgt_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #-8 // =0xfffffff8
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: cmgt v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: cmgt v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 -8, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -289,6 +429,16 @@ define void @icmp_ult_v4i64(ptr %a) {
; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: icmp_ult_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: cmhi v1.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: cmhi v0.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -310,6 +460,14 @@ define void @lshr_v32i8(ptr %a) {
; CHECK-NEXT: lsr z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ushr v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: ushr v1.16b, v1.16b, #7
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -326,6 +484,14 @@ define void @lshr_v16i16(ptr %a) {
; CHECK-NEXT: lsr z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ushr v0.8h, v0.8h, #15
+; NONEON-NOSVE-NEXT: ushr v1.8h, v1.8h, #15
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -342,6 +508,14 @@ define void @lshr_v8i32(ptr %a) {
; CHECK-NEXT: lsr z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ushr v0.4s, v0.4s, #31
+; NONEON-NOSVE-NEXT: ushr v1.4s, v1.4s, #31
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -358,6 +532,14 @@ define void @lshr_v4i64(ptr %a) {
; CHECK-NEXT: lsr z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ushr v0.2d, v0.2d, #63
+; NONEON-NOSVE-NEXT: ushr v1.2d, v1.2d, #63
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -378,6 +560,15 @@ define void @mul_v32i8(ptr %a) {
; CHECK-NEXT: mul z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: mul v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: mul v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -394,6 +585,16 @@ define void @mul_v16i16(ptr %a) {
; CHECK-NEXT: mul z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: mul v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: mul v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -410,6 +611,16 @@ define void @mul_v8i32(ptr %a) {
; CHECK-NEXT: mul z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: mul v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: mul v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -426,6 +637,28 @@ define void @mul_v4i64(ptr %a) {
; CHECK-NEXT: mul z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: fmov x10, d0
+; NONEON-NOSVE-NEXT: fmov x11, d1
+; NONEON-NOSVE-NEXT: mov x8, v0.d[1]
+; NONEON-NOSVE-NEXT: mov x9, v1.d[1]
+; NONEON-NOSVE-NEXT: lsl x12, x10, #6
+; NONEON-NOSVE-NEXT: lsl x13, x11, #6
+; NONEON-NOSVE-NEXT: lsl x14, x8, #6
+; NONEON-NOSVE-NEXT: sub x10, x12, x10
+; NONEON-NOSVE-NEXT: sub x11, x13, x11
+; NONEON-NOSVE-NEXT: lsl x12, x9, #6
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: fmov d1, x11
+; NONEON-NOSVE-NEXT: sub x8, x14, x8
+; NONEON-NOSVE-NEXT: sub x9, x12, x9
+; NONEON-NOSVE-NEXT: mov v0.d[1], x8
+; NONEON-NOSVE-NEXT: mov v1.d[1], x9
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -446,6 +679,15 @@ define void @or_v32i8(ptr %a) {
; CHECK-NEXT: orr z1.b, z1.b, #0x7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -462,6 +704,16 @@ define void @or_v16i16(ptr %a) {
; CHECK-NEXT: orr z1.h, z1.h, #0xf
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -478,6 +730,16 @@ define void @or_v8i32(ptr %a) {
; CHECK-NEXT: orr z1.s, z1.s, #0x1f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -494,6 +756,16 @@ define void @or_v4i64(ptr %a) {
; CHECK-NEXT: orr z1.d, z1.d, #0x3f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: orr v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -514,6 +786,14 @@ define void @shl_v32i8(ptr %a) {
; CHECK-NEXT: lsl z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: shl v1.16b, v1.16b, #7
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -530,6 +810,14 @@ define void @shl_v16i16(ptr %a) {
; CHECK-NEXT: lsl z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: shl v0.8h, v0.8h, #15
+; NONEON-NOSVE-NEXT: shl v1.8h, v1.8h, #15
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -546,6 +834,14 @@ define void @shl_v8i32(ptr %a) {
; CHECK-NEXT: lsl z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: shl v0.4s, v0.4s, #31
+; NONEON-NOSVE-NEXT: shl v1.4s, v1.4s, #31
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -562,6 +858,14 @@ define void @shl_v4i64(ptr %a) {
; CHECK-NEXT: lsl z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: shl v0.2d, v0.2d, #63
+; NONEON-NOSVE-NEXT: shl v1.2d, v1.2d, #63
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -582,6 +886,15 @@ define void @smax_v32i8(ptr %a) {
; CHECK-NEXT: smax z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smax v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: smax v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -598,6 +911,16 @@ define void @smax_v16i16(ptr %a) {
; CHECK-NEXT: smax z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: smax v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: smax v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -614,6 +937,16 @@ define void @smax_v8i32(ptr %a) {
; CHECK-NEXT: smax z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: smax v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: smax v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -630,6 +963,18 @@ define void @smax_v4i64(ptr %a) {
; CHECK-NEXT: smax z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: cmgt v3.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: cmgt v4.2d, v2.2d, v0.2d
+; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b
+; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -650,6 +995,15 @@ define void @smin_v32i8(ptr %a) {
; CHECK-NEXT: smin z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smin v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: smin v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -666,6 +1020,16 @@ define void @smin_v16i16(ptr %a) {
; CHECK-NEXT: smin z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: smin v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: smin v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -682,6 +1046,16 @@ define void @smin_v8i32(ptr %a) {
; CHECK-NEXT: smin z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: smin v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: smin v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -698,6 +1072,18 @@ define void @smin_v4i64(ptr %a) {
; CHECK-NEXT: smin z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: cmgt v3.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: cmgt v4.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b
+; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -718,6 +1104,15 @@ define void @sub_v32i8(ptr %a) {
; CHECK-NEXT: sub z1.b, z1.b, #7 // =0x7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: sub v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: sub v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -734,6 +1129,16 @@ define void @sub_v16i16(ptr %a) {
; CHECK-NEXT: sub z1.h, z1.h, #15 // =0xf
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: sub v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: sub v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -750,6 +1155,16 @@ define void @sub_v8i32(ptr %a) {
; CHECK-NEXT: sub z1.s, z1.s, #31 // =0x1f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: sub v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: sub v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -766,6 +1181,16 @@ define void @sub_v4i64(ptr %a) {
; CHECK-NEXT: sub z1.d, z1.d, #63 // =0x3f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sub_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: sub v1.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: sub v0.2d, v2.2d, v0.2d
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -786,6 +1211,15 @@ define void @umax_v32i8(ptr %a) {
; CHECK-NEXT: umax z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umax v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: umax v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -802,6 +1236,16 @@ define void @umax_v16i16(ptr %a) {
; CHECK-NEXT: umax z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: umax v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: umax v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -818,6 +1262,16 @@ define void @umax_v8i32(ptr %a) {
; CHECK-NEXT: umax z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: umax v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: umax v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -834,6 +1288,18 @@ define void @umax_v4i64(ptr %a) {
; CHECK-NEXT: umax z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: cmhi v3.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: cmhi v4.2d, v2.2d, v0.2d
+; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b
+; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -854,6 +1320,15 @@ define void @umin_v32i8(ptr %a) {
; CHECK-NEXT: umin z1.b, z1.b, #7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umin v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: umin v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -870,6 +1345,16 @@ define void @umin_v16i16(ptr %a) {
; CHECK-NEXT: umin z1.h, z1.h, #15
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: umin v1.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: umin v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -886,6 +1371,16 @@ define void @umin_v8i32(ptr %a) {
; CHECK-NEXT: umin z1.s, z1.s, #31
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: umin v1.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: umin v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -902,6 +1397,18 @@ define void @umin_v4i64(ptr %a) {
; CHECK-NEXT: umin z1.d, z1.d, #63
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: cmhi v3.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: cmhi v4.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: bif v1.16b, v0.16b, v3.16b
+; NONEON-NOSVE-NEXT: bit v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -922,6 +1429,15 @@ define void @xor_v32i8(ptr %a) {
; CHECK-NEXT: eor z1.b, z1.b, #0x7
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #7
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%ins = insertelement <32 x i8> undef, i8 7, i64 0
%op2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -938,6 +1454,16 @@ define void @xor_v16i16(ptr %a) {
; CHECK-NEXT: eor z1.h, z1.h, #0xf
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #15 // =0xf
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%ins = insertelement <16 x i16> undef, i16 15, i64 0
%op2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -954,6 +1480,16 @@ define void @xor_v8i32(ptr %a) {
; CHECK-NEXT: eor z1.s, z1.s, #0x1f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%ins = insertelement <8 x i32> undef, i32 31, i64 0
%op2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
@@ -970,6 +1506,16 @@ define void @xor_v4i64(ptr %a) {
; CHECK-NEXT: eor z1.d, z1.d, #0x3f
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #63 // =0x3f
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: eor v1.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%ins = insertelement <4 x i64> undef, i64 63, i64 0
%op2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 40824ba9ae9c5..4fc7ec3a8439d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -16,6 +17,11 @@ define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = and <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -28,6 +34,11 @@ define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = and <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -41,6 +52,15 @@ define void @and_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: and z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = and <32 x i8> %op1, %op2
@@ -56,6 +76,11 @@ define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = and <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -68,6 +93,11 @@ define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = and <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -81,6 +111,15 @@ define void @and_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: and z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = and <16 x i16> %op1, %op2
@@ -96,6 +135,11 @@ define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = and <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -108,6 +152,11 @@ define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = and <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -121,6 +170,15 @@ define void @and_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: and z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = and <8 x i32> %op1, %op2
@@ -136,6 +194,11 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = and <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -148,6 +211,11 @@ define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = and <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -161,6 +229,15 @@ define void @and_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: and z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: and_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: and v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = and <4 x i64> %op1, %op2
@@ -180,6 +257,11 @@ define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = or <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -192,6 +274,11 @@ define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = or <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -205,6 +292,15 @@ define void @or_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: orr z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = or <32 x i8> %op1, %op2
@@ -220,6 +316,11 @@ define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = or <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -232,6 +333,11 @@ define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = or <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -245,6 +351,15 @@ define void @or_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: orr z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = or <16 x i16> %op1, %op2
@@ -260,6 +375,11 @@ define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = or <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -272,6 +392,11 @@ define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = or <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -285,6 +410,15 @@ define void @or_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: orr z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = or <8 x i32> %op1, %op2
@@ -300,6 +434,11 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = or <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -312,6 +451,11 @@ define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: orr v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = or <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -325,6 +469,15 @@ define void @or_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: orr z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: or_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: orr v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = or <4 x i64> %op1, %op2
@@ -344,6 +497,11 @@ define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = xor <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -356,6 +514,11 @@ define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = xor <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -369,6 +532,15 @@ define void @xor_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: eor z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = xor <32 x i8> %op1, %op2
@@ -384,6 +556,11 @@ define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = xor <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -396,6 +573,11 @@ define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = xor <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -409,6 +591,15 @@ define void @xor_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: eor z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = xor <16 x i16> %op1, %op2
@@ -424,6 +615,11 @@ define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = xor <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -436,6 +632,11 @@ define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = xor <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -449,6 +650,15 @@ define void @xor_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: eor z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = xor <8 x i32> %op1, %op2
@@ -464,6 +674,11 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = xor <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -476,6 +691,11 @@ define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: eor v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = xor <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -489,6 +709,15 @@ define void @xor_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: eor z1.d, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: xor_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: eor v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = xor <4 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index 74ee5482a60c4..b9c859a58611e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,11 @@ define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smax v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
ret <8 x i8> %res
}
@@ -30,6 +36,11 @@ define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smax v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
ret <16 x i8> %res
}
@@ -45,6 +56,15 @@ define void @smax_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: smax z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smax v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: smax v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
@@ -61,6 +81,11 @@ define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smax v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
ret <4 x i16> %res
}
@@ -74,6 +99,11 @@ define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smax v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
ret <8 x i16> %res
}
@@ -89,6 +119,15 @@ define void @smax_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: smax z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smax v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: smax v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
@@ -105,6 +144,11 @@ define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smax v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
ret <2 x i32> %res
}
@@ -118,6 +162,11 @@ define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smax v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
ret <4 x i32> %res
}
@@ -133,6 +182,15 @@ define void @smax_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: smax z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smax v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: smax v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
@@ -150,6 +208,12 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmgt d2, d0, d1
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
ret <1 x i64> %res
}
@@ -164,6 +228,12 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmgt v2.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
ret <2 x i64> %res
}
@@ -179,6 +249,18 @@ define void @smax_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smax_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmgt v4.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: cmgt v5.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
@@ -199,6 +281,11 @@ define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smin v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
ret <8 x i8> %res
}
@@ -212,6 +299,11 @@ define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smin v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
ret <16 x i8> %res
}
@@ -227,6 +319,15 @@ define void @smin_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: smin z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smin v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: smin v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
@@ -243,6 +344,11 @@ define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smin v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
ret <4 x i16> %res
}
@@ -256,6 +362,11 @@ define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smin v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
ret <8 x i16> %res
}
@@ -271,6 +382,15 @@ define void @smin_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: smin z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smin v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: smin v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
@@ -287,6 +407,11 @@ define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smin v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
ret <2 x i32> %res
}
@@ -300,6 +425,11 @@ define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smin v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
ret <4 x i32> %res
}
@@ -315,6 +445,15 @@ define void @smin_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: smin z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smin v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: smin v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
@@ -332,6 +471,12 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmgt d2, d1, d0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
ret <1 x i64> %res
}
@@ -346,6 +491,12 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
ret <2 x i64> %res
}
@@ -361,6 +512,18 @@ define void @smin_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: smin z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smin_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmgt v4.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: cmgt v5.2d, v3.2d, v2.2d
+; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
@@ -381,6 +544,11 @@ define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umax v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %op1, <8 x i8> %op2)
ret <8 x i8> %res
}
@@ -394,6 +562,11 @@ define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umax v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %op1, <16 x i8> %op2)
ret <16 x i8> %res
}
@@ -409,6 +582,15 @@ define void @umax_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: umax z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umax v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: umax v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %op1, <32 x i8> %op2)
@@ -425,6 +607,11 @@ define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umax v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %op1, <4 x i16> %op2)
ret <4 x i16> %res
}
@@ -438,6 +625,11 @@ define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umax v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %op1, <8 x i16> %op2)
ret <8 x i16> %res
}
@@ -453,6 +645,15 @@ define void @umax_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: umax z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umax v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: umax v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %op1, <16 x i16> %op2)
@@ -469,6 +670,11 @@ define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umax v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %op1, <2 x i32> %op2)
ret <2 x i32> %res
}
@@ -482,6 +688,11 @@ define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umax v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %op1, <4 x i32> %op2)
ret <4 x i32> %res
}
@@ -497,6 +708,15 @@ define void @umax_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: umax z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umax v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: umax v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %op1, <8 x i32> %op2)
@@ -514,6 +734,12 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmhi d2, d0, d1
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
ret <1 x i64> %res
}
@@ -528,6 +754,12 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmhi v2.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
ret <2 x i64> %res
}
@@ -543,6 +775,18 @@ define void @umax_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: umax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umax_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmhi v4.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: cmhi v5.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %op1, <4 x i64> %op2)
@@ -563,6 +807,11 @@ define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umin v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %op1, <8 x i8> %op2)
ret <8 x i8> %res
}
@@ -576,6 +825,11 @@ define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umin v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %op1, <16 x i8> %op2)
ret <16 x i8> %res
}
@@ -591,6 +845,15 @@ define void @umin_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: umin z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umin v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: umin v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %op1, <32 x i8> %op2)
@@ -607,6 +870,11 @@ define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umin v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %op1, <4 x i16> %op2)
ret <4 x i16> %res
}
@@ -620,6 +888,11 @@ define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umin v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %op1, <8 x i16> %op2)
ret <8 x i16> %res
}
@@ -635,6 +908,15 @@ define void @umin_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: umin z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umin v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: umin v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %op1, <16 x i16> %op2)
@@ -651,6 +933,11 @@ define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umin v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %op1, <2 x i32> %op2)
ret <2 x i32> %res
}
@@ -664,6 +951,11 @@ define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umin v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %op1, <4 x i32> %op2)
ret <4 x i32> %res
}
@@ -679,6 +971,15 @@ define void @umin_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: umin z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umin v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: umin v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %op1, <8 x i32> %op2)
@@ -696,6 +997,12 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmhi d2, d1, d0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
ret <1 x i64> %res
}
@@ -710,6 +1017,12 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
ret <2 x i64> %res
}
@@ -725,6 +1038,18 @@ define void @umin_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: umin z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umin_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: cmhi v4.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: cmhi v5.2d, v3.2d, v2.2d
+; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %op1, <4 x i64> %op2)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
index 3ff6983210a0a..3a03de3442d58 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sme-fa64 -force-streaming-compatible < %s | FileCheck %s -check-prefix=FA64
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -check-prefix=NO-FA64
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -20,6 +21,12 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; NO-FA64-NEXT: mad z0.b, p0/m, z1.b, z2.b
; NO-FA64-NEXT: // kill: def $d0 killed $d0 killed $z0
; NO-FA64-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mla8xi8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mla v2.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov d0, d2
+; NONEON-NOSVE-NEXT: ret
%tmp1 = mul <8 x i8> %A, %B;
%tmp2 = add <8 x i8> %C, %tmp1;
ret <8 x i8> %tmp2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index 8917f43002daf..1ed3d8fa39d8d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -2,6 +2,7 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
; This test only tests the legal types for a given vector width, as mulh nodes
; do not get generated for non-legal types.
@@ -36,6 +37,16 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; SVE2-NEXT: lsr z0.h, z0.h, #4
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8
+; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ushr v0.4h, v0.4h, #4
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x i16> undef, i16 4, i64 0
%splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
%1 = sext <4 x i8> %op1 to <4 x i16>
@@ -63,6 +74,12 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; SVE2-NEXT: smulh z0.b, z0.b, z1.b
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smull v0.8h, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: shrn v0.8b, v0.8h, #8
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x i16> undef, i16 8, i64 0
%splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
%1 = sext <8 x i8> %op1 to <8 x i16>
@@ -90,6 +107,13 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; SVE2-NEXT: smulh z0.b, z0.b, z1.b
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smull2 v2.8h, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: smull v0.8h, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%1 = sext <16 x i8> %op1 to <16 x i16>
%2 = sext <16 x i8> %op2 to <16 x i16>
%mul = mul <16 x i16> %1, %2
@@ -118,6 +142,19 @@ define void @smulh_v32i8(ptr %a, ptr %b) {
; SVE2-NEXT: smulh z1.b, z2.b, z3.b
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smull2 v4.8h, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: smull v0.8h, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: smull2 v1.8h, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: smull v2.8h, v2.8b, v3.8b
+; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v4.16b
+; NONEON-NOSVE-NEXT: uzp2 v1.16b, v2.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%1 = sext <32 x i8> %op1 to <32 x i16>
@@ -153,6 +190,16 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; SVE2-NEXT: lsr z0.s, z0.s, #16
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: shl v1.2s, v1.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16
+; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: ret
%1 = sext <2 x i16> %op1 to <2 x i32>
%2 = sext <2 x i16> %op2 to <2 x i32>
%mul = mul <2 x i32> %1, %2
@@ -178,6 +225,12 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; SVE2-NEXT: smulh z0.h, z0.h, z1.h
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smull v0.4s, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: shrn v0.4h, v0.4s, #16
+; NONEON-NOSVE-NEXT: ret
%1 = sext <4 x i16> %op1 to <4 x i32>
%2 = sext <4 x i16> %op2 to <4 x i32>
%mul = mul <4 x i32> %1, %2
@@ -203,6 +256,13 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; SVE2-NEXT: smulh z0.h, z0.h, z1.h
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smull2 v2.4s, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: smull v0.4s, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: ret
%1 = sext <8 x i16> %op1 to <8 x i32>
%2 = sext <8 x i16> %op2 to <8 x i32>
%mul = mul <8 x i32> %1, %2
@@ -231,6 +291,19 @@ define void @smulh_v16i16(ptr %a, ptr %b) {
; SVE2-NEXT: smulh z1.h, z2.h, z3.h
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smull2 v4.4s, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: smull v0.4s, v1.4h, v0.4h
+; NONEON-NOSVE-NEXT: smull2 v1.4s, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: smull v2.4s, v2.4h, v3.4h
+; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp2 v1.8h, v2.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%1 = sext <16 x i16> %op1 to <16 x i32>
@@ -259,6 +332,12 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; SVE2-NEXT: smulh z0.s, z0.s, z1.s
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smull v0.2d, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: shrn v0.2s, v0.2d, #32
+; NONEON-NOSVE-NEXT: ret
%1 = sext <2 x i32> %op1 to <2 x i64>
%2 = sext <2 x i32> %op2 to <2 x i64>
%mul = mul <2 x i64> %1, %2
@@ -284,6 +363,13 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; SVE2-NEXT: smulh z0.s, z0.s, z1.s
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smull2 v2.2d, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: smull v0.2d, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v2.4s
+; NONEON-NOSVE-NEXT: ret
%1 = sext <4 x i32> %op1 to <4 x i64>
%2 = sext <4 x i32> %op2 to <4 x i64>
%mul = mul <4 x i64> %1, %2
@@ -312,6 +398,19 @@ define void @smulh_v8i32(ptr %a, ptr %b) {
; SVE2-NEXT: smulh z1.s, z2.s, z3.s
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: smull2 v4.2d, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: smull v0.2d, v1.2s, v0.2s
+; NONEON-NOSVE-NEXT: smull2 v1.2d, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: smull v2.2d, v2.2s, v3.2s
+; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v4.4s
+; NONEON-NOSVE-NEXT: uzp2 v1.4s, v2.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%1 = sext <8 x i32> %op1 to <8 x i64>
@@ -340,6 +439,16 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; SVE2-NEXT: smulh z0.d, z0.d, z1.d
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: fmov x9, d1
+; NONEON-NOSVE-NEXT: smulh x8, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <1 x i128> undef, i128 64, i128 0
%splat = shufflevector <1 x i128> %insert, <1 x i128> undef, <1 x i32> zeroinitializer
%1 = sext <1 x i64> %op1 to <1 x i128>
@@ -367,6 +476,19 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; SVE2-NEXT: smulh z0.d, z0.d, z1.d
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov x8, v0.d[1]
+; NONEON-NOSVE-NEXT: mov x9, v1.d[1]
+; NONEON-NOSVE-NEXT: fmov x10, d0
+; NONEON-NOSVE-NEXT: fmov x11, d1
+; NONEON-NOSVE-NEXT: smulh x10, x10, x11
+; NONEON-NOSVE-NEXT: smulh x8, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%1 = sext <2 x i64> %op1 to <2 x i128>
%2 = sext <2 x i64> %op2 to <2 x i128>
%mul = mul <2 x i128> %1, %2
@@ -395,6 +517,31 @@ define void @smulh_v4i64(ptr %a, ptr %b) {
; SVE2-NEXT: smulh z1.d, z2.d, z3.d
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smulh_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x11, v0.d[1]
+; NONEON-NOSVE-NEXT: mov x14, v3.d[1]
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: mov x10, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x13, v2.d[1]
+; NONEON-NOSVE-NEXT: fmov x12, d3
+; NONEON-NOSVE-NEXT: smulh x8, x8, x9
+; NONEON-NOSVE-NEXT: fmov x9, d2
+; NONEON-NOSVE-NEXT: smulh x10, x10, x11
+; NONEON-NOSVE-NEXT: smulh x9, x9, x12
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: smulh x11, x13, x14
+; NONEON-NOSVE-NEXT: fmov d1, x10
+; NONEON-NOSVE-NEXT: fmov d2, x9
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: fmov d3, x11
+; NONEON-NOSVE-NEXT: mov v2.d[1], v3.d[0]
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%1 = sext <4 x i64> %op1 to <4 x i128>
@@ -433,6 +580,15 @@ define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; SVE2-NEXT: lsr z0.h, z0.h, #4
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: mul v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ushr v0.4h, v0.4h, #4
+; NONEON-NOSVE-NEXT: ret
%1 = zext <4 x i8> %op1 to <4 x i16>
%2 = zext <4 x i8> %op2 to <4 x i16>
%mul = mul <4 x i16> %1, %2
@@ -458,6 +614,12 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; SVE2-NEXT: umulh z0.b, z0.b, z1.b
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umull v0.8h, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: shrn v0.8b, v0.8h, #8
+; NONEON-NOSVE-NEXT: ret
%1 = zext <8 x i8> %op1 to <8 x i16>
%2 = zext <8 x i8> %op2 to <8 x i16>
%mul = mul <8 x i16> %1, %2
@@ -483,6 +645,13 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; SVE2-NEXT: umulh z0.b, z0.b, z1.b
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umull2 v2.8h, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: umull v0.8h, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%1 = zext <16 x i8> %op1 to <16 x i16>
%2 = zext <16 x i8> %op2 to <16 x i16>
%mul = mul <16 x i16> %1, %2
@@ -511,6 +680,19 @@ define void @umulh_v32i8(ptr %a, ptr %b) {
; SVE2-NEXT: umulh z1.b, z2.b, z3.b
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umull2 v4.8h, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: umull v0.8h, v1.8b, v0.8b
+; NONEON-NOSVE-NEXT: umull2 v1.8h, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: umull v2.8h, v2.8b, v3.8b
+; NONEON-NOSVE-NEXT: uzp2 v0.16b, v0.16b, v4.16b
+; NONEON-NOSVE-NEXT: uzp2 v1.16b, v2.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%1 = zext <32 x i8> %op1 to <32 x i16>
@@ -545,6 +727,15 @@ define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; SVE2-NEXT: lsr z0.s, z0.s, #16
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: mul v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: ret
%1 = zext <2 x i16> %op1 to <2 x i32>
%2 = zext <2 x i16> %op2 to <2 x i32>
%mul = mul <2 x i32> %1, %2
@@ -570,6 +761,12 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; SVE2-NEXT: umulh z0.h, z0.h, z1.h
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umull v0.4s, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: shrn v0.4h, v0.4s, #16
+; NONEON-NOSVE-NEXT: ret
%1 = zext <4 x i16> %op1 to <4 x i32>
%2 = zext <4 x i16> %op2 to <4 x i32>
%mul = mul <4 x i32> %1, %2
@@ -595,6 +792,13 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; SVE2-NEXT: umulh z0.h, z0.h, z1.h
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: umull v0.4s, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: ret
%1 = zext <8 x i16> %op1 to <8 x i32>
%2 = zext <8 x i16> %op2 to <8 x i32>
%mul = mul <8 x i32> %1, %2
@@ -623,6 +827,19 @@ define void @umulh_v16i16(ptr %a, ptr %b) {
; SVE2-NEXT: umulh z1.h, z2.h, z3.h
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umull2 v4.4s, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: umull v0.4s, v1.4h, v0.4h
+; NONEON-NOSVE-NEXT: umull2 v1.4s, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: umull v2.4s, v2.4h, v3.4h
+; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp2 v1.8h, v2.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%1 = zext <16 x i16> %op1 to <16 x i32>
@@ -651,6 +868,12 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; SVE2-NEXT: umulh z0.s, z0.s, z1.s
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umull v0.2d, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: shrn v0.2s, v0.2d, #32
+; NONEON-NOSVE-NEXT: ret
%1 = zext <2 x i32> %op1 to <2 x i64>
%2 = zext <2 x i32> %op2 to <2 x i64>
%mul = mul <2 x i64> %1, %2
@@ -676,6 +899,13 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; SVE2-NEXT: umulh z0.s, z0.s, z1.s
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: umull v0.2d, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v2.4s
+; NONEON-NOSVE-NEXT: ret
%1 = zext <4 x i32> %op1 to <4 x i64>
%2 = zext <4 x i32> %op2 to <4 x i64>
%mul = mul <4 x i64> %1, %2
@@ -704,6 +934,19 @@ define void @umulh_v8i32(ptr %a, ptr %b) {
; SVE2-NEXT: umulh z1.s, z2.s, z3.s
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: umull2 v4.2d, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: umull v0.2d, v1.2s, v0.2s
+; NONEON-NOSVE-NEXT: umull2 v1.2d, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: umull v2.2d, v2.2s, v3.2s
+; NONEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v4.4s
+; NONEON-NOSVE-NEXT: uzp2 v1.4s, v2.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%insert = insertelement <8 x i64> undef, i64 32, i64 0
@@ -734,6 +977,16 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; SVE2-NEXT: umulh z0.d, z0.d, z1.d
; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: fmov x9, d1
+; NONEON-NOSVE-NEXT: umulh x8, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%1 = zext <1 x i64> %op1 to <1 x i128>
%2 = zext <1 x i64> %op2 to <1 x i128>
%mul = mul <1 x i128> %1, %2
@@ -759,6 +1012,19 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; SVE2-NEXT: umulh z0.d, z0.d, z1.d
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov x8, v0.d[1]
+; NONEON-NOSVE-NEXT: mov x9, v1.d[1]
+; NONEON-NOSVE-NEXT: fmov x10, d0
+; NONEON-NOSVE-NEXT: fmov x11, d1
+; NONEON-NOSVE-NEXT: umulh x10, x10, x11
+; NONEON-NOSVE-NEXT: umulh x8, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: ret
%1 = zext <2 x i64> %op1 to <2 x i128>
%2 = zext <2 x i64> %op2 to <2 x i128>
%mul = mul <2 x i128> %1, %2
@@ -787,6 +1053,31 @@ define void @umulh_v4i64(ptr %a, ptr %b) {
; SVE2-NEXT: umulh z1.d, z2.d, z3.d
; SVE2-NEXT: stp q0, q1, [x0]
; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umulh_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x11, v0.d[1]
+; NONEON-NOSVE-NEXT: mov x14, v3.d[1]
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: mov x10, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x13, v2.d[1]
+; NONEON-NOSVE-NEXT: fmov x12, d3
+; NONEON-NOSVE-NEXT: umulh x8, x8, x9
+; NONEON-NOSVE-NEXT: fmov x9, d2
+; NONEON-NOSVE-NEXT: umulh x10, x10, x11
+; NONEON-NOSVE-NEXT: umulh x9, x9, x12
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: umulh x11, x13, x14
+; NONEON-NOSVE-NEXT: fmov d1, x10
+; NONEON-NOSVE-NEXT: fmov d2, x9
+; NONEON-NOSVE-NEXT: mov v0.d[1], v1.d[0]
+; NONEON-NOSVE-NEXT: fmov d3, x11
+; NONEON-NOSVE-NEXT: mov v2.d[1], v3.d[0]
+; NONEON-NOSVE-NEXT: stp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%1 = zext <4 x i64> %op1 to <4 x i128>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 1123907f33899..ad75ba62e17cf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,12 @@ define i8 @uaddv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: addv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -30,6 +37,12 @@ define i8 @uaddv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: addv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -44,6 +57,14 @@ define i8 @uaddv_v32i8(ptr %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: addv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
ret i8 %res
@@ -58,6 +79,12 @@ define i16 @uaddv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: addv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -71,6 +98,12 @@ define i16 @uaddv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: addv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -85,6 +118,14 @@ define i16 @uaddv_v16i16(ptr %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: addv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
ret i16 %res
@@ -99,6 +140,12 @@ define i32 @uaddv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: addp v0.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -112,6 +159,12 @@ define i32 @uaddv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: addv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -126,6 +179,14 @@ define i32 @uaddv_v8i32(ptr %a) {
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: add v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: addv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
ret i32 %res
@@ -139,6 +200,12 @@ define i64 @uaddv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: uaddv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: addp d0, v0.2d
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -152,6 +219,14 @@ define i64 @uaddv_v4i64(ptr %a) {
; CHECK-NEXT: uaddv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uaddv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: add v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: addp d0, v0.2d
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op)
ret i64 %res
@@ -169,6 +244,12 @@ define i8 @smaxv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smaxv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -181,6 +262,12 @@ define i8 @smaxv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smaxv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -194,6 +281,14 @@ define i8 @smaxv_v32i8(ptr %a) {
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: smax v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: smaxv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op)
ret i8 %res
@@ -207,6 +302,12 @@ define i16 @smaxv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smaxv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -219,6 +320,12 @@ define i16 @smaxv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smaxv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -232,6 +339,14 @@ define i16 @smaxv_v16i16(ptr %a) {
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: smax v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: smaxv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %op)
ret i16 %res
@@ -245,6 +360,12 @@ define i32 @smaxv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smaxp v0.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -257,6 +378,12 @@ define i32 @smaxv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smaxv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -270,6 +397,14 @@ define i32 @smaxv_v8i32(ptr %a) {
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: smax v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: smaxv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op)
ret i32 %res
@@ -284,6 +419,17 @@ define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmgt d2, d0, d1
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -297,6 +443,20 @@ define i64 @smaxv_v4i64(ptr %a) {
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: smaxv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmgt d2, d0, d1
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op)
ret i64 %res
@@ -314,6 +474,12 @@ define i8 @sminv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sminv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -326,6 +492,12 @@ define i8 @sminv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sminv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -339,6 +511,14 @@ define i8 @sminv_v32i8(ptr %a) {
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: smin v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: sminv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op)
ret i8 %res
@@ -352,6 +532,12 @@ define i16 @sminv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sminv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -364,6 +550,12 @@ define i16 @sminv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sminv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -377,6 +569,14 @@ define i16 @sminv_v16i16(ptr %a) {
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: smin v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: sminv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %op)
ret i16 %res
@@ -390,6 +590,12 @@ define i32 @sminv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sminp v0.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -402,6 +608,12 @@ define i32 @sminv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sminv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -415,6 +627,14 @@ define i32 @sminv_v8i32(ptr %a) {
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: smin v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: sminv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op)
ret i32 %res
@@ -429,6 +649,17 @@ define i64 @sminv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: sminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmgt d2, d1, d0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -442,6 +673,20 @@ define i64 @sminv_v4i64(ptr %a) {
; CHECK-NEXT: sminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sminv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmgt v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmgt d2, d1, d0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op)
ret i64 %res
@@ -459,6 +704,12 @@ define i8 @umaxv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umaxv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -471,6 +722,12 @@ define i8 @umaxv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umaxv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -484,6 +741,14 @@ define i8 @umaxv_v32i8(ptr %a) {
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: umax v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: umaxv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op)
ret i8 %res
@@ -497,6 +762,12 @@ define i16 @umaxv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umaxv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -509,6 +780,12 @@ define i16 @umaxv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umaxv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -522,6 +799,14 @@ define i16 @umaxv_v16i16(ptr %a) {
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: umax v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: umaxv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %op)
ret i16 %res
@@ -535,6 +820,12 @@ define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umaxp v0.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -547,6 +838,12 @@ define i32 @umaxv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umaxv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -560,6 +857,14 @@ define i32 @umaxv_v8i32(ptr %a) {
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: umax v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: umaxv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op)
ret i32 %res
@@ -574,6 +879,17 @@ define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmhi d2, d0, d1
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -587,6 +903,20 @@ define i64 @umaxv_v4i64(ptr %a) {
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: umaxv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: bit v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmhi d2, d0, d1
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op)
ret i64 %res
@@ -604,6 +934,12 @@ define i8 @uminv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uminv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -616,6 +952,12 @@ define i8 @uminv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uminv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -629,6 +971,14 @@ define i8 @uminv_v32i8(ptr %a) {
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: umin v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: uminv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op)
ret i8 %res
@@ -642,6 +992,12 @@ define i16 @uminv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uminv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -654,6 +1010,12 @@ define i16 @uminv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uminv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -667,6 +1029,14 @@ define i16 @uminv_v16i16(ptr %a) {
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: umin v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uminv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %op)
ret i16 %res
@@ -680,6 +1050,12 @@ define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uminp v0.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -692,6 +1068,12 @@ define i32 @uminv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: uminv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -705,6 +1087,14 @@ define i32 @uminv_v8i32(ptr %a) {
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: umin v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uminv s0, v0.4s
+; NONEON-NOSVE-NEXT: fmov w0, s0
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op)
ret i32 %res
@@ -719,6 +1109,17 @@ define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmhi d2, d1, d0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -732,6 +1133,20 @@ define i64 @uminv_v4i64(ptr %a) {
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uminv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmhi v2.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: cmhi d2, d1, d0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op)
ret i64 %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index 4ae7586fca169..99f8aef9f2b22 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -24,6 +25,35 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8
+; NONEON-NOSVE-NEXT: smov w11, v1.h[0]
+; NONEON-NOSVE-NEXT: smov w12, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w14, v1.h[2]
+; NONEON-NOSVE-NEXT: smov w15, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w17, v1.h[3]
+; NONEON-NOSVE-NEXT: smov w18, v0.h[3]
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: fmov s0, w11
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v0.h[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = srem <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -53,6 +83,53 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: smov w11, v1.b[0]
+; NONEON-NOSVE-NEXT: smov w12, v0.b[0]
+; NONEON-NOSVE-NEXT: smov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: smov w14, v1.b[2]
+; NONEON-NOSVE-NEXT: smov w15, v0.b[2]
+; NONEON-NOSVE-NEXT: smov w17, v1.b[3]
+; NONEON-NOSVE-NEXT: smov w18, v0.b[3]
+; NONEON-NOSVE-NEXT: smov w1, v1.b[4]
+; NONEON-NOSVE-NEXT: smov w2, v0.b[4]
+; NONEON-NOSVE-NEXT: smov w4, v1.b[5]
+; NONEON-NOSVE-NEXT: smov w5, v0.b[5]
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: smov w13, v1.b[7]
+; NONEON-NOSVE-NEXT: fmov s2, w11
+; NONEON-NOSVE-NEXT: smov w11, v0.b[6]
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: sdiv w0, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: smov w14, v0.b[7]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w8
+; NONEON-NOSVE-NEXT: sdiv w3, w2, w1
+; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
+; NONEON-NOSVE-NEXT: mov v2.b[3], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w5, w4
+; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2
+; NONEON-NOSVE-NEXT: mov v2.b[4], w8
+; NONEON-NOSVE-NEXT: sdiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5
+; NONEON-NOSVE-NEXT: mov v2.b[5], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: mov v2.b[6], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14
+; NONEON-NOSVE-NEXT: mov v2.b[7], w8
+; NONEON-NOSVE-NEXT: fmov d0, d2
+; NONEON-NOSVE-NEXT: ret
%res = srem <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -102,6 +179,112 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
+; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
+; NONEON-NOSVE-NEXT: smov w11, v1.b[0]
+; NONEON-NOSVE-NEXT: smov w12, v0.b[0]
+; NONEON-NOSVE-NEXT: smov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: smov w14, v1.b[2]
+; NONEON-NOSVE-NEXT: smov w15, v0.b[2]
+; NONEON-NOSVE-NEXT: smov w17, v1.b[3]
+; NONEON-NOSVE-NEXT: smov w18, v0.b[3]
+; NONEON-NOSVE-NEXT: smov w1, v1.b[4]
+; NONEON-NOSVE-NEXT: smov w2, v0.b[4]
+; NONEON-NOSVE-NEXT: smov w4, v1.b[5]
+; NONEON-NOSVE-NEXT: smov w5, v0.b[5]
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: smov w7, v1.b[6]
+; NONEON-NOSVE-NEXT: smov w19, v0.b[6]
+; NONEON-NOSVE-NEXT: smov w21, v1.b[7]
+; NONEON-NOSVE-NEXT: smov w22, v0.b[7]
+; NONEON-NOSVE-NEXT: smov w24, v1.b[8]
+; NONEON-NOSVE-NEXT: smov w25, v0.b[8]
+; NONEON-NOSVE-NEXT: smov w27, v1.b[9]
+; NONEON-NOSVE-NEXT: smov w28, v0.b[9]
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: smov w13, v1.b[11]
+; NONEON-NOSVE-NEXT: fmov s2, w11
+; NONEON-NOSVE-NEXT: smov w11, v0.b[10]
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.b[10]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: sdiv w0, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: smov w14, v0.b[11]
+; NONEON-NOSVE-NEXT: smov w16, v1.b[12]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w8
+; NONEON-NOSVE-NEXT: sdiv w3, w2, w1
+; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
+; NONEON-NOSVE-NEXT: smov w17, v0.b[12]
+; NONEON-NOSVE-NEXT: smov w0, v1.b[13]
+; NONEON-NOSVE-NEXT: mov v2.b[3], w8
+; NONEON-NOSVE-NEXT: sdiv w6, w5, w4
+; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2
+; NONEON-NOSVE-NEXT: smov w1, v0.b[13]
+; NONEON-NOSVE-NEXT: mov v2.b[4], w8
+; NONEON-NOSVE-NEXT: sdiv w20, w19, w7
+; NONEON-NOSVE-NEXT: msub w8, w6, w4, w5
+; NONEON-NOSVE-NEXT: mov v2.b[5], w8
+; NONEON-NOSVE-NEXT: sdiv w23, w22, w21
+; NONEON-NOSVE-NEXT: msub w8, w20, w7, w19
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v2.b[6], w8
+; NONEON-NOSVE-NEXT: sdiv w26, w25, w24
+; NONEON-NOSVE-NEXT: msub w8, w23, w21, w22
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v2.b[7], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w28, w27
+; NONEON-NOSVE-NEXT: msub w8, w26, w24, w25
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v2.b[8], w8
+; NONEON-NOSVE-NEXT: sdiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w27, w28
+; NONEON-NOSVE-NEXT: mov v2.b[9], w8
+; NONEON-NOSVE-NEXT: sdiv w15, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: smov w10, v1.b[14]
+; NONEON-NOSVE-NEXT: smov w11, v0.b[14]
+; NONEON-NOSVE-NEXT: mov v2.b[10], w8
+; NONEON-NOSVE-NEXT: sdiv w18, w17, w16
+; NONEON-NOSVE-NEXT: msub w8, w15, w13, w14
+; NONEON-NOSVE-NEXT: smov w13, v1.b[15]
+; NONEON-NOSVE-NEXT: smov w14, v0.b[15]
+; NONEON-NOSVE-NEXT: mov v2.b[11], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w1, w0
+; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17
+; NONEON-NOSVE-NEXT: mov v2.b[12], w8
+; NONEON-NOSVE-NEXT: sdiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w0, w1
+; NONEON-NOSVE-NEXT: mov v2.b[13], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: mov v2.b[14], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14
+; NONEON-NOSVE-NEXT: mov v2.b[15], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ret
%res = srem <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -189,6 +372,279 @@ define void @srem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: mls z2.b, p0/m, z7.b, z4.b
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #320
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #224] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #240] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #256] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #272] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #288] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #304] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 320
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
+; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
+; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
+; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: str x0, [sp, #216] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: smov w4, v3.b[1]
+; NONEON-NOSVE-NEXT: smov w1, v2.b[1]
+; NONEON-NOSVE-NEXT: smov w7, v3.b[7]
+; NONEON-NOSVE-NEXT: smov w5, v2.b[7]
+; NONEON-NOSVE-NEXT: smov w6, v3.b[8]
+; NONEON-NOSVE-NEXT: smov w3, v2.b[8]
+; NONEON-NOSVE-NEXT: smov w22, v3.b[9]
+; NONEON-NOSVE-NEXT: smov w20, v2.b[9]
+; NONEON-NOSVE-NEXT: smov w13, v3.b[0]
+; NONEON-NOSVE-NEXT: smov w17, v3.b[3]
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: str w8, [sp, #100] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w8, v1.b[0]
+; NONEON-NOSVE-NEXT: str w9, [sp, #108] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w9, v0.b[0]
+; NONEON-NOSVE-NEXT: smov w14, v2.b[3]
+; NONEON-NOSVE-NEXT: smov w15, v3.b[4]
+; NONEON-NOSVE-NEXT: smov w12, v2.b[4]
+; NONEON-NOSVE-NEXT: smov w2, v3.b[5]
+; NONEON-NOSVE-NEXT: smov w18, v2.b[5]
+; NONEON-NOSVE-NEXT: smov w0, v3.b[6]
+; NONEON-NOSVE-NEXT: smov w16, v2.b[6]
+; NONEON-NOSVE-NEXT: smov w21, v3.b[10]
+; NONEON-NOSVE-NEXT: smov w19, v2.b[10]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: ldr w30, [sp, #36] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: str w10, [sp, #116] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[2]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[2]
+; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #44] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[3]
+; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #52] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w9, v0.b[3]
+; NONEON-NOSVE-NEXT: sdiv w26, w14, w17
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w11, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[4]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[4]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #60] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[5]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[5]
+; NONEON-NOSVE-NEXT: str w8, [sp, #96] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #104] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #68] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[6]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[6]
+; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #80] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #112] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[7]
+; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #88] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w9, v0.b[7]
+; NONEON-NOSVE-NEXT: sdiv w25, w12, w15
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #132] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[8]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[8]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #140] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[9]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[9]
+; NONEON-NOSVE-NEXT: str w8, [sp, #148] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #156] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w11, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[10]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[10]
+; NONEON-NOSVE-NEXT: str w10, [sp, #128] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #204] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[11]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[11]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #192] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #212] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[12]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[12]
+; NONEON-NOSVE-NEXT: str w8, [sp, #172] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #180] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #200] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[13]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[13]
+; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #164] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w11, v3.b[2]
+; NONEON-NOSVE-NEXT: str w9, [sp, #176] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #188] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.b[14]
+; NONEON-NOSVE-NEXT: smov w9, v0.b[14]
+; NONEON-NOSVE-NEXT: str w8, [sp, #144] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #152] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #184] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w9, v2.b[2]
+; NONEON-NOSVE-NEXT: sdiv w8, w1, w4
+; NONEON-NOSVE-NEXT: str w10, [sp, #160] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w10, v2.b[0]
+; NONEON-NOSVE-NEXT: str w8, [sp, #24] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w8, w5, w7
+; NONEON-NOSVE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w8, w3, w6
+; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w8, w20, w22
+; NONEON-NOSVE-NEXT: sdiv w24, w10, w13
+; NONEON-NOSVE-NEXT: str w8, [sp, #32] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: ldp w29, w8, [sp, #40] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w8, w30, w29
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #224] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: fmov s4, w8
+; NONEON-NOSVE-NEXT: sdiv w23, w9, w11
+; NONEON-NOSVE-NEXT: msub w10, w24, w13, w10
+; NONEON-NOSVE-NEXT: ldr w13, [sp, #24] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w24, [sp, #100] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w13, w13, w4, w1
+; NONEON-NOSVE-NEXT: ldr w1, [sp, #116] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w4, [sp, #108] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: fmov s5, w10
+; NONEON-NOSVE-NEXT: msub w1, w1, w24, w4
+; NONEON-NOSVE-NEXT: mov v5.b[1], w13
+; NONEON-NOSVE-NEXT: mov v4.b[1], w1
+; NONEON-NOSVE-NEXT: ldr w1, [sp, #120] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w23, w11, w9
+; NONEON-NOSVE-NEXT: ldr w11, [sp, #48] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w28, w18, w2
+; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #52] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #272] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w26, w17, w14
+; NONEON-NOSVE-NEXT: ldr w14, [sp, #72] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w11, w10
+; NONEON-NOSVE-NEXT: ldr w17, [sp, #96] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: smov w10, v3.b[11]
+; NONEON-NOSVE-NEXT: smov w11, v2.b[11]
+; NONEON-NOSVE-NEXT: mov v4.b[2], w9
+; NONEON-NOSVE-NEXT: mov v5.b[3], w8
+; NONEON-NOSVE-NEXT: msub w8, w25, w15, w12
+; NONEON-NOSVE-NEXT: ldp w13, w9, [sp, #76] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w27, w16, w0
+; NONEON-NOSVE-NEXT: ldr w15, [sp, #104] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #256] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w14, w13
+; NONEON-NOSVE-NEXT: ldr w14, [sp, #60] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[4], w8
+; NONEON-NOSVE-NEXT: msub w8, w28, w2, w18
+; NONEON-NOSVE-NEXT: ldr w2, [sp, #156] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[3], w9
+; NONEON-NOSVE-NEXT: ldp w12, w9, [sp, #64] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[5], w8
+; NONEON-NOSVE-NEXT: msub w8, w27, w0, w16
+; NONEON-NOSVE-NEXT: ldr w0, [sp, #132] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w4, w19, w21
+; NONEON-NOSVE-NEXT: msub w9, w9, w14, w12
+; NONEON-NOSVE-NEXT: smov w12, v3.b[12]
+; NONEON-NOSVE-NEXT: smov w14, v2.b[12]
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #240] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[6], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[4], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #112] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w8, w7, w5
+; NONEON-NOSVE-NEXT: ldr w5, [sp, #204] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w17, w15
+; NONEON-NOSVE-NEXT: ldr w17, [sp, #84] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[7], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w13, w11, w10
+; NONEON-NOSVE-NEXT: mov v4.b[5], w9
+; NONEON-NOSVE-NEXT: ldp w16, w9, [sp, #88] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w8, w6, w3
+; NONEON-NOSVE-NEXT: ldr w3, [sp, #148] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w17, w16
+; NONEON-NOSVE-NEXT: smov w16, v3.b[13]
+; NONEON-NOSVE-NEXT: smov w17, v2.b[13]
+; NONEON-NOSVE-NEXT: mov v5.b[8], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[6], w9
+; NONEON-NOSVE-NEXT: msub w8, w8, w22, w20
+; NONEON-NOSVE-NEXT: sdiv w15, w14, w12
+; NONEON-NOSVE-NEXT: ldp w18, w9, [sp, #136] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[9], w8
+; NONEON-NOSVE-NEXT: msub w8, w4, w21, w19
+; NONEON-NOSVE-NEXT: msub w9, w9, w0, w18
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #304] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #288] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[7], w9
+; NONEON-NOSVE-NEXT: mov v5.b[10], w8
+; NONEON-NOSVE-NEXT: msub w8, w13, w10, w11
+; NONEON-NOSVE-NEXT: ldp w0, w9, [sp, #124] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #196] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w13, [sp, #192] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w18, w17, w16
+; NONEON-NOSVE-NEXT: msub w9, w9, w1, w0
+; NONEON-NOSVE-NEXT: mov v5.b[11], w8
+; NONEON-NOSVE-NEXT: smov w0, v3.b[14]
+; NONEON-NOSVE-NEXT: msub w10, w10, w13, w11
+; NONEON-NOSVE-NEXT: smov w1, v2.b[14]
+; NONEON-NOSVE-NEXT: msub w8, w15, w12, w14
+; NONEON-NOSVE-NEXT: mov v4.b[8], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp w15, w13, [sp, #168] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w3, w2
+; NONEON-NOSVE-NEXT: mov v5.b[12], w8
+; NONEON-NOSVE-NEXT: ldp w4, w3, [sp, #208] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp w14, w12, [sp, #176] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[9], w9
+; NONEON-NOSVE-NEXT: sdiv w2, w1, w0
+; NONEON-NOSVE-NEXT: smov w9, v3.b[15]
+; NONEON-NOSVE-NEXT: msub w3, w3, w5, w4
+; NONEON-NOSVE-NEXT: smov w4, v2.b[15]
+; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17
+; NONEON-NOSVE-NEXT: ldr w16, [sp, #144] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[10], w3
+; NONEON-NOSVE-NEXT: mov v5.b[13], w8
+; NONEON-NOSVE-NEXT: mov v4.b[11], w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #188] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w11, w4, w9
+; NONEON-NOSVE-NEXT: msub w8, w2, w0, w1
+; NONEON-NOSVE-NEXT: msub w10, w10, w13, w12
+; NONEON-NOSVE-NEXT: smov w12, v1.b[15]
+; NONEON-NOSVE-NEXT: smov w13, v0.b[15]
+; NONEON-NOSVE-NEXT: mov v5.b[14], w8
+; NONEON-NOSVE-NEXT: mov v4.b[12], w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #184] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w10, w10, w15, w14
+; NONEON-NOSVE-NEXT: ldr w15, [sp, #152] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w14, w13, w12
+; NONEON-NOSVE-NEXT: msub w8, w11, w9, w4
+; NONEON-NOSVE-NEXT: mov v4.b[13], w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #160] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[15], w8
+; NONEON-NOSVE-NEXT: ldr x8, [sp, #216] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w10, w10, w16, w15
+; NONEON-NOSVE-NEXT: mov v4.b[14], w10
+; NONEON-NOSVE-NEXT: msub w9, w14, w12, w13
+; NONEON-NOSVE-NEXT: mov v4.b[15], w9
+; NONEON-NOSVE-NEXT: stp q5, q4, [x8]
+; NONEON-NOSVE-NEXT: add sp, sp, #320
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = srem <32 x i8> %op1, %op2
@@ -210,6 +666,33 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: smov w11, v1.h[0]
+; NONEON-NOSVE-NEXT: smov w12, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w14, v1.h[2]
+; NONEON-NOSVE-NEXT: smov w15, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w17, v1.h[3]
+; NONEON-NOSVE-NEXT: smov w18, v0.h[3]
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: fmov s0, w11
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v0.h[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = srem <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -238,6 +721,51 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: smov w11, v1.h[0]
+; NONEON-NOSVE-NEXT: smov w12, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w14, v1.h[2]
+; NONEON-NOSVE-NEXT: smov w15, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w17, v1.h[3]
+; NONEON-NOSVE-NEXT: smov w18, v0.h[3]
+; NONEON-NOSVE-NEXT: smov w1, v1.h[4]
+; NONEON-NOSVE-NEXT: smov w2, v0.h[4]
+; NONEON-NOSVE-NEXT: smov w4, v1.h[5]
+; NONEON-NOSVE-NEXT: smov w5, v0.h[5]
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: smov w13, v1.h[7]
+; NONEON-NOSVE-NEXT: fmov s2, w11
+; NONEON-NOSVE-NEXT: smov w11, v0.h[6]
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: smov w10, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w8
+; NONEON-NOSVE-NEXT: sdiv w0, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: smov w14, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: sdiv w3, w2, w1
+; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w5, w4
+; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: sdiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = srem <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -282,6 +810,139 @@ define void @srem_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: mls z0.h, p0/m, z7.h, z1.h
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #144
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #64] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #80] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
+; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
+; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
+; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: smov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: smov w20, v1.h[0]
+; NONEON-NOSVE-NEXT: smov w21, v0.h[0]
+; NONEON-NOSVE-NEXT: smov w19, v0.h[3]
+; NONEON-NOSVE-NEXT: smov w5, v1.h[4]
+; NONEON-NOSVE-NEXT: smov w2, v0.h[4]
+; NONEON-NOSVE-NEXT: smov w1, v3.h[1]
+; NONEON-NOSVE-NEXT: smov w23, v2.h[1]
+; NONEON-NOSVE-NEXT: smov w25, v3.h[0]
+; NONEON-NOSVE-NEXT: smov w26, v2.h[0]
+; NONEON-NOSVE-NEXT: smov w6, v1.h[5]
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w8, v1.h[2]
+; NONEON-NOSVE-NEXT: smov w9, v0.h[2]
+; NONEON-NOSVE-NEXT: smov w3, v0.h[5]
+; NONEON-NOSVE-NEXT: smov w4, v1.h[6]
+; NONEON-NOSVE-NEXT: smov w7, v0.h[6]
+; NONEON-NOSVE-NEXT: smov w28, v3.h[2]
+; NONEON-NOSVE-NEXT: smov w29, v2.h[2]
+; NONEON-NOSVE-NEXT: smov w15, v3.h[3]
+; NONEON-NOSVE-NEXT: smov w13, v2.h[3]
+; NONEON-NOSVE-NEXT: smov w12, v3.h[4]
+; NONEON-NOSVE-NEXT: smov w14, v3.h[5]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w11, w21, w20
+; NONEON-NOSVE-NEXT: str w10, [sp, #44] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: smov w8, v1.h[3]
+; NONEON-NOSVE-NEXT: stp w8, w11, [sp] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w11, v2.h[4]
+; NONEON-NOSVE-NEXT: ldr w22, [sp, #4] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w20, w22, w20, w21
+; NONEON-NOSVE-NEXT: sdiv w9, w19, w8
+; NONEON-NOSVE-NEXT: str w10, [sp, #32] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w10, v3.h[6]
+; NONEON-NOSVE-NEXT: fmov s5, w20
+; NONEON-NOSVE-NEXT: smov w20, v3.h[7]
+; NONEON-NOSVE-NEXT: sdiv w8, w2, w5
+; NONEON-NOSVE-NEXT: sdiv w24, w23, w1
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: sdiv w27, w26, w25
+; NONEON-NOSVE-NEXT: msub w1, w24, w1, w23
+; NONEON-NOSVE-NEXT: ldp w24, w23, [sp, #40] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w9, w3, w6
+; NONEON-NOSVE-NEXT: msub w21, w27, w25, w26
+; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w23, w23, w25, w24
+; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: fmov s4, w21
+; NONEON-NOSVE-NEXT: mov v5.h[1], w23
+; NONEON-NOSVE-NEXT: ldp w23, w21, [sp, #28] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.h[1], w1
+; NONEON-NOSVE-NEXT: sdiv w8, w7, w4
+; NONEON-NOSVE-NEXT: msub w21, w21, w25, w23
+; NONEON-NOSVE-NEXT: smov w23, v2.h[7]
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #80] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.h[2], w21
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: sdiv w30, w29, w28
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: smov w9, v2.h[5]
+; NONEON-NOSVE-NEXT: smov w8, v2.h[6]
+; NONEON-NOSVE-NEXT: sdiv w18, w13, w15
+; NONEON-NOSVE-NEXT: msub w1, w30, w28, w29
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #64] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.h[2], w1
+; NONEON-NOSVE-NEXT: sdiv w16, w11, w12
+; NONEON-NOSVE-NEXT: msub w13, w18, w15, w13
+; NONEON-NOSVE-NEXT: ldr w15, [sp, #20] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w18, [sp] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w15, w15, w18, w19
+; NONEON-NOSVE-NEXT: mov v4.h[3], w13
+; NONEON-NOSVE-NEXT: smov w13, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v5.h[3], w15
+; NONEON-NOSVE-NEXT: smov w15, v0.h[7]
+; NONEON-NOSVE-NEXT: sdiv w17, w9, w14
+; NONEON-NOSVE-NEXT: msub w11, w16, w12, w11
+; NONEON-NOSVE-NEXT: ldr w12, [sp, #16] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w12, w12, w5, w2
+; NONEON-NOSVE-NEXT: mov v4.h[4], w11
+; NONEON-NOSVE-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.h[4], w12
+; NONEON-NOSVE-NEXT: msub w11, w11, w6, w3
+; NONEON-NOSVE-NEXT: sdiv w24, w8, w10
+; NONEON-NOSVE-NEXT: msub w9, w17, w14, w9
+; NONEON-NOSVE-NEXT: mov v5.h[5], w11
+; NONEON-NOSVE-NEXT: mov v4.h[5], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w4, w7
+; NONEON-NOSVE-NEXT: sdiv w18, w23, w20
+; NONEON-NOSVE-NEXT: msub w8, w24, w10, w8
+; NONEON-NOSVE-NEXT: mov v5.h[6], w9
+; NONEON-NOSVE-NEXT: mov v4.h[6], w8
+; NONEON-NOSVE-NEXT: sdiv w12, w15, w13
+; NONEON-NOSVE-NEXT: msub w8, w18, w20, w23
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.h[7], w8
+; NONEON-NOSVE-NEXT: msub w9, w12, w13, w15
+; NONEON-NOSVE-NEXT: mov v5.h[7], w9
+; NONEON-NOSVE-NEXT: stp q4, q5, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #144
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = srem <16 x i16> %op1, %op2
@@ -300,6 +961,23 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: mov w11, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w12, v0.s[1]
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12
+; NONEON-NOSVE-NEXT: mov v0.s[1], w9
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = srem <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -315,6 +993,30 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov w11, s1
+; NONEON-NOSVE-NEXT: fmov w12, s0
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: mov w14, v1.s[2]
+; NONEON-NOSVE-NEXT: mov w15, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w17, v1.s[3]
+; NONEON-NOSVE-NEXT: mov w18, v0.s[3]
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: fmov s0, w11
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v0.s[1], w8
+; NONEON-NOSVE-NEXT: sdiv w9, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v0.s[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18
+; NONEON-NOSVE-NEXT: mov v0.s[3], w8
+; NONEON-NOSVE-NEXT: ret
%res = srem <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -334,6 +1036,65 @@ define void @srem_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -48
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: fmov w12, s0
+; NONEON-NOSVE-NEXT: fmov w3, s2
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: fmov w11, s1
+; NONEON-NOSVE-NEXT: fmov w2, s3
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w17, v3.s[1]
+; NONEON-NOSVE-NEXT: mov w18, v2.s[1]
+; NONEON-NOSVE-NEXT: mov w14, v1.s[2]
+; NONEON-NOSVE-NEXT: mov w15, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w5, v3.s[2]
+; NONEON-NOSVE-NEXT: mov w6, v2.s[2]
+; NONEON-NOSVE-NEXT: sdiv w13, w12, w11
+; NONEON-NOSVE-NEXT: mov w19, v3.s[3]
+; NONEON-NOSVE-NEXT: mov w20, v2.s[3]
+; NONEON-NOSVE-NEXT: mov w22, v1.s[3]
+; NONEON-NOSVE-NEXT: mov w23, v0.s[3]
+; NONEON-NOSVE-NEXT: sdiv w4, w3, w2
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: fmov s1, w11
+; NONEON-NOSVE-NEXT: sdiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w12, w4, w2, w3
+; NONEON-NOSVE-NEXT: fmov s0, w12
+; NONEON-NOSVE-NEXT: sdiv w1, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v1.s[1], w8
+; NONEON-NOSVE-NEXT: sdiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w13, w1, w17, w18
+; NONEON-NOSVE-NEXT: mov v0.s[1], w13
+; NONEON-NOSVE-NEXT: sdiv w7, w6, w5
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v1.s[2], w8
+; NONEON-NOSVE-NEXT: sdiv w21, w20, w19
+; NONEON-NOSVE-NEXT: msub w10, w7, w5, w6
+; NONEON-NOSVE-NEXT: mov v0.s[2], w10
+; NONEON-NOSVE-NEXT: sdiv w9, w23, w22
+; NONEON-NOSVE-NEXT: msub w10, w21, w19, w20
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v0.s[3], w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w22, w23
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v1.s[3], w8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = srem <8 x i32> %op1, %op2
@@ -352,6 +1113,17 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
+; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = srem <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -367,6 +1139,20 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x11, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x12, v0.d[1]
+; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
+; NONEON-NOSVE-NEXT: sdiv x13, x12, x11
+; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: ret
%res = srem <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -386,6 +1172,33 @@ define void @srem_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: srem_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: fmov x15, d2
+; NONEON-NOSVE-NEXT: mov x12, v2.d[1]
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x14, d3
+; NONEON-NOSVE-NEXT: mov x11, v3.d[1]
+; NONEON-NOSVE-NEXT: mov x17, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x18, v0.d[1]
+; NONEON-NOSVE-NEXT: sdiv x10, x9, x8
+; NONEON-NOSVE-NEXT: sdiv x16, x15, x14
+; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: sdiv x13, x12, x11
+; NONEON-NOSVE-NEXT: msub x10, x16, x14, x15
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: sdiv x1, x18, x17
+; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: msub x11, x1, x17, x18
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = srem <4 x i64> %op1, %op2
@@ -413,6 +1226,41 @@ define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: umov w11, v1.h[0]
+; NONEON-NOSVE-NEXT: umov w12, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w14, v1.h[2]
+; NONEON-NOSVE-NEXT: umov w15, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w17, v1.h[3]
+; NONEON-NOSVE-NEXT: umov w18, v0.h[3]
+; NONEON-NOSVE-NEXT: and w11, w11, #0xff
+; NONEON-NOSVE-NEXT: and w12, w12, #0xff
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: and w9, w9, #0xff
+; NONEON-NOSVE-NEXT: and w14, w14, #0xff
+; NONEON-NOSVE-NEXT: and w15, w15, #0xff
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: and w12, w17, #0xff
+; NONEON-NOSVE-NEXT: and w13, w18, #0xff
+; NONEON-NOSVE-NEXT: fmov s0, w11
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: udiv w9, w13, w12
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v0.h[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w12, w13
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = urem <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -442,6 +1290,53 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: umov w11, v1.b[0]
+; NONEON-NOSVE-NEXT: umov w12, v0.b[0]
+; NONEON-NOSVE-NEXT: umov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: umov w14, v1.b[2]
+; NONEON-NOSVE-NEXT: umov w15, v0.b[2]
+; NONEON-NOSVE-NEXT: umov w17, v1.b[3]
+; NONEON-NOSVE-NEXT: umov w18, v0.b[3]
+; NONEON-NOSVE-NEXT: umov w1, v1.b[4]
+; NONEON-NOSVE-NEXT: umov w2, v0.b[4]
+; NONEON-NOSVE-NEXT: umov w4, v1.b[5]
+; NONEON-NOSVE-NEXT: umov w5, v0.b[5]
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: umov w13, v1.b[7]
+; NONEON-NOSVE-NEXT: fmov s2, w11
+; NONEON-NOSVE-NEXT: umov w11, v0.b[6]
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.b[6]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: udiv w0, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: umov w14, v0.b[7]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w8
+; NONEON-NOSVE-NEXT: udiv w3, w2, w1
+; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
+; NONEON-NOSVE-NEXT: mov v2.b[3], w8
+; NONEON-NOSVE-NEXT: udiv w9, w5, w4
+; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2
+; NONEON-NOSVE-NEXT: mov v2.b[4], w8
+; NONEON-NOSVE-NEXT: udiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5
+; NONEON-NOSVE-NEXT: mov v2.b[5], w8
+; NONEON-NOSVE-NEXT: udiv w9, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: mov v2.b[6], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14
+; NONEON-NOSVE-NEXT: mov v2.b[7], w8
+; NONEON-NOSVE-NEXT: fmov d0, d2
+; NONEON-NOSVE-NEXT: ret
%res = urem <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -491,6 +1386,112 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
+; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
+; NONEON-NOSVE-NEXT: umov w11, v1.b[0]
+; NONEON-NOSVE-NEXT: umov w12, v0.b[0]
+; NONEON-NOSVE-NEXT: umov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: umov w14, v1.b[2]
+; NONEON-NOSVE-NEXT: umov w15, v0.b[2]
+; NONEON-NOSVE-NEXT: umov w17, v1.b[3]
+; NONEON-NOSVE-NEXT: umov w18, v0.b[3]
+; NONEON-NOSVE-NEXT: umov w1, v1.b[4]
+; NONEON-NOSVE-NEXT: umov w2, v0.b[4]
+; NONEON-NOSVE-NEXT: umov w4, v1.b[5]
+; NONEON-NOSVE-NEXT: umov w5, v0.b[5]
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: umov w7, v1.b[6]
+; NONEON-NOSVE-NEXT: umov w19, v0.b[6]
+; NONEON-NOSVE-NEXT: umov w21, v1.b[7]
+; NONEON-NOSVE-NEXT: umov w22, v0.b[7]
+; NONEON-NOSVE-NEXT: umov w24, v1.b[8]
+; NONEON-NOSVE-NEXT: umov w25, v0.b[8]
+; NONEON-NOSVE-NEXT: umov w27, v1.b[9]
+; NONEON-NOSVE-NEXT: umov w28, v0.b[9]
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: umov w13, v1.b[11]
+; NONEON-NOSVE-NEXT: fmov s2, w11
+; NONEON-NOSVE-NEXT: umov w11, v0.b[10]
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.b[10]
+; NONEON-NOSVE-NEXT: mov v2.b[1], w8
+; NONEON-NOSVE-NEXT: udiv w0, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: umov w14, v0.b[11]
+; NONEON-NOSVE-NEXT: umov w16, v1.b[12]
+; NONEON-NOSVE-NEXT: mov v2.b[2], w8
+; NONEON-NOSVE-NEXT: udiv w3, w2, w1
+; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
+; NONEON-NOSVE-NEXT: umov w17, v0.b[12]
+; NONEON-NOSVE-NEXT: umov w0, v1.b[13]
+; NONEON-NOSVE-NEXT: mov v2.b[3], w8
+; NONEON-NOSVE-NEXT: udiv w6, w5, w4
+; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2
+; NONEON-NOSVE-NEXT: umov w1, v0.b[13]
+; NONEON-NOSVE-NEXT: mov v2.b[4], w8
+; NONEON-NOSVE-NEXT: udiv w20, w19, w7
+; NONEON-NOSVE-NEXT: msub w8, w6, w4, w5
+; NONEON-NOSVE-NEXT: mov v2.b[5], w8
+; NONEON-NOSVE-NEXT: udiv w23, w22, w21
+; NONEON-NOSVE-NEXT: msub w8, w20, w7, w19
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v2.b[6], w8
+; NONEON-NOSVE-NEXT: udiv w26, w25, w24
+; NONEON-NOSVE-NEXT: msub w8, w23, w21, w22
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v2.b[7], w8
+; NONEON-NOSVE-NEXT: udiv w9, w28, w27
+; NONEON-NOSVE-NEXT: msub w8, w26, w24, w25
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v2.b[8], w8
+; NONEON-NOSVE-NEXT: udiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w27, w28
+; NONEON-NOSVE-NEXT: mov v2.b[9], w8
+; NONEON-NOSVE-NEXT: udiv w15, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: umov w10, v1.b[14]
+; NONEON-NOSVE-NEXT: umov w11, v0.b[14]
+; NONEON-NOSVE-NEXT: mov v2.b[10], w8
+; NONEON-NOSVE-NEXT: udiv w18, w17, w16
+; NONEON-NOSVE-NEXT: msub w8, w15, w13, w14
+; NONEON-NOSVE-NEXT: umov w13, v1.b[15]
+; NONEON-NOSVE-NEXT: umov w14, v0.b[15]
+; NONEON-NOSVE-NEXT: mov v2.b[11], w8
+; NONEON-NOSVE-NEXT: udiv w9, w1, w0
+; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17
+; NONEON-NOSVE-NEXT: mov v2.b[12], w8
+; NONEON-NOSVE-NEXT: udiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w0, w1
+; NONEON-NOSVE-NEXT: mov v2.b[13], w8
+; NONEON-NOSVE-NEXT: udiv w9, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: mov v2.b[14], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14
+; NONEON-NOSVE-NEXT: mov v2.b[15], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ret
%res = urem <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -578,6 +1579,279 @@ define void @urem_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: mls z2.b, p0/m, z7.b, z4.b
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #320
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #224] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #240] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #256] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #272] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #288] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #304] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 320
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
+; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
+; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
+; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: str x0, [sp, #216] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w8, v1.b[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[1]
+; NONEON-NOSVE-NEXT: umov w4, v3.b[1]
+; NONEON-NOSVE-NEXT: umov w1, v2.b[1]
+; NONEON-NOSVE-NEXT: umov w7, v3.b[7]
+; NONEON-NOSVE-NEXT: umov w5, v2.b[7]
+; NONEON-NOSVE-NEXT: umov w6, v3.b[8]
+; NONEON-NOSVE-NEXT: umov w3, v2.b[8]
+; NONEON-NOSVE-NEXT: umov w22, v3.b[9]
+; NONEON-NOSVE-NEXT: umov w20, v2.b[9]
+; NONEON-NOSVE-NEXT: umov w13, v3.b[0]
+; NONEON-NOSVE-NEXT: umov w17, v3.b[3]
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: str w8, [sp, #100] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w8, v1.b[0]
+; NONEON-NOSVE-NEXT: str w9, [sp, #108] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w9, v0.b[0]
+; NONEON-NOSVE-NEXT: umov w14, v2.b[3]
+; NONEON-NOSVE-NEXT: umov w15, v3.b[4]
+; NONEON-NOSVE-NEXT: umov w12, v2.b[4]
+; NONEON-NOSVE-NEXT: umov w2, v3.b[5]
+; NONEON-NOSVE-NEXT: umov w18, v2.b[5]
+; NONEON-NOSVE-NEXT: umov w0, v3.b[6]
+; NONEON-NOSVE-NEXT: umov w16, v2.b[6]
+; NONEON-NOSVE-NEXT: umov w21, v3.b[10]
+; NONEON-NOSVE-NEXT: umov w19, v2.b[10]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: ldr w30, [sp, #36] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: str w10, [sp, #116] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[2]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[2]
+; NONEON-NOSVE-NEXT: stp w10, w8, [sp, #44] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[3]
+; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #52] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w9, v0.b[3]
+; NONEON-NOSVE-NEXT: udiv w26, w14, w17
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w11, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[4]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[4]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #60] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[5]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[5]
+; NONEON-NOSVE-NEXT: str w8, [sp, #96] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #104] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #68] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[6]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[6]
+; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #80] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #112] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[7]
+; NONEON-NOSVE-NEXT: stp w9, w10, [sp, #88] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w9, v0.b[7]
+; NONEON-NOSVE-NEXT: udiv w25, w12, w15
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #132] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[8]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[8]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #140] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[9]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[9]
+; NONEON-NOSVE-NEXT: str w8, [sp, #148] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #156] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w11, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[10]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[10]
+; NONEON-NOSVE-NEXT: str w10, [sp, #128] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #204] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[11]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[11]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #192] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #212] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[12]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[12]
+; NONEON-NOSVE-NEXT: str w8, [sp, #172] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #180] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #200] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[13]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[13]
+; NONEON-NOSVE-NEXT: stp w11, w8, [sp, #164] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w11, v3.b[2]
+; NONEON-NOSVE-NEXT: str w9, [sp, #176] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #188] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.b[14]
+; NONEON-NOSVE-NEXT: umov w9, v0.b[14]
+; NONEON-NOSVE-NEXT: str w8, [sp, #144] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w9, [sp, #152] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: str w10, [sp, #184] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w9, v2.b[2]
+; NONEON-NOSVE-NEXT: udiv w8, w1, w4
+; NONEON-NOSVE-NEXT: str w10, [sp, #160] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w10, v2.b[0]
+; NONEON-NOSVE-NEXT: str w8, [sp, #24] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w8, w5, w7
+; NONEON-NOSVE-NEXT: str w8, [sp, #28] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w8, w3, w6
+; NONEON-NOSVE-NEXT: str w8, [sp, #20] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w8, w20, w22
+; NONEON-NOSVE-NEXT: udiv w24, w10, w13
+; NONEON-NOSVE-NEXT: str w8, [sp, #32] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: ldp w29, w8, [sp, #40] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w8, w30, w29
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #224] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: fmov s4, w8
+; NONEON-NOSVE-NEXT: udiv w23, w9, w11
+; NONEON-NOSVE-NEXT: msub w10, w24, w13, w10
+; NONEON-NOSVE-NEXT: ldr w13, [sp, #24] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w24, [sp, #100] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w13, w13, w4, w1
+; NONEON-NOSVE-NEXT: ldr w1, [sp, #116] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w4, [sp, #108] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: fmov s5, w10
+; NONEON-NOSVE-NEXT: msub w1, w1, w24, w4
+; NONEON-NOSVE-NEXT: mov v5.b[1], w13
+; NONEON-NOSVE-NEXT: mov v4.b[1], w1
+; NONEON-NOSVE-NEXT: ldr w1, [sp, #120] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w23, w11, w9
+; NONEON-NOSVE-NEXT: ldr w11, [sp, #48] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w28, w18, w2
+; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #52] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #272] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w26, w17, w14
+; NONEON-NOSVE-NEXT: ldr w14, [sp, #72] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w11, w10
+; NONEON-NOSVE-NEXT: ldr w17, [sp, #96] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: umov w10, v3.b[11]
+; NONEON-NOSVE-NEXT: umov w11, v2.b[11]
+; NONEON-NOSVE-NEXT: mov v4.b[2], w9
+; NONEON-NOSVE-NEXT: mov v5.b[3], w8
+; NONEON-NOSVE-NEXT: msub w8, w25, w15, w12
+; NONEON-NOSVE-NEXT: ldp w13, w9, [sp, #76] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w27, w16, w0
+; NONEON-NOSVE-NEXT: ldr w15, [sp, #104] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #256] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w14, w13
+; NONEON-NOSVE-NEXT: ldr w14, [sp, #60] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[4], w8
+; NONEON-NOSVE-NEXT: msub w8, w28, w2, w18
+; NONEON-NOSVE-NEXT: ldr w2, [sp, #156] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[3], w9
+; NONEON-NOSVE-NEXT: ldp w12, w9, [sp, #64] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[5], w8
+; NONEON-NOSVE-NEXT: msub w8, w27, w0, w16
+; NONEON-NOSVE-NEXT: ldr w0, [sp, #132] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w4, w19, w21
+; NONEON-NOSVE-NEXT: msub w9, w9, w14, w12
+; NONEON-NOSVE-NEXT: umov w12, v3.b[12]
+; NONEON-NOSVE-NEXT: umov w14, v2.b[12]
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #240] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[6], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #28] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[4], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #112] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w8, w7, w5
+; NONEON-NOSVE-NEXT: ldr w5, [sp, #204] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w17, w15
+; NONEON-NOSVE-NEXT: ldr w17, [sp, #84] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[7], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #20] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w13, w11, w10
+; NONEON-NOSVE-NEXT: mov v4.b[5], w9
+; NONEON-NOSVE-NEXT: ldp w16, w9, [sp, #88] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w8, w8, w6, w3
+; NONEON-NOSVE-NEXT: ldr w3, [sp, #148] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w17, w16
+; NONEON-NOSVE-NEXT: umov w16, v3.b[13]
+; NONEON-NOSVE-NEXT: umov w17, v2.b[13]
+; NONEON-NOSVE-NEXT: mov v5.b[8], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #32] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[6], w9
+; NONEON-NOSVE-NEXT: msub w8, w8, w22, w20
+; NONEON-NOSVE-NEXT: udiv w15, w14, w12
+; NONEON-NOSVE-NEXT: ldp w18, w9, [sp, #136] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[9], w8
+; NONEON-NOSVE-NEXT: msub w8, w4, w21, w19
+; NONEON-NOSVE-NEXT: msub w9, w9, w0, w18
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #304] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #288] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[7], w9
+; NONEON-NOSVE-NEXT: mov v5.b[10], w8
+; NONEON-NOSVE-NEXT: msub w8, w13, w10, w11
+; NONEON-NOSVE-NEXT: ldp w0, w9, [sp, #124] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #196] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w13, [sp, #192] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w18, w17, w16
+; NONEON-NOSVE-NEXT: msub w9, w9, w1, w0
+; NONEON-NOSVE-NEXT: mov v5.b[11], w8
+; NONEON-NOSVE-NEXT: umov w0, v3.b[14]
+; NONEON-NOSVE-NEXT: msub w10, w10, w13, w11
+; NONEON-NOSVE-NEXT: umov w1, v2.b[14]
+; NONEON-NOSVE-NEXT: msub w8, w15, w12, w14
+; NONEON-NOSVE-NEXT: mov v4.b[8], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp w15, w13, [sp, #168] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w3, w2
+; NONEON-NOSVE-NEXT: mov v5.b[12], w8
+; NONEON-NOSVE-NEXT: ldp w4, w3, [sp, #208] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp w14, w12, [sp, #176] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[9], w9
+; NONEON-NOSVE-NEXT: udiv w2, w1, w0
+; NONEON-NOSVE-NEXT: umov w9, v3.b[15]
+; NONEON-NOSVE-NEXT: msub w3, w3, w5, w4
+; NONEON-NOSVE-NEXT: umov w4, v2.b[15]
+; NONEON-NOSVE-NEXT: msub w8, w18, w16, w17
+; NONEON-NOSVE-NEXT: ldr w16, [sp, #144] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.b[10], w3
+; NONEON-NOSVE-NEXT: mov v5.b[13], w8
+; NONEON-NOSVE-NEXT: mov v4.b[11], w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #188] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w11, w4, w9
+; NONEON-NOSVE-NEXT: msub w8, w2, w0, w1
+; NONEON-NOSVE-NEXT: msub w10, w10, w13, w12
+; NONEON-NOSVE-NEXT: umov w12, v1.b[15]
+; NONEON-NOSVE-NEXT: umov w13, v0.b[15]
+; NONEON-NOSVE-NEXT: mov v5.b[14], w8
+; NONEON-NOSVE-NEXT: mov v4.b[12], w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #184] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w10, w10, w15, w14
+; NONEON-NOSVE-NEXT: ldr w15, [sp, #152] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w14, w13, w12
+; NONEON-NOSVE-NEXT: msub w8, w11, w9, w4
+; NONEON-NOSVE-NEXT: mov v4.b[13], w10
+; NONEON-NOSVE-NEXT: ldr w10, [sp, #160] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.b[15], w8
+; NONEON-NOSVE-NEXT: ldr x8, [sp, #216] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w10, w10, w16, w15
+; NONEON-NOSVE-NEXT: mov v4.b[14], w10
+; NONEON-NOSVE-NEXT: msub w9, w14, w12, w13
+; NONEON-NOSVE-NEXT: mov v4.b[15], w9
+; NONEON-NOSVE-NEXT: stp q5, q4, [x8]
+; NONEON-NOSVE-NEXT: add sp, sp, #320
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = urem <32 x i8> %op1, %op2
@@ -599,6 +1873,33 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: umov w11, v1.h[0]
+; NONEON-NOSVE-NEXT: umov w12, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w14, v1.h[2]
+; NONEON-NOSVE-NEXT: umov w15, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w17, v1.h[3]
+; NONEON-NOSVE-NEXT: umov w18, v0.h[3]
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: fmov s0, w11
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v0.h[1], w8
+; NONEON-NOSVE-NEXT: udiv w9, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v0.h[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18
+; NONEON-NOSVE-NEXT: mov v0.h[3], w8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = urem <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -627,6 +1928,51 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: umov w11, v1.h[0]
+; NONEON-NOSVE-NEXT: umov w12, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w14, v1.h[2]
+; NONEON-NOSVE-NEXT: umov w15, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w17, v1.h[3]
+; NONEON-NOSVE-NEXT: umov w18, v0.h[3]
+; NONEON-NOSVE-NEXT: umov w1, v1.h[4]
+; NONEON-NOSVE-NEXT: umov w2, v0.h[4]
+; NONEON-NOSVE-NEXT: umov w4, v1.h[5]
+; NONEON-NOSVE-NEXT: umov w5, v0.h[5]
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: umov w13, v1.h[7]
+; NONEON-NOSVE-NEXT: fmov s2, w11
+; NONEON-NOSVE-NEXT: umov w11, v0.h[6]
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: umov w10, v1.h[6]
+; NONEON-NOSVE-NEXT: mov v2.h[1], w8
+; NONEON-NOSVE-NEXT: udiv w0, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: umov w14, v0.h[7]
+; NONEON-NOSVE-NEXT: mov v2.h[2], w8
+; NONEON-NOSVE-NEXT: udiv w3, w2, w1
+; NONEON-NOSVE-NEXT: msub w8, w0, w17, w18
+; NONEON-NOSVE-NEXT: mov v2.h[3], w8
+; NONEON-NOSVE-NEXT: udiv w9, w5, w4
+; NONEON-NOSVE-NEXT: msub w8, w3, w1, w2
+; NONEON-NOSVE-NEXT: mov v2.h[4], w8
+; NONEON-NOSVE-NEXT: udiv w12, w11, w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w4, w5
+; NONEON-NOSVE-NEXT: mov v2.h[5], w8
+; NONEON-NOSVE-NEXT: udiv w9, w14, w13
+; NONEON-NOSVE-NEXT: msub w8, w12, w10, w11
+; NONEON-NOSVE-NEXT: mov v2.h[6], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w13, w14
+; NONEON-NOSVE-NEXT: mov v2.h[7], w8
+; NONEON-NOSVE-NEXT: mov v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%res = urem <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -671,6 +2017,139 @@ define void @urem_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: mls z0.h, p0/m, z7.h, z1.h
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #144
+; NONEON-NOSVE-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #64] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #80] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #96] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #112] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #128] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 144
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -40
+; NONEON-NOSVE-NEXT: .cfi_offset w24, -48
+; NONEON-NOSVE-NEXT: .cfi_offset w25, -56
+; NONEON-NOSVE-NEXT: .cfi_offset w26, -64
+; NONEON-NOSVE-NEXT: .cfi_offset w27, -72
+; NONEON-NOSVE-NEXT: .cfi_offset w28, -80
+; NONEON-NOSVE-NEXT: .cfi_offset w30, -88
+; NONEON-NOSVE-NEXT: .cfi_offset w29, -96
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q2, [x0]
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: umov w8, v1.h[1]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[1]
+; NONEON-NOSVE-NEXT: umov w20, v1.h[0]
+; NONEON-NOSVE-NEXT: umov w21, v0.h[0]
+; NONEON-NOSVE-NEXT: umov w19, v0.h[3]
+; NONEON-NOSVE-NEXT: umov w5, v1.h[4]
+; NONEON-NOSVE-NEXT: umov w2, v0.h[4]
+; NONEON-NOSVE-NEXT: umov w1, v3.h[1]
+; NONEON-NOSVE-NEXT: umov w23, v2.h[1]
+; NONEON-NOSVE-NEXT: umov w25, v3.h[0]
+; NONEON-NOSVE-NEXT: umov w26, v2.h[0]
+; NONEON-NOSVE-NEXT: umov w6, v1.h[5]
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #36] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w8, v1.h[2]
+; NONEON-NOSVE-NEXT: umov w9, v0.h[2]
+; NONEON-NOSVE-NEXT: umov w3, v0.h[5]
+; NONEON-NOSVE-NEXT: umov w4, v1.h[6]
+; NONEON-NOSVE-NEXT: umov w7, v0.h[6]
+; NONEON-NOSVE-NEXT: umov w28, v3.h[2]
+; NONEON-NOSVE-NEXT: umov w29, v2.h[2]
+; NONEON-NOSVE-NEXT: umov w15, v3.h[3]
+; NONEON-NOSVE-NEXT: umov w13, v2.h[3]
+; NONEON-NOSVE-NEXT: umov w12, v3.h[4]
+; NONEON-NOSVE-NEXT: umov w14, v3.h[5]
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w11, w21, w20
+; NONEON-NOSVE-NEXT: str w10, [sp, #44] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: umov w8, v1.h[3]
+; NONEON-NOSVE-NEXT: stp w8, w11, [sp] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w11, v2.h[4]
+; NONEON-NOSVE-NEXT: ldr w22, [sp, #4] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w20, w22, w20, w21
+; NONEON-NOSVE-NEXT: udiv w9, w19, w8
+; NONEON-NOSVE-NEXT: str w10, [sp, #32] // 4-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w10, v3.h[6]
+; NONEON-NOSVE-NEXT: fmov s5, w20
+; NONEON-NOSVE-NEXT: umov w20, v3.h[7]
+; NONEON-NOSVE-NEXT: udiv w8, w2, w5
+; NONEON-NOSVE-NEXT: udiv w24, w23, w1
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: udiv w27, w26, w25
+; NONEON-NOSVE-NEXT: msub w1, w24, w1, w23
+; NONEON-NOSVE-NEXT: ldp w24, w23, [sp, #40] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w9, w3, w6
+; NONEON-NOSVE-NEXT: msub w21, w27, w25, w26
+; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w23, w23, w25, w24
+; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: fmov s4, w21
+; NONEON-NOSVE-NEXT: mov v5.h[1], w23
+; NONEON-NOSVE-NEXT: ldp w23, w21, [sp, #28] // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.h[1], w1
+; NONEON-NOSVE-NEXT: udiv w8, w7, w4
+; NONEON-NOSVE-NEXT: msub w21, w21, w25, w23
+; NONEON-NOSVE-NEXT: umov w23, v2.h[7]
+; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #80] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.h[2], w21
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: udiv w30, w29, w28
+; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: umov w9, v2.h[5]
+; NONEON-NOSVE-NEXT: umov w8, v2.h[6]
+; NONEON-NOSVE-NEXT: udiv w18, w13, w15
+; NONEON-NOSVE-NEXT: msub w1, w30, w28, w29
+; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #64] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.h[2], w1
+; NONEON-NOSVE-NEXT: udiv w16, w11, w12
+; NONEON-NOSVE-NEXT: msub w13, w18, w15, w13
+; NONEON-NOSVE-NEXT: ldr w15, [sp, #20] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldr w18, [sp] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w15, w15, w18, w19
+; NONEON-NOSVE-NEXT: mov v4.h[3], w13
+; NONEON-NOSVE-NEXT: umov w13, v1.h[7]
+; NONEON-NOSVE-NEXT: mov v5.h[3], w15
+; NONEON-NOSVE-NEXT: umov w15, v0.h[7]
+; NONEON-NOSVE-NEXT: udiv w17, w9, w14
+; NONEON-NOSVE-NEXT: msub w11, w16, w12, w11
+; NONEON-NOSVE-NEXT: ldr w12, [sp, #16] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w12, w12, w5, w2
+; NONEON-NOSVE-NEXT: mov v4.h[4], w11
+; NONEON-NOSVE-NEXT: ldr w11, [sp, #12] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v5.h[4], w12
+; NONEON-NOSVE-NEXT: msub w11, w11, w6, w3
+; NONEON-NOSVE-NEXT: udiv w24, w8, w10
+; NONEON-NOSVE-NEXT: msub w9, w17, w14, w9
+; NONEON-NOSVE-NEXT: mov v5.h[5], w11
+; NONEON-NOSVE-NEXT: mov v4.h[5], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #8] // 4-byte Folded Reload
+; NONEON-NOSVE-NEXT: msub w9, w9, w4, w7
+; NONEON-NOSVE-NEXT: udiv w18, w23, w20
+; NONEON-NOSVE-NEXT: msub w8, w24, w10, w8
+; NONEON-NOSVE-NEXT: mov v5.h[6], w9
+; NONEON-NOSVE-NEXT: mov v4.h[6], w8
+; NONEON-NOSVE-NEXT: udiv w12, w15, w13
+; NONEON-NOSVE-NEXT: msub w8, w18, w20, w23
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #96] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v4.h[7], w8
+; NONEON-NOSVE-NEXT: msub w9, w12, w13, w15
+; NONEON-NOSVE-NEXT: mov v5.h[7], w9
+; NONEON-NOSVE-NEXT: stp q4, q5, [x0]
+; NONEON-NOSVE-NEXT: add sp, sp, #144
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = urem <16 x i16> %op1, %op2
@@ -689,6 +2168,23 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: mov w11, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w12, v0.s[1]
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: msub w9, w13, w11, w12
+; NONEON-NOSVE-NEXT: mov v0.s[1], w9
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = urem <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -704,6 +2200,30 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov w11, s1
+; NONEON-NOSVE-NEXT: fmov w12, s0
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: mov w14, v1.s[2]
+; NONEON-NOSVE-NEXT: mov w15, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w17, v1.s[3]
+; NONEON-NOSVE-NEXT: mov w18, v0.s[3]
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: fmov s0, w11
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v0.s[1], w8
+; NONEON-NOSVE-NEXT: udiv w9, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v0.s[2], w8
+; NONEON-NOSVE-NEXT: msub w8, w9, w17, w18
+; NONEON-NOSVE-NEXT: mov v0.s[3], w8
+; NONEON-NOSVE-NEXT: ret
%res = urem <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -723,6 +2243,65 @@ define void @urem_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: .cfi_offset w19, -8
+; NONEON-NOSVE-NEXT: .cfi_offset w20, -16
+; NONEON-NOSVE-NEXT: .cfi_offset w21, -24
+; NONEON-NOSVE-NEXT: .cfi_offset w22, -32
+; NONEON-NOSVE-NEXT: .cfi_offset w23, -48
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: fmov w12, s0
+; NONEON-NOSVE-NEXT: fmov w3, s2
+; NONEON-NOSVE-NEXT: mov w9, v0.s[1]
+; NONEON-NOSVE-NEXT: fmov w11, s1
+; NONEON-NOSVE-NEXT: fmov w2, s3
+; NONEON-NOSVE-NEXT: mov w8, v1.s[1]
+; NONEON-NOSVE-NEXT: mov w17, v3.s[1]
+; NONEON-NOSVE-NEXT: mov w18, v2.s[1]
+; NONEON-NOSVE-NEXT: mov w14, v1.s[2]
+; NONEON-NOSVE-NEXT: mov w15, v0.s[2]
+; NONEON-NOSVE-NEXT: mov w5, v3.s[2]
+; NONEON-NOSVE-NEXT: mov w6, v2.s[2]
+; NONEON-NOSVE-NEXT: udiv w13, w12, w11
+; NONEON-NOSVE-NEXT: mov w19, v3.s[3]
+; NONEON-NOSVE-NEXT: mov w20, v2.s[3]
+; NONEON-NOSVE-NEXT: mov w22, v1.s[3]
+; NONEON-NOSVE-NEXT: mov w23, v0.s[3]
+; NONEON-NOSVE-NEXT: udiv w4, w3, w2
+; NONEON-NOSVE-NEXT: msub w11, w13, w11, w12
+; NONEON-NOSVE-NEXT: fmov s1, w11
+; NONEON-NOSVE-NEXT: udiv w10, w9, w8
+; NONEON-NOSVE-NEXT: msub w12, w4, w2, w3
+; NONEON-NOSVE-NEXT: fmov s0, w12
+; NONEON-NOSVE-NEXT: udiv w1, w18, w17
+; NONEON-NOSVE-NEXT: msub w8, w10, w8, w9
+; NONEON-NOSVE-NEXT: mov v1.s[1], w8
+; NONEON-NOSVE-NEXT: udiv w16, w15, w14
+; NONEON-NOSVE-NEXT: msub w13, w1, w17, w18
+; NONEON-NOSVE-NEXT: mov v0.s[1], w13
+; NONEON-NOSVE-NEXT: udiv w7, w6, w5
+; NONEON-NOSVE-NEXT: msub w8, w16, w14, w15
+; NONEON-NOSVE-NEXT: mov v1.s[2], w8
+; NONEON-NOSVE-NEXT: udiv w21, w20, w19
+; NONEON-NOSVE-NEXT: msub w10, w7, w5, w6
+; NONEON-NOSVE-NEXT: mov v0.s[2], w10
+; NONEON-NOSVE-NEXT: udiv w9, w23, w22
+; NONEON-NOSVE-NEXT: msub w10, w21, w19, w20
+; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v0.s[3], w10
+; NONEON-NOSVE-NEXT: msub w8, w9, w22, w23
+; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; NONEON-NOSVE-NEXT: mov v1.s[3], w8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = urem <8 x i32> %op1, %op2
@@ -741,6 +2320,17 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $q1
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: udiv x10, x9, x8
+; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: ret
%res = urem <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -756,6 +2346,20 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: mov x11, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x12, v0.d[1]
+; NONEON-NOSVE-NEXT: udiv x10, x9, x8
+; NONEON-NOSVE-NEXT: udiv x13, x12, x11
+; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
+; NONEON-NOSVE-NEXT: fmov d0, x8
+; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: ret
%res = urem <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -775,6 +2379,33 @@ define void @urem_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: urem_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: fmov x15, d2
+; NONEON-NOSVE-NEXT: mov x12, v2.d[1]
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: fmov x14, d3
+; NONEON-NOSVE-NEXT: mov x11, v3.d[1]
+; NONEON-NOSVE-NEXT: mov x17, v1.d[1]
+; NONEON-NOSVE-NEXT: mov x18, v0.d[1]
+; NONEON-NOSVE-NEXT: udiv x10, x9, x8
+; NONEON-NOSVE-NEXT: udiv x16, x15, x14
+; NONEON-NOSVE-NEXT: msub x8, x10, x8, x9
+; NONEON-NOSVE-NEXT: fmov d1, x8
+; NONEON-NOSVE-NEXT: udiv x13, x12, x11
+; NONEON-NOSVE-NEXT: msub x10, x16, x14, x15
+; NONEON-NOSVE-NEXT: fmov d0, x10
+; NONEON-NOSVE-NEXT: udiv x1, x18, x17
+; NONEON-NOSVE-NEXT: msub x9, x13, x11, x12
+; NONEON-NOSVE-NEXT: mov v0.d[1], x9
+; NONEON-NOSVE-NEXT: msub x11, x1, x17, x18
+; NONEON-NOSVE-NEXT: mov v1.d[1], x11
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = urem <4 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index bfffe4b6315d7..0108fb580b947 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -16,6 +17,14 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.4h, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <4 x i8> %op1, <4 x i8> %op2
ret <4 x i8> %sel
}
@@ -31,6 +40,14 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.8b, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
ret <8 x i8> %sel
}
@@ -46,6 +63,14 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.16b, w8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
ret <16 x i8> %sel
}
@@ -64,6 +89,20 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: sel z1.b, p0, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w2, #0x1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q4, [x1, #16]
+; NONEON-NOSVE-NEXT: dup v0.16b, w8
+; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load volatile <32 x i8>, ptr %a
%op2 = load volatile <32 x i8>, ptr %b
%sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
@@ -83,6 +122,14 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.2s, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <2 x i16> %op1, <2 x i16> %op2
ret <2 x i16> %sel
}
@@ -99,6 +146,14 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.4h, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
ret <4 x i16> %sel
}
@@ -115,6 +170,14 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.8h, w8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
ret <8 x i16> %sel
}
@@ -134,6 +197,20 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w2, #0x1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q4, [x1, #16]
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load volatile <16 x i16>, ptr %a
%op2 = load volatile <16 x i16>, ptr %b
%sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
@@ -153,6 +230,14 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.2s, w8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
ret <2 x i32> %sel
}
@@ -169,6 +254,14 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: dup v2.4s, w8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
ret <4 x i32> %sel
}
@@ -188,6 +281,20 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w2, #0x1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: csetm w8, ne
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q4, [x1, #16]
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load volatile <8 x i32>, ptr %a
%op2 = load volatile <8 x i32>, ptr %b
%sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
@@ -208,6 +315,14 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: fmov d2, x8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
ret <1 x i64> %sel
}
@@ -225,6 +340,14 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: dup v2.2d, x8
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
ret <2 x i64> %sel
}
@@ -245,6 +368,20 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) {
; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w2, #0x1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: ldr q3, [x1]
+; NONEON-NOSVE-NEXT: ldr q4, [x1, #16]
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: bif v1.16b, v3.16b, v0.16b
+; NONEON-NOSVE-NEXT: bsl v0.16b, v2.16b, v4.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load volatile <4 x i64>, ptr %a
%op2 = load volatile <4 x i64>, ptr %b
%sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 9319bd69c25fb..f7198e3042ad5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -19,6 +20,16 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h
+; NONEON-NOSVE-NEXT: sshl v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = ashr <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -32,6 +43,12 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.8b, v1.8b
+; NONEON-NOSVE-NEXT: sshl v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = ashr <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -45,6 +62,12 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: sshl v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = ashr <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -60,6 +83,17 @@ define void @ashr_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: asr z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: sshl v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: sshl v1.16b, v3.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = ashr <32 x i8> %op1, %op2
@@ -78,6 +112,16 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s
+; NONEON-NOSVE-NEXT: sshl v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = ashr <2 x i16> %op1, %op2
ret <2 x i16> %res
}
@@ -91,6 +135,12 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h
+; NONEON-NOSVE-NEXT: sshl v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = ashr <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -104,6 +154,12 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: sshl v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = ashr <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -119,6 +175,17 @@ define void @ashr_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: asr z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: sshl v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: sshl v1.8h, v3.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = ashr <16 x i16> %op1, %op2
@@ -135,6 +202,12 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s
+; NONEON-NOSVE-NEXT: sshl v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = ashr <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -148,6 +221,12 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: sshl v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = ashr <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -163,6 +242,17 @@ define void @ashr_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: asr z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: sshl v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: sshl v1.4s, v3.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = ashr <8 x i32> %op1, %op2
@@ -179,6 +269,12 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg d1, d1
+; NONEON-NOSVE-NEXT: sshl d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = ashr <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -192,6 +288,12 @@ define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: sshl v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = ashr <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -207,6 +309,17 @@ define void @ashr_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: asr z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ashr_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: sshl v0.2d, v2.2d, v0.2d
+; NONEON-NOSVE-NEXT: sshl v1.2d, v3.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = ashr <4 x i64> %op1, %op2
@@ -229,6 +342,15 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b
+; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h
+; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = lshr <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -242,6 +364,12 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushl v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = lshr <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -255,6 +383,12 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: ushl v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = lshr <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -270,6 +404,17 @@ define void @lshr_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: lsr z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: neg v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: ushl v0.16b, v2.16b, v0.16b
+; NONEON-NOSVE-NEXT: ushl v1.16b, v3.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = lshr <32 x i8> %op1, %op2
@@ -288,6 +433,15 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b
+; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s
+; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = lshr <2 x i16> %op1, %op2
ret <2 x i16> %res
}
@@ -301,6 +455,12 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.4h, v1.4h
+; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = lshr <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -314,6 +474,12 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: ushl v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = lshr <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -329,6 +495,17 @@ define void @lshr_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: neg v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: ushl v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: ushl v1.8h, v3.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = lshr <16 x i16> %op1, %op2
@@ -345,6 +522,12 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.2s, v1.2s
+; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = lshr <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -358,6 +541,12 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: ushl v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = lshr <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -373,6 +562,17 @@ define void @lshr_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: neg v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: ushl v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: ushl v1.4s, v3.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = lshr <8 x i32> %op1, %op2
@@ -389,6 +589,12 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg d1, d1
+; NONEON-NOSVE-NEXT: ushl d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = lshr <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -402,6 +608,12 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: ushl v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = lshr <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -417,6 +629,17 @@ define void @lshr_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: lshr_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: neg v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: neg v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: ushl v0.2d, v2.2d, v0.2d
+; NONEON-NOSVE-NEXT: ushl v1.2d, v3.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = lshr <4 x i64> %op1, %op2
@@ -438,6 +661,13 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v2i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0x0000ff000000ff
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = shl <2 x i8> %op1, %op2
ret <2 x i8> %res
}
@@ -452,6 +682,13 @@ define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = shl <4 x i8> %op1, %op2
ret <4 x i8> %res
}
@@ -465,6 +702,11 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ret
%res = shl <8 x i8> %op1, %op2
ret <8 x i8> %res
}
@@ -478,6 +720,11 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%res = shl <16 x i8> %op1, %op2
ret <16 x i8> %res
}
@@ -493,6 +740,15 @@ define void @shl_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: ushl v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: ushl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = shl <32 x i8> %op1, %op2
@@ -509,6 +765,11 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%res = shl <4 x i16> %op1, %op2
ret <4 x i16> %res
}
@@ -522,6 +783,11 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: ret
%res = shl <8 x i16> %op1, %op2
ret <8 x i16> %res
}
@@ -537,6 +803,15 @@ define void @shl_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: ushl v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: ushl v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = shl <16 x i16> %op1, %op2
@@ -553,6 +828,11 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: ret
%res = shl <2 x i32> %op1, %op2
ret <2 x i32> %res
}
@@ -566,6 +846,11 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: ret
%res = shl <4 x i32> %op1, %op2
ret <4 x i32> %res
}
@@ -581,6 +866,15 @@ define void @shl_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: ushl v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: ushl v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%res = shl <8 x i32> %op1, %op2
@@ -597,6 +891,11 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl d0, d0, d1
+; NONEON-NOSVE-NEXT: ret
%res = shl <1 x i64> %op1, %op2
ret <1 x i64> %res
}
@@ -610,6 +909,11 @@ define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushl v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: ret
%res = shl <2 x i64> %op1, %op2
ret <2 x i64> %res
}
@@ -625,6 +929,15 @@ define void @shl_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shl_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: ushl v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: ushl v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%res = shl <4 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index 27dbfc9a23a8d..42d3b9d8f71f8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -15,6 +16,13 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) {
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <4 x i16> %op1 to <4 x half>
ret <4 x half> %res
}
@@ -27,6 +35,22 @@ define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) {
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ushll v1.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v1.4s
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: str q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = uitofp <8 x i16> %op1 to <8 x half>
store <8 x half> %res, ptr %b
@@ -42,6 +66,29 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: ucvtf z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ushll v2.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v0.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ucvtf v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: ucvtf v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v3.4s
+; NONEON-NOSVE-NEXT: stp q2, q0, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = uitofp <16 x i16> %op1 to <16 x half>
store <16 x half> %res, ptr %b
@@ -61,6 +108,13 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) {
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ucvtf v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i16> %op1 to <2 x float>
ret <2 x float> %res
}
@@ -74,6 +128,12 @@ define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) {
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <4 x i16> %op1 to <4 x float>
ret <4 x float> %res
}
@@ -90,6 +150,20 @@ define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = uitofp <8 x i16> %op1 to <8 x float>
store <8 x float> %res, ptr %b
@@ -114,6 +188,26 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ucvtf v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: ucvtf v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = uitofp <16 x i16> %op1 to <16 x float>
store <16 x float> %res, ptr %b
@@ -132,6 +226,13 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
; CHECK-NEXT: and w8, w8, #0xffff
; CHECK-NEXT: ucvtf d0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v1i16_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: umov w8, v0.h[0]
+; NONEON-NOSVE-NEXT: ucvtf d0, w8
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <1 x i16> %op1 to <1 x double>
ret <1 x double> %res
}
@@ -146,6 +247,14 @@ define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) {
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d1, #0x00ffff0000ffff
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i16> %op1 to <2 x double>
ret <2 x double> %res
}
@@ -163,6 +272,21 @@ define void @ucvtf_v4i16_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i16>, ptr %a
%res = uitofp <4 x i16> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -191,6 +315,30 @@ define void @ucvtf_v8i16_v8f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q3, [x1]
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i16_v8f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = uitofp <8 x i16> %op1 to <8 x double>
store <8 x double> %res, ptr %b
@@ -239,6 +387,46 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v16i16_v16f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #72]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #40]
+; NONEON-NOSVE-NEXT: ushll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ushll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: ushll v6.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: ushll v7.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: ucvtf v5.2d, v5.2d
+; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: ucvtf v4.2d, v4.2d
+; NONEON-NOSVE-NEXT: stp q0, q5, [x1]
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v7.2d
+; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v6.2d
+; NONEON-NOSVE-NEXT: stp q2, q0, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = uitofp <16 x i16> %op1 to <16 x double>
store <16 x double> %res, ptr %b
@@ -258,6 +446,13 @@ define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i32> %op1 to <2 x half>
ret <2 x half> %res
}
@@ -271,6 +466,12 @@ define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <4 x i32> %op1 to <4 x half>
ret <4 x half> %res
}
@@ -288,6 +489,15 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = uitofp <8 x i32> %op1 to <8 x half>
ret <8 x half> %res
@@ -312,6 +522,21 @@ define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
; CHECK-NEXT: stp q2, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v16i32_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ucvtf v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: ucvtf v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v3.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i32>, ptr %a
%res = uitofp <16 x i32> %op1 to <16 x half>
store <16 x half> %res, ptr %b
@@ -330,6 +555,11 @@ define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) {
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ucvtf v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i32> %op1 to <2 x float>
ret <2 x float> %res
}
@@ -342,6 +572,11 @@ define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) {
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <4 x i32> %op1 to <4 x float>
ret <4 x float> %res
}
@@ -355,6 +590,14 @@ define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: ucvtf z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ucvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ucvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = uitofp <8 x i32> %op1 to <8 x float>
store <8 x float> %res, ptr %b
@@ -374,6 +617,12 @@ define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) {
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i32> %op1 to <2 x double>
ret <2 x double> %res
}
@@ -390,6 +639,20 @@ define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i32>, ptr %a
%res = uitofp <4 x i32> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -414,6 +677,26 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: ushll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ushll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ushll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = uitofp <8 x i32> %op1 to <8 x double>
store <8 x double> %res, ptr %b
@@ -440,6 +723,18 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov x8, v0.d[1]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: ucvtf s1, x9
+; NONEON-NOSVE-NEXT: ucvtf s0, x8
+; NONEON-NOSVE-NEXT: fcvt h2, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s1
+; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i64> %op1 to <2 x half>
ret <2 x half> %res
}
@@ -460,6 +755,16 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = uitofp <4 x i64> %op1 to <4 x half>
ret <4 x half> %res
@@ -493,6 +798,22 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0, #32]
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn v2.2s, v2.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v2.4s, v3.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v2.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i64>, ptr %a
%res = uitofp <8 x i64> %op1 to <8 x half>
ret <8 x half> %res
@@ -511,6 +832,12 @@ define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i64> %op1 to <2 x float>
ret <2 x float> %res
}
@@ -528,6 +855,15 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) {
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = uitofp <4 x i64> %op1 to <4 x float>
ret <4 x float> %res
@@ -552,6 +888,21 @@ define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
; CHECK-NEXT: stp q2, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: ucvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn v1.2s, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v2.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v1.4s, v3.2d
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i64>, ptr %a
%res = uitofp <8 x i64> %op1 to <8 x float>
store <8 x float> %res, ptr %b
@@ -570,6 +921,11 @@ define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) {
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = uitofp <2 x i64> %op1 to <2 x double>
ret <2 x double> %res
}
@@ -583,6 +939,14 @@ define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ucvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ucvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = uitofp <4 x i64> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -601,6 +965,13 @@ define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) {
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <4 x i16> %op1 to <4 x half>
ret <4 x half> %res
}
@@ -613,6 +984,22 @@ define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) {
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v1.4s
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: str q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = sitofp <8 x i16> %op1 to <8 x half>
store <8 x half> %res, ptr %b
@@ -628,6 +1015,29 @@ define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: scvtf z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: sshll v2.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v0.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: scvtf v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: scvtf v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v2.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v2.8h, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v3.4s
+; NONEON-NOSVE-NEXT: stp q2, q0, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = sitofp <16 x i16> %op1 to <16 x half>
store <16 x half> %res, ptr %b
@@ -646,6 +1056,13 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) {
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: scvtf v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i16> %op1 to <2 x float>
ret <2 x float> %res
}
@@ -659,6 +1076,12 @@ define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) {
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <4 x i16> %op1 to <4 x float>
ret <4 x float> %res
}
@@ -675,6 +1098,20 @@ define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = sitofp <8 x i16> %op1 to <8 x float>
store <8 x float> %res, ptr %b
@@ -699,6 +1136,26 @@ define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: scvtf v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: scvtf v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = sitofp <16 x i16> %op1 to <16 x float>
store <16 x float> %res, ptr %b
@@ -720,6 +1177,14 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) {
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i16> %op1 to <2 x double>
ret <2 x double> %res
}
@@ -737,6 +1202,21 @@ define void @scvtf_v4i16_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i16>, ptr %a
%res = sitofp <4 x i16> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -765,6 +1245,30 @@ define void @scvtf_v8i16_v8f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q3, [x1]
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v8i16_v8f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #40]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%res = sitofp <8 x i16> %op1 to <8 x double>
store <8 x double> %res, ptr %b
@@ -813,6 +1317,46 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v16i16_v16f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-96]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #8]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NONEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NONEON-NOSVE-NEXT: stp q2, q0, [sp, #32]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: stp q3, q1, [sp, #64]
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #88]
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #72]
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #40]
+; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: sshll v6.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: sshll v7.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: scvtf v5.2d, v5.2d
+; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: scvtf v4.2d, v4.2d
+; NONEON-NOSVE-NEXT: stp q0, q5, [x1]
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v7.2d
+; NONEON-NOSVE-NEXT: stp q1, q4, [x1, #64]
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v6.2d
+; NONEON-NOSVE-NEXT: stp q2, q0, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #96]
+; NONEON-NOSVE-NEXT: add sp, sp, #96
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = sitofp <16 x i16> %op1 to <16 x double>
store <16 x double> %res, ptr %b
@@ -832,6 +1376,13 @@ define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i32> %op1 to <2 x half>
ret <2 x half> %res
}
@@ -845,6 +1396,12 @@ define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <4 x i32> %op1 to <4 x half>
ret <4 x half> %res
}
@@ -862,6 +1419,15 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v0.8h, v1.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = sitofp <8 x i32> %op1 to <8 x half>
ret <8 x half> %res
@@ -879,6 +1445,11 @@ define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) {
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: scvtf v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i32> %op1 to <2 x float>
ret <2 x float> %res
}
@@ -891,6 +1462,11 @@ define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) {
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <4 x i32> %op1 to <4 x float>
ret <4 x float> %res
}
@@ -904,6 +1480,14 @@ define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: scvtf z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: scvtf v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: scvtf v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = sitofp <8 x i32> %op1 to <8 x float>
store <8 x float> %res, ptr %b
@@ -923,6 +1507,12 @@ define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) {
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i32> %op1 to <2 x double>
ret <2 x double> %res
}
@@ -939,6 +1529,20 @@ define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i32>, ptr %a
%res = sitofp <4 x i32> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -963,6 +1567,26 @@ define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q0, [x1, #32]
; CHECK-NEXT: stp q3, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-32]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
+; NONEON-NOSVE-NEXT: ldr d2, [sp, #24]
+; NONEON-NOSVE-NEXT: ldr d3, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #32
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = sitofp <8 x i32> %op1 to <8 x double>
store <8 x double> %res, ptr %b
@@ -1007,6 +1631,40 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q2, q1, [x1]
; CHECK-NEXT: stp q4, q0, [x1, #32]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v16i32_v16f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
+; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #-64]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64
+; NONEON-NOSVE-NEXT: stp q1, q3, [sp, #32]
+; NONEON-NOSVE-NEXT: ldr d4, [sp, #24]
+; NONEON-NOSVE-NEXT: sshll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: ldr d5, [sp, #56]
+; NONEON-NOSVE-NEXT: sshll v3.2d, v3.2s, #0
+; NONEON-NOSVE-NEXT: ldr d6, [sp, #40]
+; NONEON-NOSVE-NEXT: sshll v4.2d, v4.2s, #0
+; NONEON-NOSVE-NEXT: ldr d7, [sp, #8]
+; NONEON-NOSVE-NEXT: sshll v1.2d, v1.2s, #0
+; NONEON-NOSVE-NEXT: sshll v5.2d, v5.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v2.2d, v2.2d
+; NONEON-NOSVE-NEXT: sshll v6.2d, v6.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v3.2d, v3.2d
+; NONEON-NOSVE-NEXT: sshll v0.2d, v0.2s, #0
+; NONEON-NOSVE-NEXT: sshll v7.2d, v7.2s, #0
+; NONEON-NOSVE-NEXT: scvtf v4.2d, v4.2d
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: scvtf v5.2d, v5.2d
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: stp q2, q4, [x1, #96]
+; NONEON-NOSVE-NEXT: scvtf v2.2d, v6.2d
+; NONEON-NOSVE-NEXT: stp q3, q5, [x1, #64]
+; NONEON-NOSVE-NEXT: scvtf v3.2d, v7.2d
+; NONEON-NOSVE-NEXT: stp q1, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: add sp, sp, #64
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i32>, ptr %a
%res = sitofp <16 x i32> %op1 to <16 x double>
store <16 x double> %res, ptr %b
@@ -1033,6 +1691,18 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) {
; CHECK-NEXT: ldr d0, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov x8, v0.d[1]
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: scvtf s1, x9
+; NONEON-NOSVE-NEXT: scvtf s0, x8
+; NONEON-NOSVE-NEXT: fcvt h2, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s1
+; NONEON-NOSVE-NEXT: mov v0.h[1], v2.h[0]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i64> %op1 to <2 x half>
ret <2 x half> %res
}
@@ -1053,6 +1723,16 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = sitofp <4 x i64> %op1 to <4 x half>
ret <4 x half> %res
@@ -1071,6 +1751,12 @@ define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) {
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i64> %op1 to <2 x float>
ret <2 x float> %res
}
@@ -1088,6 +1774,15 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) {
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: fcvtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: fcvtn2 v0.4s, v1.2d
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = sitofp <4 x i64> %op1 to <4 x float>
ret <4 x float> %res
@@ -1105,6 +1800,11 @@ define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) {
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: ret
%res = sitofp <2 x i64> %op1 to <2 x double>
ret <2 x double> %res
}
@@ -1118,6 +1818,14 @@ define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: scvtf z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: scvtf v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: scvtf v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = sitofp <4 x i64> %op1 to <4 x double>
store <4 x double> %res, ptr %b
@@ -1130,6 +1838,13 @@ define half @scvtf_i16_f16(ptr %0) {
; CHECK-NEXT: ldrsh w8, [x0]
; CHECK-NEXT: scvtf h0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i16_f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldrsh w8, [x0]
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%2 = load i16, ptr %0, align 64
%3 = sitofp i16 %2 to half
ret half %3
@@ -1141,6 +1856,12 @@ define float @scvtf_i16_f32(ptr %0) {
; CHECK-NEXT: ldrsh w8, [x0]
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i16_f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldrsh w8, [x0]
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
%2 = load i16, ptr %0, align 64
%3 = sitofp i16 %2 to float
ret float %3
@@ -1152,6 +1873,12 @@ define double @scvtf_i16_f64(ptr %0) {
; CHECK-NEXT: ldrsh w8, [x0]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i16_f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldrsh w8, [x0]
+; NONEON-NOSVE-NEXT: scvtf d0, w8
+; NONEON-NOSVE-NEXT: ret
%2 = load i16, ptr %0, align 64
%3 = sitofp i16 %2 to double
ret double %3
@@ -1163,6 +1890,13 @@ define half @scvtf_i32_f16(ptr %0) {
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: scvtf h0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i32_f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%2 = load i32, ptr %0, align 64
%3 = sitofp i32 %2 to half
ret half %3
@@ -1174,6 +1908,12 @@ define float @scvtf_i32_f32(ptr %0) {
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: scvtf s0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i32_f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: scvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
%2 = load i32, ptr %0, align 64
%3 = sitofp i32 %2 to float
ret float %3
@@ -1185,6 +1925,12 @@ define double @scvtf_i32_f64(ptr %0) {
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: scvtf d0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i32_f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: scvtf d0, w8
+; NONEON-NOSVE-NEXT: ret
%2 = load i32, ptr %0, align 64
%3 = sitofp i32 %2 to double
ret double %3
@@ -1196,6 +1942,13 @@ define half @scvtf_i64_f16(ptr %0) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: scvtf h0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i64_f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr x8, [x0]
+; NONEON-NOSVE-NEXT: scvtf s0, x8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%2 = load i64, ptr %0, align 64
%3 = sitofp i64 %2 to half
ret half %3
@@ -1207,6 +1960,12 @@ define float @scvtf_i64_f32(ptr %0) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: scvtf s0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i64_f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr x8, [x0]
+; NONEON-NOSVE-NEXT: scvtf s0, x8
+; NONEON-NOSVE-NEXT: ret
%2 = load i64, ptr %0, align 64
%3 = sitofp i64 %2 to float
ret float %3
@@ -1218,6 +1977,12 @@ define double @scvtf_i64_f64(ptr %0) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: scvtf d0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: scvtf_i64_f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr x8, [x0]
+; NONEON-NOSVE-NEXT: scvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
%2 = load i64, ptr %0, align 64
%3 = sitofp i64 %2 to double
ret double %3
@@ -1229,6 +1994,13 @@ define half @ucvtf_i16_f16(ptr %0) {
; CHECK-NEXT: ldrh w8, [x0]
; CHECK-NEXT: ucvtf h0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i16_f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: ucvtf s0, s0
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%2 = load i16, ptr %0, align 64
%3 = uitofp i16 %2 to half
ret half %3
@@ -1240,6 +2012,12 @@ define float @ucvtf_i16_f32(ptr %0) {
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ucvtf s0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i16_f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: ucvtf s0, s0
+; NONEON-NOSVE-NEXT: ret
%2 = load i16, ptr %0, align 64
%3 = uitofp i16 %2 to float
ret float %3
@@ -1251,6 +2029,12 @@ define double @ucvtf_i16_f64(ptr %0) {
; CHECK-NEXT: ldr h0, [x0]
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i16_f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: ucvtf d0, d0
+; NONEON-NOSVE-NEXT: ret
%2 = load i16, ptr %0, align 64
%3 = uitofp i16 %2 to double
ret double %3
@@ -1262,6 +2046,13 @@ define half @ucvtf_i32_f16(ptr %0) {
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: ucvtf h0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i32_f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%2 = load i32, ptr %0, align 64
%3 = uitofp i32 %2 to half
ret half %3
@@ -1273,6 +2064,12 @@ define float @ucvtf_i32_f32(ptr %0) {
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: ucvtf s0, w8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i32_f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: ucvtf s0, w8
+; NONEON-NOSVE-NEXT: ret
%2 = load i32, ptr %0, align 64
%3 = uitofp i32 %2 to float
ret float %3
@@ -1284,6 +2081,12 @@ define double @ucvtf_i32_f64(ptr %0) {
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ucvtf d0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i32_f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: ucvtf d0, d0
+; NONEON-NOSVE-NEXT: ret
%2 = load i32, ptr %0, align 64
%3 = uitofp i32 %2 to double
ret double %3
@@ -1295,6 +2098,13 @@ define half @ucvtf_i64_f16(ptr %0) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ucvtf h0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i64_f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr x8, [x0]
+; NONEON-NOSVE-NEXT: ucvtf s0, x8
+; NONEON-NOSVE-NEXT: fcvt h0, s0
+; NONEON-NOSVE-NEXT: ret
%2 = load i64, ptr %0, align 64
%3 = uitofp i64 %2 to half
ret half %3
@@ -1306,6 +2116,12 @@ define float @ucvtf_i64_f32(ptr %0) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ucvtf s0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i64_f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr x8, [x0]
+; NONEON-NOSVE-NEXT: ucvtf s0, x8
+; NONEON-NOSVE-NEXT: ret
%2 = load i64, ptr %0, align 64
%3 = uitofp i64 %2 to float
ret float %3
@@ -1317,6 +2133,12 @@ define double @ucvtf_i64_f64(ptr %0) {
; CHECK-NEXT: ldr x8, [x0]
; CHECK-NEXT: ucvtf d0, x8
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ucvtf_i64_f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr x8, [x0]
+; NONEON-NOSVE-NEXT: ucvtf d0, x8
+; NONEON-NOSVE-NEXT: ret
%2 = load i64, ptr %0, align 64
%3 = uitofp i64 %2 to double
ret double %3
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index 3775a64a89a0c..250929df6b3c3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -18,6 +19,13 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15
+; NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <4 x i1> %mask, <4 x i8> %op1, <4 x i8> %op2
ret <4 x i8> %sel
}
@@ -36,6 +44,13 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.8b, v2.8b, #7
+; NONEON-NOSVE-NEXT: cmlt v2.8b, v2.8b, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
ret <8 x i8> %sel
}
@@ -54,6 +69,13 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.16b, v2.16b, #7
+; NONEON-NOSVE-NEXT: cmlt v2.16b, v2.16b, #0
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
ret <16 x i8> %sel
}
@@ -70,6 +92,18 @@ define void @select_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: sel z1.b, p0, z2.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: cmeq v4.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: cmeq v5.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%mask = icmp eq <32 x i8> %op1, %op2
@@ -92,6 +126,13 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.2s, v2.2s, #31
+; NONEON-NOSVE-NEXT: cmlt v2.2s, v2.2s, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <2 x i1> %mask, <2 x i16> %op1, <2 x i16> %op2
ret <2 x i16> %sel
}
@@ -110,6 +151,13 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.4h, v2.4h, #15
+; NONEON-NOSVE-NEXT: cmlt v2.4h, v2.4h, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
ret <4 x i16> %sel
}
@@ -129,6 +177,14 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) {
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0
+; NONEON-NOSVE-NEXT: shl v2.8h, v2.8h, #15
+; NONEON-NOSVE-NEXT: cmlt v2.8h, v2.8h, #0
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
ret <8 x i16> %sel
}
@@ -145,6 +201,18 @@ define void @select_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: cmeq v4.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: cmeq v5.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%mask = icmp eq <16 x i16> %op1, %op2
@@ -167,6 +235,13 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v2.2s, v2.2s, #31
+; NONEON-NOSVE-NEXT: cmlt v2.2s, v2.2s, #0
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
ret <2 x i32> %sel
}
@@ -186,6 +261,14 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) {
; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NONEON-NOSVE-NEXT: shl v2.4s, v2.4s, #31
+; NONEON-NOSVE-NEXT: cmlt v2.4s, v2.4s, #0
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
ret <4 x i32> %sel
}
@@ -202,6 +285,18 @@ define void @select_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: cmeq v4.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: cmeq v5.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%mask = icmp eq <8 x i32> %op1, %op2
@@ -223,6 +318,14 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: tst w0, #0x1
+; NONEON-NOSVE-NEXT: csetm x8, ne
+; NONEON-NOSVE-NEXT: fmov d2, x8
+; NONEON-NOSVE-NEXT: bif v0.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: ret
%sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
ret <1 x i64> %sel
}
@@ -242,6 +345,14 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) {
; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ushll v2.2d, v2.2s, #0
+; NONEON-NOSVE-NEXT: shl v2.2d, v2.2d, #63
+; NONEON-NOSVE-NEXT: cmlt v2.2d, v2.2d, #0
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
ret <2 x i64> %sel
}
@@ -258,6 +369,18 @@ define void @select_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: select_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: cmeq v4.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: cmeq v5.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: bif v0.16b, v1.16b, v4.16b
+; NONEON-NOSVE-NEXT: mov v1.16b, v5.16b
+; NONEON-NOSVE-NEXT: bsl v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%mask = icmp eq <4 x i64> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index 918f0ccc0cf6a..42c439ca4b38d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -18,6 +19,19 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) {
; CHECK-NEXT: stp q2, q5, [x0, #32]
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q3, q4, [x0]
+; NONEON-NOSVE-NEXT: add v2.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v5.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: dup v0.4s, v1.s[2]
+; NONEON-NOSVE-NEXT: add v1.4s, v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: add v3.4s, v4.4s, v4.4s
+; NONEON-NOSVE-NEXT: stp q2, q5, [x0, #32]
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
entry:
%0 = load <16 x i32>, ptr %arg1, align 256
%1 = load <16 x i32>, ptr %arg2, align 256
@@ -42,6 +56,19 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
; CHECK-NEXT: stp q3, q4, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test2:
+; NONEON-NOSVE: // %bb.0: // %entry
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q3, q4, [x0]
+; NONEON-NOSVE-NEXT: add v2.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: dup v0.2s, v1.s[2]
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: add v3.4s, v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: add v4.4s, v4.4s, v4.4s
+; NONEON-NOSVE-NEXT: stp q2, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: stp q3, q4, [x0]
+; NONEON-NOSVE-NEXT: ret
entry:
%0 = load <16 x i32>, ptr %arg1, align 256
%1 = load <16 x i32>, ptr %arg2, align 256
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index 8c69d5b0bb375..992b667a2eafe 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -11,6 +12,13 @@ define <4 x i8> @load_v4i8(ptr %a) {
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%load = load <4 x i8>, ptr %a
ret <4 x i8> %load
}
@@ -20,6 +28,11 @@ define <8 x i8> @load_v8i8(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <8 x i8>, ptr %a
ret <8 x i8> %load
}
@@ -29,6 +42,11 @@ define <16 x i8> @load_v16i8(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <16 x i8>, ptr %a
ret <16 x i8> %load
}
@@ -38,6 +56,11 @@ define <32 x i8> @load_v32i8(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <32 x i8>, ptr %a
ret <32 x i8> %load
}
@@ -49,6 +72,15 @@ define <2 x i16> @load_v2i16(ptr %a) {
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldrh w8, [x0]
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: add x8, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%load = load <2 x i16>, ptr %a
ret <2 x i16> %load
}
@@ -58,6 +90,11 @@ define <2 x half> @load_v2f16(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <2 x half>, ptr %a
ret <2 x half> %load
}
@@ -67,6 +104,11 @@ define <4 x i16> @load_v4i16(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <4 x i16>, ptr %a
ret <4 x i16> %load
}
@@ -76,6 +118,11 @@ define <4 x half> @load_v4f16(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <4 x half>, ptr %a
ret <4 x half> %load
}
@@ -85,6 +132,11 @@ define <8 x i16> @load_v8i16(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <8 x i16>, ptr %a
ret <8 x i16> %load
}
@@ -94,6 +146,11 @@ define <8 x half> @load_v8f16(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <8 x half>, ptr %a
ret <8 x half> %load
}
@@ -103,6 +160,11 @@ define <16 x i16> @load_v16i16(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <16 x i16>, ptr %a
ret <16 x i16> %load
}
@@ -112,6 +174,11 @@ define <16 x half> @load_v16f16(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <16 x half>, ptr %a
ret <16 x half> %load
}
@@ -121,6 +188,11 @@ define <2 x i32> @load_v2i32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <2 x i32>, ptr %a
ret <2 x i32> %load
}
@@ -130,6 +202,11 @@ define <2 x float> @load_v2f32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <2 x float>, ptr %a
ret <2 x float> %load
}
@@ -139,6 +216,11 @@ define <4 x i32> @load_v4i32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <4 x i32>, ptr %a
ret <4 x i32> %load
}
@@ -148,6 +230,11 @@ define <4 x float> @load_v4f32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <4 x float>, ptr %a
ret <4 x float> %load
}
@@ -157,6 +244,11 @@ define <8 x i32> @load_v8i32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <8 x i32>, ptr %a
ret <8 x i32> %load
}
@@ -166,6 +258,11 @@ define <8 x float> @load_v8f32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <8 x float>, ptr %a
ret <8 x float> %load
}
@@ -175,6 +272,11 @@ define <1 x i64> @load_v1i64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <1 x i64>, ptr %a
ret <1 x i64> %load
}
@@ -184,6 +286,11 @@ define <1 x double> @load_v1f64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <1 x double>, ptr %a
ret <1 x double> %load
}
@@ -193,6 +300,11 @@ define <2 x i64> @load_v2i64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <2 x i64>, ptr %a
ret <2 x i64> %load
}
@@ -202,6 +314,11 @@ define <2 x double> @load_v2f64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <2 x double>, ptr %a
ret <2 x double> %load
}
@@ -211,6 +328,11 @@ define <4 x i64> @load_v4i64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <4 x i64>, ptr %a
ret <4 x i64> %load
}
@@ -220,6 +342,11 @@ define <4 x double> @load_v4f64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: load_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%load = load <4 x double>, ptr %a
ret <4 x double> %load
}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
index ef52eadc5d3b0..7abe73f08dfd6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -17,6 +18,14 @@ define i8 @andv_v4i8(<4 x i8> %a) {
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a)
ret i8 %res
}
@@ -29,6 +38,15 @@ define i8 @andv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -41,6 +59,20 @@ define i8 @andv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -54,6 +86,22 @@ define i8 @andv_v32i8(ptr %a) {
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %op)
ret i8 %res
@@ -67,6 +115,13 @@ define i16 @andv_v2i16(<2 x i16> %a) {
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a)
ret i16 %res
}
@@ -79,6 +134,14 @@ define i16 @andv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -91,6 +154,19 @@ define i16 @andv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -104,6 +180,21 @@ define i16 @andv_v16i16(ptr %a) {
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: and x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %op)
ret i16 %res
@@ -117,6 +208,13 @@ define i32 @andv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -129,6 +227,18 @@ define i32 @andv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -142,6 +252,20 @@ define i32 @andv_v8i32(ptr %a) {
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: and w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %op)
ret i32 %res
@@ -155,6 +279,16 @@ define i64 @andv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -168,6 +302,18 @@ define i64 @andv_v4i64(ptr %a) {
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: andv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: and v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %op)
ret i64 %res
@@ -185,6 +331,14 @@ define i8 @eorv_v4i8(<4 x i8> %a) {
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> %a)
ret i8 %res
}
@@ -197,6 +351,15 @@ define i8 @eorv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -209,6 +372,20 @@ define i8 @eorv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -222,6 +399,22 @@ define i8 @eorv_v32i8(ptr %a) {
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %op)
ret i8 %res
@@ -235,6 +428,13 @@ define i16 @eorv_v2i16(<2 x i16> %a) {
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> %a)
ret i16 %res
}
@@ -247,6 +447,14 @@ define i16 @eorv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -259,6 +467,19 @@ define i16 @eorv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -272,6 +493,21 @@ define i16 @eorv_v16i16(ptr %a) {
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: eor x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %op)
ret i16 %res
@@ -285,6 +521,13 @@ define i32 @eorv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -297,6 +540,18 @@ define i32 @eorv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -310,6 +565,20 @@ define i32 @eorv_v8i32(ptr %a) {
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: eor w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %op)
ret i32 %res
@@ -323,6 +592,16 @@ define i64 @eorv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -336,6 +615,18 @@ define i64 @eorv_v4i64(ptr %a) {
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: eorv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: eor v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: eor v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %op)
ret i64 %res
@@ -353,6 +644,14 @@ define i8 @orv_v4i8(<4 x i8> %a) {
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v4i8(<4 x i8> %a)
ret i8 %res
}
@@ -365,6 +664,15 @@ define i8 @orv_v8i8(<8 x i8> %a) {
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a)
ret i8 %res
}
@@ -377,6 +685,20 @@ define i8 @orv_v16i8(<16 x i8> %a) {
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a)
ret i8 %res
}
@@ -390,6 +712,22 @@ define i8 @orv_v32i8(ptr %a) {
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #16
+; NONEON-NOSVE-NEXT: lsr x9, x8, #8
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %op)
ret i8 %res
@@ -403,6 +741,13 @@ define i16 @orv_v2i16(<2 x i16> %a) {
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v2i16(<2 x i16> %a)
ret i16 %res
}
@@ -415,6 +760,14 @@ define i16 @orv_v4i16(<4 x i16> %a) {
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %a)
ret i16 %res
}
@@ -427,6 +780,19 @@ define i16 @orv_v8i16(<8 x i16> %a) {
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %a)
ret i16 %res
}
@@ -440,6 +806,21 @@ define i16 @orv_v16i16(ptr %a) {
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: orr x8, x8, x8, lsr #32
+; NONEON-NOSVE-NEXT: lsr x9, x8, #16
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %op)
ret i16 %res
@@ -453,6 +834,13 @@ define i32 @orv_v2i32(<2 x i32> %a) {
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %a)
ret i32 %res
}
@@ -465,6 +853,18 @@ define i32 @orv_v4i32(<4 x i32> %a) {
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
ret i32 %res
}
@@ -478,6 +878,20 @@ define i32 @orv_v8i32(ptr %a) {
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: lsr x9, x8, #32
+; NONEON-NOSVE-NEXT: orr w0, w8, w9
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %op)
ret i32 %res
@@ -491,6 +905,16 @@ define i64 @orv_v2i64(<2 x i64> %a) {
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%res = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %a)
ret i64 %res
}
@@ -504,6 +928,18 @@ define i64 @orv_v4i64(ptr %a) {
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: orv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: orr v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ldr d1, [sp, #8]
+; NONEON-NOSVE-NEXT: orr v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: fmov x0, d0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %op)
ret i64 %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
index 4f8f8c2e4b244..6c33613f8e757 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -19,6 +20,44 @@ define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI0_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB0_2
+; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[0], [x0]
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_3
+; NONEON-NOSVE-NEXT: b .LBB0_4
+; NONEON-NOSVE-NEXT: .LBB0_2:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_4
+; NONEON-NOSVE-NEXT: .LBB0_3: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #1
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9]
+; NONEON-NOSVE-NEXT: .LBB0_4: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB0_7
+; NONEON-NOSVE-NEXT: // %bb.5: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB0_8
+; NONEON-NOSVE-NEXT: .LBB0_6: // %else8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB0_7: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB0_6
+; NONEON-NOSVE-NEXT: .LBB0_8: // %cond.load7
+; NONEON-NOSVE-NEXT: add x8, x0, #3
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x8]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%load = call <4 x i8> @llvm.masked.load.v4i8(ptr %src, i32 8, <4 x i1> %mask, <4 x i8> zeroinitializer)
ret <4 x i8> %load
}
@@ -34,6 +73,67 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) {
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI1_0]
+; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB1_2
+; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load
+; NONEON-NOSVE-NEXT: ldr b0, [x0]
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_3
+; NONEON-NOSVE-NEXT: b .LBB1_4
+; NONEON-NOSVE-NEXT: .LBB1_2:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_4
+; NONEON-NOSVE-NEXT: .LBB1_3: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #1
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[1], [x9]
+; NONEON-NOSVE-NEXT: .LBB1_4: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB1_11
+; NONEON-NOSVE-NEXT: // %bb.5: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB1_12
+; NONEON-NOSVE-NEXT: .LBB1_6: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB1_13
+; NONEON-NOSVE-NEXT: .LBB1_7: // %else11
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB1_14
+; NONEON-NOSVE-NEXT: .LBB1_8: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB1_15
+; NONEON-NOSVE-NEXT: .LBB1_9: // %else17
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB1_16
+; NONEON-NOSVE-NEXT: .LBB1_10: // %else20
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB1_11: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB1_6
+; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.load7
+; NONEON-NOSVE-NEXT: add x9, x0, #3
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB1_7
+; NONEON-NOSVE-NEXT: .LBB1_13: // %cond.load10
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB1_8
+; NONEON-NOSVE-NEXT: .LBB1_14: // %cond.load13
+; NONEON-NOSVE-NEXT: add x9, x0, #5
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[5], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB1_9
+; NONEON-NOSVE-NEXT: .LBB1_15: // %cond.load16
+; NONEON-NOSVE-NEXT: add x9, x0, #6
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB1_10
+; NONEON-NOSVE-NEXT: .LBB1_16: // %cond.load19
+; NONEON-NOSVE-NEXT: add x8, x0, #7
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[7], [x8]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%load = call <8 x i8> @llvm.masked.load.v8i8(ptr %src, i32 8, <8 x i1> %mask, <8 x i8> zeroinitializer)
ret <8 x i8> %load
}
@@ -49,6 +149,115 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0
+; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
+; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: addv h1, v0.8h
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB2_17
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB2_18
+; NONEON-NOSVE-NEXT: .LBB2_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB2_19
+; NONEON-NOSVE-NEXT: .LBB2_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB2_20
+; NONEON-NOSVE-NEXT: .LBB2_4: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB2_21
+; NONEON-NOSVE-NEXT: .LBB2_5: // %else11
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB2_22
+; NONEON-NOSVE-NEXT: .LBB2_6: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB2_23
+; NONEON-NOSVE-NEXT: .LBB2_7: // %else17
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB2_24
+; NONEON-NOSVE-NEXT: .LBB2_8: // %else20
+; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB2_25
+; NONEON-NOSVE-NEXT: .LBB2_9: // %else23
+; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB2_26
+; NONEON-NOSVE-NEXT: .LBB2_10: // %else26
+; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB2_27
+; NONEON-NOSVE-NEXT: .LBB2_11: // %else29
+; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB2_28
+; NONEON-NOSVE-NEXT: .LBB2_12: // %else32
+; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB2_29
+; NONEON-NOSVE-NEXT: .LBB2_13: // %else35
+; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB2_30
+; NONEON-NOSVE-NEXT: .LBB2_14: // %else38
+; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB2_31
+; NONEON-NOSVE-NEXT: .LBB2_15: // %else41
+; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB2_32
+; NONEON-NOSVE-NEXT: .LBB2_16: // %else44
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB2_17: // %cond.load
+; NONEON-NOSVE-NEXT: ldr b0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB2_2
+; NONEON-NOSVE-NEXT: .LBB2_18: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #1
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB2_3
+; NONEON-NOSVE-NEXT: .LBB2_19: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB2_4
+; NONEON-NOSVE-NEXT: .LBB2_20: // %cond.load7
+; NONEON-NOSVE-NEXT: add x9, x0, #3
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB2_5
+; NONEON-NOSVE-NEXT: .LBB2_21: // %cond.load10
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB2_6
+; NONEON-NOSVE-NEXT: .LBB2_22: // %cond.load13
+; NONEON-NOSVE-NEXT: add x9, x0, #5
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[5], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB2_7
+; NONEON-NOSVE-NEXT: .LBB2_23: // %cond.load16
+; NONEON-NOSVE-NEXT: add x9, x0, #6
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB2_8
+; NONEON-NOSVE-NEXT: .LBB2_24: // %cond.load19
+; NONEON-NOSVE-NEXT: add x9, x0, #7
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[7], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB2_9
+; NONEON-NOSVE-NEXT: .LBB2_25: // %cond.load22
+; NONEON-NOSVE-NEXT: add x9, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[8], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB2_10
+; NONEON-NOSVE-NEXT: .LBB2_26: // %cond.load25
+; NONEON-NOSVE-NEXT: add x9, x0, #9
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[9], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB2_11
+; NONEON-NOSVE-NEXT: .LBB2_27: // %cond.load28
+; NONEON-NOSVE-NEXT: add x9, x0, #10
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[10], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB2_12
+; NONEON-NOSVE-NEXT: .LBB2_28: // %cond.load31
+; NONEON-NOSVE-NEXT: add x9, x0, #11
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[11], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB2_13
+; NONEON-NOSVE-NEXT: .LBB2_29: // %cond.load34
+; NONEON-NOSVE-NEXT: add x9, x0, #12
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[12], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB2_14
+; NONEON-NOSVE-NEXT: .LBB2_30: // %cond.load37
+; NONEON-NOSVE-NEXT: add x9, x0, #13
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[13], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB2_15
+; NONEON-NOSVE-NEXT: .LBB2_31: // %cond.load40
+; NONEON-NOSVE-NEXT: add x9, x0, #14
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[14], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB2_16
+; NONEON-NOSVE-NEXT: .LBB2_32: // %cond.load43
+; NONEON-NOSVE-NEXT: add x8, x0, #15
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[15], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer)
ret <16 x i8> %load
}
@@ -130,6 +339,277 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #72]
+; NONEON-NOSVE-NEXT: fmov s1, w1
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #80]
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #88]
+; NONEON-NOSVE-NEXT: mov v1.b[1], w2
+; NONEON-NOSVE-NEXT: mov v0.b[1], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp]
+; NONEON-NOSVE-NEXT: mov v1.b[2], w3
+; NONEON-NOSVE-NEXT: mov v0.b[2], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #96]
+; NONEON-NOSVE-NEXT: mov v1.b[3], w4
+; NONEON-NOSVE-NEXT: mov v0.b[3], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #104]
+; NONEON-NOSVE-NEXT: mov v1.b[4], w5
+; NONEON-NOSVE-NEXT: mov v0.b[4], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #112]
+; NONEON-NOSVE-NEXT: mov v1.b[5], w6
+; NONEON-NOSVE-NEXT: mov v0.b[5], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #120]
+; NONEON-NOSVE-NEXT: mov v1.b[6], w7
+; NONEON-NOSVE-NEXT: mov v0.b[6], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #128]
+; NONEON-NOSVE-NEXT: mov v1.b[7], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #8]
+; NONEON-NOSVE-NEXT: mov v0.b[7], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #136]
+; NONEON-NOSVE-NEXT: mov v1.b[8], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #16]
+; NONEON-NOSVE-NEXT: mov v0.b[8], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #144]
+; NONEON-NOSVE-NEXT: mov v1.b[9], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #24]
+; NONEON-NOSVE-NEXT: mov v0.b[9], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #152]
+; NONEON-NOSVE-NEXT: mov v1.b[10], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #32]
+; NONEON-NOSVE-NEXT: mov v0.b[10], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #160]
+; NONEON-NOSVE-NEXT: mov v1.b[11], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #40]
+; NONEON-NOSVE-NEXT: mov v0.b[11], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #168]
+; NONEON-NOSVE-NEXT: mov v1.b[12], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #48]
+; NONEON-NOSVE-NEXT: mov v0.b[12], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #176]
+; NONEON-NOSVE-NEXT: mov v1.b[13], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #56]
+; NONEON-NOSVE-NEXT: mov v0.b[13], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #184]
+; NONEON-NOSVE-NEXT: mov v1.b[14], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #64]
+; NONEON-NOSVE-NEXT: mov v0.b[14], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #192]
+; NONEON-NOSVE-NEXT: mov v1.b[15], w9
+; NONEON-NOSVE-NEXT: mov v0.b[15], w8
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0
+; NONEON-NOSVE-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
+; NONEON-NOSVE-NEXT: shl v1.16b, v1.16b, #7
+; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: cmlt v1.16b, v1.16b, #0
+; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: zip1 v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: addv h1, v1.8h
+; NONEON-NOSVE-NEXT: addv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: bfi w8, w9, #16, #16
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB3_33
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_34
+; NONEON-NOSVE-NEXT: .LBB3_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB3_35
+; NONEON-NOSVE-NEXT: .LBB3_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB3_36
+; NONEON-NOSVE-NEXT: .LBB3_4: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB3_37
+; NONEON-NOSVE-NEXT: .LBB3_5: // %else11
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB3_38
+; NONEON-NOSVE-NEXT: .LBB3_6: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB3_39
+; NONEON-NOSVE-NEXT: .LBB3_7: // %else17
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB3_40
+; NONEON-NOSVE-NEXT: .LBB3_8: // %else20
+; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB3_41
+; NONEON-NOSVE-NEXT: .LBB3_9: // %else23
+; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB3_42
+; NONEON-NOSVE-NEXT: .LBB3_10: // %else26
+; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB3_43
+; NONEON-NOSVE-NEXT: .LBB3_11: // %else29
+; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB3_44
+; NONEON-NOSVE-NEXT: .LBB3_12: // %else32
+; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB3_45
+; NONEON-NOSVE-NEXT: .LBB3_13: // %else35
+; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB3_46
+; NONEON-NOSVE-NEXT: .LBB3_14: // %else38
+; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB3_47
+; NONEON-NOSVE-NEXT: .LBB3_15: // %else41
+; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB3_48
+; NONEON-NOSVE-NEXT: .LBB3_16: // %else44
+; NONEON-NOSVE-NEXT: tbnz w8, #16, .LBB3_49
+; NONEON-NOSVE-NEXT: .LBB3_17: // %else47
+; NONEON-NOSVE-NEXT: tbnz w8, #17, .LBB3_50
+; NONEON-NOSVE-NEXT: .LBB3_18: // %else50
+; NONEON-NOSVE-NEXT: tbnz w8, #18, .LBB3_51
+; NONEON-NOSVE-NEXT: .LBB3_19: // %else53
+; NONEON-NOSVE-NEXT: tbnz w8, #19, .LBB3_52
+; NONEON-NOSVE-NEXT: .LBB3_20: // %else56
+; NONEON-NOSVE-NEXT: tbnz w8, #20, .LBB3_53
+; NONEON-NOSVE-NEXT: .LBB3_21: // %else59
+; NONEON-NOSVE-NEXT: tbnz w8, #21, .LBB3_54
+; NONEON-NOSVE-NEXT: .LBB3_22: // %else62
+; NONEON-NOSVE-NEXT: tbnz w8, #22, .LBB3_55
+; NONEON-NOSVE-NEXT: .LBB3_23: // %else65
+; NONEON-NOSVE-NEXT: tbnz w8, #23, .LBB3_56
+; NONEON-NOSVE-NEXT: .LBB3_24: // %else68
+; NONEON-NOSVE-NEXT: tbnz w8, #24, .LBB3_57
+; NONEON-NOSVE-NEXT: .LBB3_25: // %else71
+; NONEON-NOSVE-NEXT: tbnz w8, #25, .LBB3_58
+; NONEON-NOSVE-NEXT: .LBB3_26: // %else74
+; NONEON-NOSVE-NEXT: tbnz w8, #26, .LBB3_59
+; NONEON-NOSVE-NEXT: .LBB3_27: // %else77
+; NONEON-NOSVE-NEXT: tbnz w8, #27, .LBB3_60
+; NONEON-NOSVE-NEXT: .LBB3_28: // %else80
+; NONEON-NOSVE-NEXT: tbnz w8, #28, .LBB3_61
+; NONEON-NOSVE-NEXT: .LBB3_29: // %else83
+; NONEON-NOSVE-NEXT: tbnz w8, #29, .LBB3_62
+; NONEON-NOSVE-NEXT: .LBB3_30: // %else86
+; NONEON-NOSVE-NEXT: tbnz w8, #30, .LBB3_63
+; NONEON-NOSVE-NEXT: .LBB3_31: // %else89
+; NONEON-NOSVE-NEXT: tbnz w8, #31, .LBB3_64
+; NONEON-NOSVE-NEXT: .LBB3_32: // %else92
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB3_33: // %cond.load
+; NONEON-NOSVE-NEXT: ldr b0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_2
+; NONEON-NOSVE-NEXT: .LBB3_34: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #1
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB3_3
+; NONEON-NOSVE-NEXT: .LBB3_35: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB3_4
+; NONEON-NOSVE-NEXT: .LBB3_36: // %cond.load7
+; NONEON-NOSVE-NEXT: add x9, x0, #3
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB3_5
+; NONEON-NOSVE-NEXT: .LBB3_37: // %cond.load10
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB3_6
+; NONEON-NOSVE-NEXT: .LBB3_38: // %cond.load13
+; NONEON-NOSVE-NEXT: add x9, x0, #5
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[5], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB3_7
+; NONEON-NOSVE-NEXT: .LBB3_39: // %cond.load16
+; NONEON-NOSVE-NEXT: add x9, x0, #6
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[6], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB3_8
+; NONEON-NOSVE-NEXT: .LBB3_40: // %cond.load19
+; NONEON-NOSVE-NEXT: add x9, x0, #7
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[7], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB3_9
+; NONEON-NOSVE-NEXT: .LBB3_41: // %cond.load22
+; NONEON-NOSVE-NEXT: add x9, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[8], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB3_10
+; NONEON-NOSVE-NEXT: .LBB3_42: // %cond.load25
+; NONEON-NOSVE-NEXT: add x9, x0, #9
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[9], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB3_11
+; NONEON-NOSVE-NEXT: .LBB3_43: // %cond.load28
+; NONEON-NOSVE-NEXT: add x9, x0, #10
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[10], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB3_12
+; NONEON-NOSVE-NEXT: .LBB3_44: // %cond.load31
+; NONEON-NOSVE-NEXT: add x9, x0, #11
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[11], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB3_13
+; NONEON-NOSVE-NEXT: .LBB3_45: // %cond.load34
+; NONEON-NOSVE-NEXT: add x9, x0, #12
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[12], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB3_14
+; NONEON-NOSVE-NEXT: .LBB3_46: // %cond.load37
+; NONEON-NOSVE-NEXT: add x9, x0, #13
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[13], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB3_15
+; NONEON-NOSVE-NEXT: .LBB3_47: // %cond.load40
+; NONEON-NOSVE-NEXT: add x9, x0, #14
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[14], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB3_16
+; NONEON-NOSVE-NEXT: .LBB3_48: // %cond.load43
+; NONEON-NOSVE-NEXT: add x9, x0, #15
+; NONEON-NOSVE-NEXT: ld1 { v0.b }[15], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #16, .LBB3_17
+; NONEON-NOSVE-NEXT: .LBB3_49: // %cond.load46
+; NONEON-NOSVE-NEXT: add x9, x0, #16
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[0], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #17, .LBB3_18
+; NONEON-NOSVE-NEXT: .LBB3_50: // %cond.load49
+; NONEON-NOSVE-NEXT: add x9, x0, #17
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #18, .LBB3_19
+; NONEON-NOSVE-NEXT: .LBB3_51: // %cond.load52
+; NONEON-NOSVE-NEXT: add x9, x0, #18
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #19, .LBB3_20
+; NONEON-NOSVE-NEXT: .LBB3_52: // %cond.load55
+; NONEON-NOSVE-NEXT: add x9, x0, #19
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #20, .LBB3_21
+; NONEON-NOSVE-NEXT: .LBB3_53: // %cond.load58
+; NONEON-NOSVE-NEXT: add x9, x0, #20
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #21, .LBB3_22
+; NONEON-NOSVE-NEXT: .LBB3_54: // %cond.load61
+; NONEON-NOSVE-NEXT: add x9, x0, #21
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[5], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #22, .LBB3_23
+; NONEON-NOSVE-NEXT: .LBB3_55: // %cond.load64
+; NONEON-NOSVE-NEXT: add x9, x0, #22
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[6], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #23, .LBB3_24
+; NONEON-NOSVE-NEXT: .LBB3_56: // %cond.load67
+; NONEON-NOSVE-NEXT: add x9, x0, #23
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[7], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #24, .LBB3_25
+; NONEON-NOSVE-NEXT: .LBB3_57: // %cond.load70
+; NONEON-NOSVE-NEXT: add x9, x0, #24
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[8], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #25, .LBB3_26
+; NONEON-NOSVE-NEXT: .LBB3_58: // %cond.load73
+; NONEON-NOSVE-NEXT: add x9, x0, #25
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[9], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #26, .LBB3_27
+; NONEON-NOSVE-NEXT: .LBB3_59: // %cond.load76
+; NONEON-NOSVE-NEXT: add x9, x0, #26
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[10], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #27, .LBB3_28
+; NONEON-NOSVE-NEXT: .LBB3_60: // %cond.load79
+; NONEON-NOSVE-NEXT: add x9, x0, #27
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[11], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #28, .LBB3_29
+; NONEON-NOSVE-NEXT: .LBB3_61: // %cond.load82
+; NONEON-NOSVE-NEXT: add x9, x0, #28
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[12], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #29, .LBB3_30
+; NONEON-NOSVE-NEXT: .LBB3_62: // %cond.load85
+; NONEON-NOSVE-NEXT: add x9, x0, #29
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[13], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #30, .LBB3_31
+; NONEON-NOSVE-NEXT: .LBB3_63: // %cond.load88
+; NONEON-NOSVE-NEXT: add x9, x0, #30
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[14], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #31, .LBB3_32
+; NONEON-NOSVE-NEXT: .LBB3_64: // %cond.load91
+; NONEON-NOSVE-NEXT: add x8, x0, #31
+; NONEON-NOSVE-NEXT: ld1 { v1.b }[15], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <32 x i8> @llvm.masked.load.v32i8(ptr %src, i32 8, <32 x i1> %mask, <32 x i8> zeroinitializer)
ret <32 x i8> %load
}
@@ -155,6 +635,31 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI4_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
+; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addp v1.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: movi d0, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB4_3
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB4_4
+; NONEON-NOSVE-NEXT: .LBB4_2: // %else2
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB4_3: // %cond.load
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB4_2
+; NONEON-NOSVE-NEXT: .LBB4_4: // %cond.load1
+; NONEON-NOSVE-NEXT: add x8, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x8]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%load = call <2 x half> @llvm.masked.load.v2f16(ptr %src, i32 8, <2 x i1> %mask, <2 x half> zeroinitializer)
ret <2 x half> %load
}
@@ -170,6 +675,43 @@ define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI5_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv h1, v0.4h
+; NONEON-NOSVE-NEXT: movi d0, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB5_5
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB5_6
+; NONEON-NOSVE-NEXT: .LBB5_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB5_7
+; NONEON-NOSVE-NEXT: .LBB5_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB5_8
+; NONEON-NOSVE-NEXT: .LBB5_4: // %else8
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB5_5: // %cond.load
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB5_2
+; NONEON-NOSVE-NEXT: .LBB5_6: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB5_3
+; NONEON-NOSVE-NEXT: .LBB5_7: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB5_4
+; NONEON-NOSVE-NEXT: .LBB5_8: // %cond.load7
+; NONEON-NOSVE-NEXT: add x8, x0, #6
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x8]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%load = call <4 x half> @llvm.masked.load.v4f16(ptr %src, i32 8, <4 x i1> %mask, <4 x half> zeroinitializer)
ret <4 x half> %load
}
@@ -186,6 +728,65 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI6_0]
+; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv b1, v0.8b
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB6_9
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB6_10
+; NONEON-NOSVE-NEXT: .LBB6_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB6_11
+; NONEON-NOSVE-NEXT: .LBB6_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB6_12
+; NONEON-NOSVE-NEXT: .LBB6_4: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB6_13
+; NONEON-NOSVE-NEXT: .LBB6_5: // %else11
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB6_14
+; NONEON-NOSVE-NEXT: .LBB6_6: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB6_15
+; NONEON-NOSVE-NEXT: .LBB6_7: // %else17
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB6_16
+; NONEON-NOSVE-NEXT: .LBB6_8: // %else20
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB6_9: // %cond.load
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB6_2
+; NONEON-NOSVE-NEXT: .LBB6_10: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB6_3
+; NONEON-NOSVE-NEXT: .LBB6_11: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB6_4
+; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.load7
+; NONEON-NOSVE-NEXT: add x9, x0, #6
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB6_5
+; NONEON-NOSVE-NEXT: .LBB6_13: // %cond.load10
+; NONEON-NOSVE-NEXT: add x9, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB6_6
+; NONEON-NOSVE-NEXT: .LBB6_14: // %cond.load13
+; NONEON-NOSVE-NEXT: add x9, x0, #10
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[5], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB6_7
+; NONEON-NOSVE-NEXT: .LBB6_15: // %cond.load16
+; NONEON-NOSVE-NEXT: add x9, x0, #12
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[6], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB6_8
+; NONEON-NOSVE-NEXT: .LBB6_16: // %cond.load19
+; NONEON-NOSVE-NEXT: add x8, x0, #14
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[7], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer)
ret <8 x half> %load
}
@@ -210,6 +811,116 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0
+; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
+; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: addv h2, v0.8h
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s2
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB7_17
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_18
+; NONEON-NOSVE-NEXT: .LBB7_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB7_19
+; NONEON-NOSVE-NEXT: .LBB7_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB7_20
+; NONEON-NOSVE-NEXT: .LBB7_4: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB7_21
+; NONEON-NOSVE-NEXT: .LBB7_5: // %else11
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB7_22
+; NONEON-NOSVE-NEXT: .LBB7_6: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB7_23
+; NONEON-NOSVE-NEXT: .LBB7_7: // %else17
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB7_24
+; NONEON-NOSVE-NEXT: .LBB7_8: // %else20
+; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB7_25
+; NONEON-NOSVE-NEXT: .LBB7_9: // %else23
+; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB7_26
+; NONEON-NOSVE-NEXT: .LBB7_10: // %else26
+; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB7_27
+; NONEON-NOSVE-NEXT: .LBB7_11: // %else29
+; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB7_28
+; NONEON-NOSVE-NEXT: .LBB7_12: // %else32
+; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB7_29
+; NONEON-NOSVE-NEXT: .LBB7_13: // %else35
+; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB7_30
+; NONEON-NOSVE-NEXT: .LBB7_14: // %else38
+; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB7_31
+; NONEON-NOSVE-NEXT: .LBB7_15: // %else41
+; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB7_32
+; NONEON-NOSVE-NEXT: .LBB7_16: // %else44
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB7_17: // %cond.load
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_2
+; NONEON-NOSVE-NEXT: .LBB7_18: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB7_3
+; NONEON-NOSVE-NEXT: .LBB7_19: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB7_4
+; NONEON-NOSVE-NEXT: .LBB7_20: // %cond.load7
+; NONEON-NOSVE-NEXT: add x9, x0, #6
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB7_5
+; NONEON-NOSVE-NEXT: .LBB7_21: // %cond.load10
+; NONEON-NOSVE-NEXT: add x9, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB7_6
+; NONEON-NOSVE-NEXT: .LBB7_22: // %cond.load13
+; NONEON-NOSVE-NEXT: add x9, x0, #10
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[5], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB7_7
+; NONEON-NOSVE-NEXT: .LBB7_23: // %cond.load16
+; NONEON-NOSVE-NEXT: add x9, x0, #12
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[6], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB7_8
+; NONEON-NOSVE-NEXT: .LBB7_24: // %cond.load19
+; NONEON-NOSVE-NEXT: add x9, x0, #14
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[7], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB7_9
+; NONEON-NOSVE-NEXT: .LBB7_25: // %cond.load22
+; NONEON-NOSVE-NEXT: add x9, x0, #16
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[0], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB7_10
+; NONEON-NOSVE-NEXT: .LBB7_26: // %cond.load25
+; NONEON-NOSVE-NEXT: add x9, x0, #18
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB7_11
+; NONEON-NOSVE-NEXT: .LBB7_27: // %cond.load28
+; NONEON-NOSVE-NEXT: add x9, x0, #20
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB7_12
+; NONEON-NOSVE-NEXT: .LBB7_28: // %cond.load31
+; NONEON-NOSVE-NEXT: add x9, x0, #22
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB7_13
+; NONEON-NOSVE-NEXT: .LBB7_29: // %cond.load34
+; NONEON-NOSVE-NEXT: add x9, x0, #24
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[4], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB7_14
+; NONEON-NOSVE-NEXT: .LBB7_30: // %cond.load37
+; NONEON-NOSVE-NEXT: add x9, x0, #26
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[5], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB7_15
+; NONEON-NOSVE-NEXT: .LBB7_31: // %cond.load40
+; NONEON-NOSVE-NEXT: add x9, x0, #28
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[6], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB7_16
+; NONEON-NOSVE-NEXT: .LBB7_32: // %cond.load43
+; NONEON-NOSVE-NEXT: add x8, x0, #30
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[7], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <16 x half> @llvm.masked.load.v16f16(ptr %src, i32 8, <16 x i1> %mask, <16 x half> zeroinitializer)
ret <16 x half> %load
}
@@ -225,6 +936,31 @@ define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
+; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addp v1.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: movi d0, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB8_3
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB8_4
+; NONEON-NOSVE-NEXT: .LBB8_2: // %else2
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB8_3: // %cond.load
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB8_2
+; NONEON-NOSVE-NEXT: .LBB8_4: // %cond.load1
+; NONEON-NOSVE-NEXT: add x8, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.s }[1], [x8]
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NONEON-NOSVE-NEXT: ret
%load = call <2 x float> @llvm.masked.load.v2f32(ptr %src, i32 8, <2 x i1> %mask, <2 x float> zeroinitializer)
ret <2 x float> %load
}
@@ -241,6 +977,41 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI9_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv h1, v0.4h
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB9_5
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB9_6
+; NONEON-NOSVE-NEXT: .LBB9_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB9_7
+; NONEON-NOSVE-NEXT: .LBB9_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB9_8
+; NONEON-NOSVE-NEXT: .LBB9_4: // %else8
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB9_5: // %cond.load
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB9_2
+; NONEON-NOSVE-NEXT: .LBB9_6: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.s }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB9_3
+; NONEON-NOSVE-NEXT: .LBB9_7: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.s }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB9_4
+; NONEON-NOSVE-NEXT: .LBB9_8: // %cond.load7
+; NONEON-NOSVE-NEXT: add x8, x0, #12
+; NONEON-NOSVE-NEXT: ld1 { v0.s }[3], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
ret <4 x float> %load
}
@@ -290,6 +1061,66 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI10_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI10_0]
+; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: addv b2, v0.8b
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s2
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB10_9
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_10
+; NONEON-NOSVE-NEXT: .LBB10_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB10_11
+; NONEON-NOSVE-NEXT: .LBB10_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB10_12
+; NONEON-NOSVE-NEXT: .LBB10_4: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB10_13
+; NONEON-NOSVE-NEXT: .LBB10_5: // %else11
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB10_14
+; NONEON-NOSVE-NEXT: .LBB10_6: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB10_15
+; NONEON-NOSVE-NEXT: .LBB10_7: // %else17
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB10_16
+; NONEON-NOSVE-NEXT: .LBB10_8: // %else20
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB10_9: // %cond.load
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_2
+; NONEON-NOSVE-NEXT: .LBB10_10: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.s }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB10_3
+; NONEON-NOSVE-NEXT: .LBB10_11: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.s }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB10_4
+; NONEON-NOSVE-NEXT: .LBB10_12: // %cond.load7
+; NONEON-NOSVE-NEXT: add x9, x0, #12
+; NONEON-NOSVE-NEXT: ld1 { v0.s }[3], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB10_5
+; NONEON-NOSVE-NEXT: .LBB10_13: // %cond.load10
+; NONEON-NOSVE-NEXT: add x9, x0, #16
+; NONEON-NOSVE-NEXT: ld1 { v1.s }[0], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB10_6
+; NONEON-NOSVE-NEXT: .LBB10_14: // %cond.load13
+; NONEON-NOSVE-NEXT: add x9, x0, #20
+; NONEON-NOSVE-NEXT: ld1 { v1.s }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB10_7
+; NONEON-NOSVE-NEXT: .LBB10_15: // %cond.load16
+; NONEON-NOSVE-NEXT: add x9, x0, #24
+; NONEON-NOSVE-NEXT: ld1 { v1.s }[2], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB10_8
+; NONEON-NOSVE-NEXT: .LBB10_16: // %cond.load19
+; NONEON-NOSVE-NEXT: add x8, x0, #28
+; NONEON-NOSVE-NEXT: ld1 { v1.s }[3], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <8 x float> @llvm.masked.load.v8f32(ptr %src, i32 8, <8 x i1> %mask, <8 x float> zeroinitializer)
ret <8 x float> %load
}
@@ -306,6 +1137,29 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI11_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI11_0]
+; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addp v1.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB11_3
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB11_4
+; NONEON-NOSVE-NEXT: .LBB11_2: // %else2
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB11_3: // %cond.load
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB11_2
+; NONEON-NOSVE-NEXT: .LBB11_4: // %cond.load1
+; NONEON-NOSVE-NEXT: add x8, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.d }[1], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer)
ret <2 x double> %load
}
@@ -331,6 +1185,42 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI12_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: movi v1.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: addv h2, v0.4h
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: fmov w8, s2
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB12_5
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB12_6
+; NONEON-NOSVE-NEXT: .LBB12_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB12_7
+; NONEON-NOSVE-NEXT: .LBB12_3: // %else5
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB12_8
+; NONEON-NOSVE-NEXT: .LBB12_4: // %else8
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB12_5: // %cond.load
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB12_2
+; NONEON-NOSVE-NEXT: .LBB12_6: // %cond.load1
+; NONEON-NOSVE-NEXT: add x9, x0, #8
+; NONEON-NOSVE-NEXT: ld1 { v0.d }[1], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB12_3
+; NONEON-NOSVE-NEXT: .LBB12_7: // %cond.load4
+; NONEON-NOSVE-NEXT: add x9, x0, #16
+; NONEON-NOSVE-NEXT: ld1 { v1.d }[0], [x9]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB12_4
+; NONEON-NOSVE-NEXT: .LBB12_8: // %cond.load7
+; NONEON-NOSVE-NEXT: add x8, x0, #24
+; NONEON-NOSVE-NEXT: ld1 { v1.d }[1], [x8]
+; NONEON-NOSVE-NEXT: ret
%load = call <4 x double> @llvm.masked.load.v4f64(ptr %src, i32 8, <4 x i1> %mask, <4 x double> zeroinitializer)
ret <4 x double> %load
}
@@ -356,6 +1246,38 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_zext_v3i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #16
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: and w8, w1, #0x1
+; NONEON-NOSVE-NEXT: bfi w8, w2, #1, #1
+; NONEON-NOSVE-NEXT: bfi w8, w3, #2, #1
+; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB13_2
+; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB13_3
+; NONEON-NOSVE-NEXT: b .LBB13_4
+; NONEON-NOSVE-NEXT: .LBB13_2:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB13_4
+; NONEON-NOSVE-NEXT: .LBB13_3: // %cond.load1
+; NONEON-NOSVE-NEXT: mov v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[1], [x9]
+; NONEON-NOSVE-NEXT: mov v1.h[2], v0.h[2]
+; NONEON-NOSVE-NEXT: fmov d0, d1
+; NONEON-NOSVE-NEXT: .LBB13_4: // %else2
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB13_6
+; NONEON-NOSVE-NEXT: // %bb.5: // %cond.load4
+; NONEON-NOSVE-NEXT: mov v0.h[1], v0.h[1]
+; NONEON-NOSVE-NEXT: add x8, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8]
+; NONEON-NOSVE-NEXT: .LBB13_6: // %else5
+; NONEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%load_value = tail call <3 x i16> @llvm.masked.load.v3i16.p0(ptr %load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer)
%extend = zext <3 x i16> %load_value to <3 x i32>
ret <3 x i32> %extend;
@@ -382,6 +1304,38 @@ define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_load_sext_v3i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #16
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: and w8, w1, #0x1
+; NONEON-NOSVE-NEXT: bfi w8, w2, #1, #1
+; NONEON-NOSVE-NEXT: bfi w8, w3, #2, #1
+; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB14_2
+; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load
+; NONEON-NOSVE-NEXT: ldr h0, [x0]
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB14_3
+; NONEON-NOSVE-NEXT: b .LBB14_4
+; NONEON-NOSVE-NEXT: .LBB14_2:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB14_4
+; NONEON-NOSVE-NEXT: .LBB14_3: // %cond.load1
+; NONEON-NOSVE-NEXT: mov v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: add x9, x0, #2
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[1], [x9]
+; NONEON-NOSVE-NEXT: mov v1.h[2], v0.h[2]
+; NONEON-NOSVE-NEXT: fmov d0, d1
+; NONEON-NOSVE-NEXT: .LBB14_4: // %else2
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB14_6
+; NONEON-NOSVE-NEXT: // %bb.5: // %cond.load4
+; NONEON-NOSVE-NEXT: mov v0.h[1], v0.h[1]
+; NONEON-NOSVE-NEXT: add x8, x0, #4
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8]
+; NONEON-NOSVE-NEXT: .LBB14_6: // %else5
+; NONEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
%load_value = tail call <3 x i16> @llvm.masked.load.v3i16.p0(ptr %load_ptr, i32 4, <3 x i1> %pm, <3 x i16> zeroinitializer)
%extend = sext <3 x i16> %load_value to <3 x i32>
ret <3 x i32> %extend;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
index bd6b96889b4cc..0904399558aee 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -19,6 +20,37 @@ define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI0_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB0_5
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_6
+; NONEON-NOSVE-NEXT: .LBB0_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB0_7
+; NONEON-NOSVE-NEXT: .LBB0_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB0_8
+; NONEON-NOSVE-NEXT: .LBB0_4: // %else6
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB0_5: // %cond.store
+; NONEON-NOSVE-NEXT: strb wzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_2
+; NONEON-NOSVE-NEXT: .LBB0_6: // %cond.store1
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #1]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB0_3
+; NONEON-NOSVE-NEXT: .LBB0_7: // %cond.store3
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #2]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB0_4
+; NONEON-NOSVE-NEXT: .LBB0_8: // %cond.store5
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #3]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
ret void
}
@@ -34,6 +66,57 @@ define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI1_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI1_0]
+; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB1_9
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_10
+; NONEON-NOSVE-NEXT: .LBB1_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB1_11
+; NONEON-NOSVE-NEXT: .LBB1_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB1_12
+; NONEON-NOSVE-NEXT: .LBB1_4: // %else6
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB1_13
+; NONEON-NOSVE-NEXT: .LBB1_5: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB1_14
+; NONEON-NOSVE-NEXT: .LBB1_6: // %else10
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB1_15
+; NONEON-NOSVE-NEXT: .LBB1_7: // %else12
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB1_16
+; NONEON-NOSVE-NEXT: .LBB1_8: // %else14
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB1_9: // %cond.store
+; NONEON-NOSVE-NEXT: strb wzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_2
+; NONEON-NOSVE-NEXT: .LBB1_10: // %cond.store1
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #1]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB1_3
+; NONEON-NOSVE-NEXT: .LBB1_11: // %cond.store3
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #2]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB1_4
+; NONEON-NOSVE-NEXT: .LBB1_12: // %cond.store5
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #3]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB1_5
+; NONEON-NOSVE-NEXT: .LBB1_13: // %cond.store7
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB1_6
+; NONEON-NOSVE-NEXT: .LBB1_14: // %cond.store9
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #5]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB1_7
+; NONEON-NOSVE-NEXT: .LBB1_15: // %cond.store11
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #6]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB1_8
+; NONEON-NOSVE-NEXT: .LBB1_16: // %cond.store13
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #7]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
ret void
}
@@ -49,6 +132,99 @@ define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI2_0
+; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
+; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: addv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB2_17
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB2_18
+; NONEON-NOSVE-NEXT: .LBB2_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB2_19
+; NONEON-NOSVE-NEXT: .LBB2_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB2_20
+; NONEON-NOSVE-NEXT: .LBB2_4: // %else6
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB2_21
+; NONEON-NOSVE-NEXT: .LBB2_5: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB2_22
+; NONEON-NOSVE-NEXT: .LBB2_6: // %else10
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB2_23
+; NONEON-NOSVE-NEXT: .LBB2_7: // %else12
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB2_24
+; NONEON-NOSVE-NEXT: .LBB2_8: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB2_25
+; NONEON-NOSVE-NEXT: .LBB2_9: // %else16
+; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB2_26
+; NONEON-NOSVE-NEXT: .LBB2_10: // %else18
+; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB2_27
+; NONEON-NOSVE-NEXT: .LBB2_11: // %else20
+; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB2_28
+; NONEON-NOSVE-NEXT: .LBB2_12: // %else22
+; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB2_29
+; NONEON-NOSVE-NEXT: .LBB2_13: // %else24
+; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB2_30
+; NONEON-NOSVE-NEXT: .LBB2_14: // %else26
+; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB2_31
+; NONEON-NOSVE-NEXT: .LBB2_15: // %else28
+; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB2_32
+; NONEON-NOSVE-NEXT: .LBB2_16: // %else30
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB2_17: // %cond.store
+; NONEON-NOSVE-NEXT: strb wzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB2_2
+; NONEON-NOSVE-NEXT: .LBB2_18: // %cond.store1
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #1]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB2_3
+; NONEON-NOSVE-NEXT: .LBB2_19: // %cond.store3
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #2]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB2_4
+; NONEON-NOSVE-NEXT: .LBB2_20: // %cond.store5
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #3]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB2_5
+; NONEON-NOSVE-NEXT: .LBB2_21: // %cond.store7
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB2_6
+; NONEON-NOSVE-NEXT: .LBB2_22: // %cond.store9
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #5]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB2_7
+; NONEON-NOSVE-NEXT: .LBB2_23: // %cond.store11
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #6]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB2_8
+; NONEON-NOSVE-NEXT: .LBB2_24: // %cond.store13
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #7]
+; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB2_9
+; NONEON-NOSVE-NEXT: .LBB2_25: // %cond.store15
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #8]
+; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB2_10
+; NONEON-NOSVE-NEXT: .LBB2_26: // %cond.store17
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #9]
+; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB2_11
+; NONEON-NOSVE-NEXT: .LBB2_27: // %cond.store19
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #10]
+; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB2_12
+; NONEON-NOSVE-NEXT: .LBB2_28: // %cond.store21
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #11]
+; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB2_13
+; NONEON-NOSVE-NEXT: .LBB2_29: // %cond.store23
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #12]
+; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB2_14
+; NONEON-NOSVE-NEXT: .LBB2_30: // %cond.store25
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #13]
+; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB2_15
+; NONEON-NOSVE-NEXT: .LBB2_31: // %cond.store27
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #14]
+; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB2_16
+; NONEON-NOSVE-NEXT: .LBB2_32: // %cond.store29
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #15]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
ret void
}
@@ -129,6 +305,244 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) {
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #72]
+; NONEON-NOSVE-NEXT: fmov s1, w1
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #80]
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #88]
+; NONEON-NOSVE-NEXT: mov v1.b[1], w2
+; NONEON-NOSVE-NEXT: mov v0.b[1], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp]
+; NONEON-NOSVE-NEXT: mov v1.b[2], w3
+; NONEON-NOSVE-NEXT: mov v0.b[2], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #96]
+; NONEON-NOSVE-NEXT: mov v1.b[3], w4
+; NONEON-NOSVE-NEXT: mov v0.b[3], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #104]
+; NONEON-NOSVE-NEXT: mov v1.b[4], w5
+; NONEON-NOSVE-NEXT: mov v0.b[4], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #112]
+; NONEON-NOSVE-NEXT: mov v1.b[5], w6
+; NONEON-NOSVE-NEXT: mov v0.b[5], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #120]
+; NONEON-NOSVE-NEXT: mov v1.b[6], w7
+; NONEON-NOSVE-NEXT: mov v0.b[6], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #128]
+; NONEON-NOSVE-NEXT: mov v1.b[7], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #8]
+; NONEON-NOSVE-NEXT: mov v0.b[7], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #136]
+; NONEON-NOSVE-NEXT: mov v1.b[8], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #16]
+; NONEON-NOSVE-NEXT: mov v0.b[8], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #144]
+; NONEON-NOSVE-NEXT: mov v1.b[9], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #24]
+; NONEON-NOSVE-NEXT: mov v0.b[9], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #152]
+; NONEON-NOSVE-NEXT: mov v1.b[10], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #32]
+; NONEON-NOSVE-NEXT: mov v0.b[10], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #160]
+; NONEON-NOSVE-NEXT: mov v1.b[11], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #40]
+; NONEON-NOSVE-NEXT: mov v0.b[11], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #168]
+; NONEON-NOSVE-NEXT: mov v1.b[12], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #48]
+; NONEON-NOSVE-NEXT: mov v0.b[12], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #176]
+; NONEON-NOSVE-NEXT: mov v1.b[13], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #56]
+; NONEON-NOSVE-NEXT: mov v0.b[13], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #184]
+; NONEON-NOSVE-NEXT: mov v1.b[14], w9
+; NONEON-NOSVE-NEXT: ldr w9, [sp, #64]
+; NONEON-NOSVE-NEXT: mov v0.b[14], w8
+; NONEON-NOSVE-NEXT: ldr w8, [sp, #192]
+; NONEON-NOSVE-NEXT: mov v1.b[15], w9
+; NONEON-NOSVE-NEXT: mov v0.b[15], w8
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI3_0
+; NONEON-NOSVE-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
+; NONEON-NOSVE-NEXT: shl v1.16b, v1.16b, #7
+; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: cmlt v1.16b, v1.16b, #0
+; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: and v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ext v3.16b, v1.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: zip1 v1.16b, v1.16b, v3.16b
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: addv h1, v1.8h
+; NONEON-NOSVE-NEXT: addv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w8, s1
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: bfi w8, w9, #16, #16
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB3_33
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB3_34
+; NONEON-NOSVE-NEXT: .LBB3_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB3_35
+; NONEON-NOSVE-NEXT: .LBB3_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB3_36
+; NONEON-NOSVE-NEXT: .LBB3_4: // %else6
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB3_37
+; NONEON-NOSVE-NEXT: .LBB3_5: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB3_38
+; NONEON-NOSVE-NEXT: .LBB3_6: // %else10
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB3_39
+; NONEON-NOSVE-NEXT: .LBB3_7: // %else12
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB3_40
+; NONEON-NOSVE-NEXT: .LBB3_8: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB3_41
+; NONEON-NOSVE-NEXT: .LBB3_9: // %else16
+; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB3_42
+; NONEON-NOSVE-NEXT: .LBB3_10: // %else18
+; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB3_43
+; NONEON-NOSVE-NEXT: .LBB3_11: // %else20
+; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB3_44
+; NONEON-NOSVE-NEXT: .LBB3_12: // %else22
+; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB3_45
+; NONEON-NOSVE-NEXT: .LBB3_13: // %else24
+; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB3_46
+; NONEON-NOSVE-NEXT: .LBB3_14: // %else26
+; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB3_47
+; NONEON-NOSVE-NEXT: .LBB3_15: // %else28
+; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB3_48
+; NONEON-NOSVE-NEXT: .LBB3_16: // %else30
+; NONEON-NOSVE-NEXT: tbnz w8, #16, .LBB3_49
+; NONEON-NOSVE-NEXT: .LBB3_17: // %else32
+; NONEON-NOSVE-NEXT: tbnz w8, #17, .LBB3_50
+; NONEON-NOSVE-NEXT: .LBB3_18: // %else34
+; NONEON-NOSVE-NEXT: tbnz w8, #18, .LBB3_51
+; NONEON-NOSVE-NEXT: .LBB3_19: // %else36
+; NONEON-NOSVE-NEXT: tbnz w8, #19, .LBB3_52
+; NONEON-NOSVE-NEXT: .LBB3_20: // %else38
+; NONEON-NOSVE-NEXT: tbnz w8, #20, .LBB3_53
+; NONEON-NOSVE-NEXT: .LBB3_21: // %else40
+; NONEON-NOSVE-NEXT: tbnz w8, #21, .LBB3_54
+; NONEON-NOSVE-NEXT: .LBB3_22: // %else42
+; NONEON-NOSVE-NEXT: tbnz w8, #22, .LBB3_55
+; NONEON-NOSVE-NEXT: .LBB3_23: // %else44
+; NONEON-NOSVE-NEXT: tbnz w8, #23, .LBB3_56
+; NONEON-NOSVE-NEXT: .LBB3_24: // %else46
+; NONEON-NOSVE-NEXT: tbnz w8, #24, .LBB3_57
+; NONEON-NOSVE-NEXT: .LBB3_25: // %else48
+; NONEON-NOSVE-NEXT: tbnz w8, #25, .LBB3_58
+; NONEON-NOSVE-NEXT: .LBB3_26: // %else50
+; NONEON-NOSVE-NEXT: tbnz w8, #26, .LBB3_59
+; NONEON-NOSVE-NEXT: .LBB3_27: // %else52
+; NONEON-NOSVE-NEXT: tbnz w8, #27, .LBB3_60
+; NONEON-NOSVE-NEXT: .LBB3_28: // %else54
+; NONEON-NOSVE-NEXT: tbnz w8, #28, .LBB3_61
+; NONEON-NOSVE-NEXT: .LBB3_29: // %else56
+; NONEON-NOSVE-NEXT: tbnz w8, #29, .LBB3_62
+; NONEON-NOSVE-NEXT: .LBB3_30: // %else58
+; NONEON-NOSVE-NEXT: tbnz w8, #30, .LBB3_63
+; NONEON-NOSVE-NEXT: .LBB3_31: // %else60
+; NONEON-NOSVE-NEXT: tbnz w8, #31, .LBB3_64
+; NONEON-NOSVE-NEXT: .LBB3_32: // %else62
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB3_33: // %cond.store
+; NONEON-NOSVE-NEXT: strb wzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB3_2
+; NONEON-NOSVE-NEXT: .LBB3_34: // %cond.store1
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #1]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB3_3
+; NONEON-NOSVE-NEXT: .LBB3_35: // %cond.store3
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #2]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB3_4
+; NONEON-NOSVE-NEXT: .LBB3_36: // %cond.store5
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #3]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB3_5
+; NONEON-NOSVE-NEXT: .LBB3_37: // %cond.store7
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB3_6
+; NONEON-NOSVE-NEXT: .LBB3_38: // %cond.store9
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #5]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB3_7
+; NONEON-NOSVE-NEXT: .LBB3_39: // %cond.store11
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #6]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB3_8
+; NONEON-NOSVE-NEXT: .LBB3_40: // %cond.store13
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #7]
+; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB3_9
+; NONEON-NOSVE-NEXT: .LBB3_41: // %cond.store15
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #8]
+; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB3_10
+; NONEON-NOSVE-NEXT: .LBB3_42: // %cond.store17
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #9]
+; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB3_11
+; NONEON-NOSVE-NEXT: .LBB3_43: // %cond.store19
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #10]
+; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB3_12
+; NONEON-NOSVE-NEXT: .LBB3_44: // %cond.store21
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #11]
+; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB3_13
+; NONEON-NOSVE-NEXT: .LBB3_45: // %cond.store23
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #12]
+; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB3_14
+; NONEON-NOSVE-NEXT: .LBB3_46: // %cond.store25
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #13]
+; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB3_15
+; NONEON-NOSVE-NEXT: .LBB3_47: // %cond.store27
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #14]
+; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB3_16
+; NONEON-NOSVE-NEXT: .LBB3_48: // %cond.store29
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #15]
+; NONEON-NOSVE-NEXT: tbz w8, #16, .LBB3_17
+; NONEON-NOSVE-NEXT: .LBB3_49: // %cond.store31
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #16]
+; NONEON-NOSVE-NEXT: tbz w8, #17, .LBB3_18
+; NONEON-NOSVE-NEXT: .LBB3_50: // %cond.store33
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #17]
+; NONEON-NOSVE-NEXT: tbz w8, #18, .LBB3_19
+; NONEON-NOSVE-NEXT: .LBB3_51: // %cond.store35
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #18]
+; NONEON-NOSVE-NEXT: tbz w8, #19, .LBB3_20
+; NONEON-NOSVE-NEXT: .LBB3_52: // %cond.store37
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #19]
+; NONEON-NOSVE-NEXT: tbz w8, #20, .LBB3_21
+; NONEON-NOSVE-NEXT: .LBB3_53: // %cond.store39
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #20]
+; NONEON-NOSVE-NEXT: tbz w8, #21, .LBB3_22
+; NONEON-NOSVE-NEXT: .LBB3_54: // %cond.store41
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #21]
+; NONEON-NOSVE-NEXT: tbz w8, #22, .LBB3_23
+; NONEON-NOSVE-NEXT: .LBB3_55: // %cond.store43
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #22]
+; NONEON-NOSVE-NEXT: tbz w8, #23, .LBB3_24
+; NONEON-NOSVE-NEXT: .LBB3_56: // %cond.store45
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #23]
+; NONEON-NOSVE-NEXT: tbz w8, #24, .LBB3_25
+; NONEON-NOSVE-NEXT: .LBB3_57: // %cond.store47
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #24]
+; NONEON-NOSVE-NEXT: tbz w8, #25, .LBB3_26
+; NONEON-NOSVE-NEXT: .LBB3_58: // %cond.store49
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #25]
+; NONEON-NOSVE-NEXT: tbz w8, #26, .LBB3_27
+; NONEON-NOSVE-NEXT: .LBB3_59: // %cond.store51
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #26]
+; NONEON-NOSVE-NEXT: tbz w8, #27, .LBB3_28
+; NONEON-NOSVE-NEXT: .LBB3_60: // %cond.store53
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #27]
+; NONEON-NOSVE-NEXT: tbz w8, #28, .LBB3_29
+; NONEON-NOSVE-NEXT: .LBB3_61: // %cond.store55
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #28]
+; NONEON-NOSVE-NEXT: tbz w8, #29, .LBB3_30
+; NONEON-NOSVE-NEXT: .LBB3_62: // %cond.store57
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #29]
+; NONEON-NOSVE-NEXT: tbz w8, #30, .LBB3_31
+; NONEON-NOSVE-NEXT: .LBB3_63: // %cond.store59
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #30]
+; NONEON-NOSVE-NEXT: tbz w8, #31, .LBB3_32
+; NONEON-NOSVE-NEXT: .LBB3_64: // %cond.store61
+; NONEON-NOSVE-NEXT: strb wzr, [x0, #31]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask)
ret void
}
@@ -154,6 +568,29 @@ define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI4_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
+; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addp v0.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB4_3
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB4_4
+; NONEON-NOSVE-NEXT: .LBB4_2: // %else2
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB4_3: // %cond.store
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB4_2
+; NONEON-NOSVE-NEXT: .LBB4_4: // %cond.store1
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #2]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
ret void
}
@@ -169,6 +606,41 @@ define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI5_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI5_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB5_5
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB5_6
+; NONEON-NOSVE-NEXT: .LBB5_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB5_7
+; NONEON-NOSVE-NEXT: .LBB5_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB5_8
+; NONEON-NOSVE-NEXT: .LBB5_4: // %else6
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB5_5: // %cond.store
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB5_2
+; NONEON-NOSVE-NEXT: .LBB5_6: // %cond.store1
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #2]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB5_3
+; NONEON-NOSVE-NEXT: .LBB5_7: // %cond.store3
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB5_4
+; NONEON-NOSVE-NEXT: .LBB5_8: // %cond.store5
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #6]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
ret void
}
@@ -185,6 +657,65 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI6_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI6_0]
+; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB6_9
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB6_10
+; NONEON-NOSVE-NEXT: .LBB6_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB6_11
+; NONEON-NOSVE-NEXT: .LBB6_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB6_12
+; NONEON-NOSVE-NEXT: .LBB6_4: // %else6
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB6_13
+; NONEON-NOSVE-NEXT: .LBB6_5: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB6_14
+; NONEON-NOSVE-NEXT: .LBB6_6: // %else10
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB6_15
+; NONEON-NOSVE-NEXT: .LBB6_7: // %else12
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB6_16
+; NONEON-NOSVE-NEXT: .LBB6_8: // %else14
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB6_9: // %cond.store
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB6_2
+; NONEON-NOSVE-NEXT: .LBB6_10: // %cond.store1
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #2]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB6_3
+; NONEON-NOSVE-NEXT: .LBB6_11: // %cond.store3
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB6_4
+; NONEON-NOSVE-NEXT: .LBB6_12: // %cond.store5
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #6]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB6_5
+; NONEON-NOSVE-NEXT: .LBB6_13: // %cond.store7
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #8]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB6_6
+; NONEON-NOSVE-NEXT: .LBB6_14: // %cond.store9
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #10]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB6_7
+; NONEON-NOSVE-NEXT: .LBB6_15: // %cond.store11
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #12]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB6_8
+; NONEON-NOSVE-NEXT: .LBB6_16: // %cond.store13
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #14]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
ret void
}
@@ -209,6 +740,115 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
; CHECK-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1]
; CHECK-NEXT: st1h { z1.h }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.16b, v0.16b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI7_0
+; NONEON-NOSVE-NEXT: ldr q1, [x8, :lo12:.LCPI7_0]
+; NONEON-NOSVE-NEXT: cmlt v0.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: and v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: addv h0, v0.8h
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB7_17
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB7_18
+; NONEON-NOSVE-NEXT: .LBB7_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB7_19
+; NONEON-NOSVE-NEXT: .LBB7_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB7_20
+; NONEON-NOSVE-NEXT: .LBB7_4: // %else6
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB7_21
+; NONEON-NOSVE-NEXT: .LBB7_5: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB7_22
+; NONEON-NOSVE-NEXT: .LBB7_6: // %else10
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB7_23
+; NONEON-NOSVE-NEXT: .LBB7_7: // %else12
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB7_24
+; NONEON-NOSVE-NEXT: .LBB7_8: // %else14
+; NONEON-NOSVE-NEXT: tbnz w8, #8, .LBB7_25
+; NONEON-NOSVE-NEXT: .LBB7_9: // %else16
+; NONEON-NOSVE-NEXT: tbnz w8, #9, .LBB7_26
+; NONEON-NOSVE-NEXT: .LBB7_10: // %else18
+; NONEON-NOSVE-NEXT: tbnz w8, #10, .LBB7_27
+; NONEON-NOSVE-NEXT: .LBB7_11: // %else20
+; NONEON-NOSVE-NEXT: tbnz w8, #11, .LBB7_28
+; NONEON-NOSVE-NEXT: .LBB7_12: // %else22
+; NONEON-NOSVE-NEXT: tbnz w8, #12, .LBB7_29
+; NONEON-NOSVE-NEXT: .LBB7_13: // %else24
+; NONEON-NOSVE-NEXT: tbnz w8, #13, .LBB7_30
+; NONEON-NOSVE-NEXT: .LBB7_14: // %else26
+; NONEON-NOSVE-NEXT: tbnz w8, #14, .LBB7_31
+; NONEON-NOSVE-NEXT: .LBB7_15: // %else28
+; NONEON-NOSVE-NEXT: tbnz w8, #15, .LBB7_32
+; NONEON-NOSVE-NEXT: .LBB7_16: // %else30
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB7_17: // %cond.store
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB7_2
+; NONEON-NOSVE-NEXT: .LBB7_18: // %cond.store1
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #2]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB7_3
+; NONEON-NOSVE-NEXT: .LBB7_19: // %cond.store3
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB7_4
+; NONEON-NOSVE-NEXT: .LBB7_20: // %cond.store5
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #6]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB7_5
+; NONEON-NOSVE-NEXT: .LBB7_21: // %cond.store7
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #8]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB7_6
+; NONEON-NOSVE-NEXT: .LBB7_22: // %cond.store9
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #10]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB7_7
+; NONEON-NOSVE-NEXT: .LBB7_23: // %cond.store11
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #12]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB7_8
+; NONEON-NOSVE-NEXT: .LBB7_24: // %cond.store13
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #14]
+; NONEON-NOSVE-NEXT: tbz w8, #8, .LBB7_9
+; NONEON-NOSVE-NEXT: .LBB7_25: // %cond.store15
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #16]
+; NONEON-NOSVE-NEXT: tbz w8, #9, .LBB7_10
+; NONEON-NOSVE-NEXT: .LBB7_26: // %cond.store17
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #18]
+; NONEON-NOSVE-NEXT: tbz w8, #10, .LBB7_11
+; NONEON-NOSVE-NEXT: .LBB7_27: // %cond.store19
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #20]
+; NONEON-NOSVE-NEXT: tbz w8, #11, .LBB7_12
+; NONEON-NOSVE-NEXT: .LBB7_28: // %cond.store21
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #22]
+; NONEON-NOSVE-NEXT: tbz w8, #12, .LBB7_13
+; NONEON-NOSVE-NEXT: .LBB7_29: // %cond.store23
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #24]
+; NONEON-NOSVE-NEXT: tbz w8, #13, .LBB7_14
+; NONEON-NOSVE-NEXT: .LBB7_30: // %cond.store25
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #26]
+; NONEON-NOSVE-NEXT: tbz w8, #14, .LBB7_15
+; NONEON-NOSVE-NEXT: .LBB7_31: // %cond.store27
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #28]
+; NONEON-NOSVE-NEXT: tbz w8, #15, .LBB7_16
+; NONEON-NOSVE-NEXT: .LBB7_32: // %cond.store29
+; NONEON-NOSVE-NEXT: fmov s0, wzr
+; NONEON-NOSVE-NEXT: str h0, [x0, #30]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
ret void
}
@@ -225,6 +865,37 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB8_5
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB8_6
+; NONEON-NOSVE-NEXT: .LBB8_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB8_7
+; NONEON-NOSVE-NEXT: .LBB8_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB8_8
+; NONEON-NOSVE-NEXT: .LBB8_4: // %else6
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB8_5: // %cond.store
+; NONEON-NOSVE-NEXT: str wzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB8_2
+; NONEON-NOSVE-NEXT: .LBB8_6: // %cond.store1
+; NONEON-NOSVE-NEXT: str wzr, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB8_3
+; NONEON-NOSVE-NEXT: .LBB8_7: // %cond.store3
+; NONEON-NOSVE-NEXT: str wzr, [x0, #8]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB8_4
+; NONEON-NOSVE-NEXT: .LBB8_8: // %cond.store5
+; NONEON-NOSVE-NEXT: str wzr, [x0, #12]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
ret void
}
@@ -275,6 +946,57 @@ define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.8b, v0.8b, #7
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI9_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI9_0]
+; NONEON-NOSVE-NEXT: cmlt v0.8b, v0.8b, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv b0, v0.8b
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB9_9
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB9_10
+; NONEON-NOSVE-NEXT: .LBB9_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB9_11
+; NONEON-NOSVE-NEXT: .LBB9_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB9_12
+; NONEON-NOSVE-NEXT: .LBB9_4: // %else6
+; NONEON-NOSVE-NEXT: tbnz w8, #4, .LBB9_13
+; NONEON-NOSVE-NEXT: .LBB9_5: // %else8
+; NONEON-NOSVE-NEXT: tbnz w8, #5, .LBB9_14
+; NONEON-NOSVE-NEXT: .LBB9_6: // %else10
+; NONEON-NOSVE-NEXT: tbnz w8, #6, .LBB9_15
+; NONEON-NOSVE-NEXT: .LBB9_7: // %else12
+; NONEON-NOSVE-NEXT: tbnz w8, #7, .LBB9_16
+; NONEON-NOSVE-NEXT: .LBB9_8: // %else14
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB9_9: // %cond.store
+; NONEON-NOSVE-NEXT: str wzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB9_2
+; NONEON-NOSVE-NEXT: .LBB9_10: // %cond.store1
+; NONEON-NOSVE-NEXT: str wzr, [x0, #4]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB9_3
+; NONEON-NOSVE-NEXT: .LBB9_11: // %cond.store3
+; NONEON-NOSVE-NEXT: str wzr, [x0, #8]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB9_4
+; NONEON-NOSVE-NEXT: .LBB9_12: // %cond.store5
+; NONEON-NOSVE-NEXT: str wzr, [x0, #12]
+; NONEON-NOSVE-NEXT: tbz w8, #4, .LBB9_5
+; NONEON-NOSVE-NEXT: .LBB9_13: // %cond.store7
+; NONEON-NOSVE-NEXT: str wzr, [x0, #16]
+; NONEON-NOSVE-NEXT: tbz w8, #5, .LBB9_6
+; NONEON-NOSVE-NEXT: .LBB9_14: // %cond.store9
+; NONEON-NOSVE-NEXT: str wzr, [x0, #20]
+; NONEON-NOSVE-NEXT: tbz w8, #6, .LBB9_7
+; NONEON-NOSVE-NEXT: .LBB9_15: // %cond.store11
+; NONEON-NOSVE-NEXT: str wzr, [x0, #24]
+; NONEON-NOSVE-NEXT: tbz w8, #7, .LBB9_8
+; NONEON-NOSVE-NEXT: .LBB9_16: // %cond.store13
+; NONEON-NOSVE-NEXT: str wzr, [x0, #28]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
ret void
}
@@ -291,6 +1013,27 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #31
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI10_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI10_0]
+; NONEON-NOSVE-NEXT: cmlt v0.2s, v0.2s, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addp v0.2s, v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB10_3
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB10_4
+; NONEON-NOSVE-NEXT: .LBB10_2: // %else2
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB10_3: // %cond.store
+; NONEON-NOSVE-NEXT: str xzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB10_2
+; NONEON-NOSVE-NEXT: .LBB10_4: // %cond.store1
+; NONEON-NOSVE-NEXT: str xzr, [x0, #8]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
ret void
}
@@ -315,6 +1058,37 @@ define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) {
; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_store_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #15
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI11_0
+; NONEON-NOSVE-NEXT: ldr d1, [x8, :lo12:.LCPI11_0]
+; NONEON-NOSVE-NEXT: cmlt v0.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: and v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: addv h0, v0.4h
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB11_5
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB11_6
+; NONEON-NOSVE-NEXT: .LBB11_2: // %else2
+; NONEON-NOSVE-NEXT: tbnz w8, #2, .LBB11_7
+; NONEON-NOSVE-NEXT: .LBB11_3: // %else4
+; NONEON-NOSVE-NEXT: tbnz w8, #3, .LBB11_8
+; NONEON-NOSVE-NEXT: .LBB11_4: // %else6
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB11_5: // %cond.store
+; NONEON-NOSVE-NEXT: str xzr, [x0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB11_2
+; NONEON-NOSVE-NEXT: .LBB11_6: // %cond.store1
+; NONEON-NOSVE-NEXT: str xzr, [x0, #8]
+; NONEON-NOSVE-NEXT: tbz w8, #2, .LBB11_3
+; NONEON-NOSVE-NEXT: .LBB11_7: // %cond.store3
+; NONEON-NOSVE-NEXT: str xzr, [x0, #16]
+; NONEON-NOSVE-NEXT: tbz w8, #3, .LBB11_4
+; NONEON-NOSVE-NEXT: .LBB11_8: // %cond.store5
+; NONEON-NOSVE-NEXT: str xzr, [x0, #24]
+; NONEON-NOSVE-NEXT: ret
call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
index aef446a90df65..6a6b47e815ac1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -14,6 +15,15 @@ define void @add_v4i8(ptr %a, ptr %b) {
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: ldr s1, [x1]
+; NONEON-NOSVE-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: str s0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i8>, ptr %a
%op2 = load <4 x i8>, ptr %b
%res = add <4 x i8> %op1, %op2
@@ -29,6 +39,14 @@ define void @add_v8i8(ptr %a, ptr %b) {
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ldr d1, [x1]
+; NONEON-NOSVE-NEXT: add v0.8b, v0.8b, v1.8b
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i8>, ptr %a
%op2 = load <8 x i8>, ptr %b
%res = add <8 x i8> %op1, %op2
@@ -44,6 +62,14 @@ define void @add_v16i8(ptr %a, ptr %b) {
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i8>, ptr %a
%op2 = load <16 x i8>, ptr %b
%res = add <16 x i8> %op1, %op2
@@ -60,6 +86,15 @@ define void @add_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.b, z2.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: add v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%res = add <32 x i8> %op1, %op2
@@ -76,6 +111,23 @@ define void @add_v2i16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldrh w8, [x0]
+; NONEON-NOSVE-NEXT: ldrh w9, [x1]
+; NONEON-NOSVE-NEXT: fmov s0, w8
+; NONEON-NOSVE-NEXT: fmov s1, w9
+; NONEON-NOSVE-NEXT: add x8, x0, #2
+; NONEON-NOSVE-NEXT: add x9, x1, #2
+; NONEON-NOSVE-NEXT: ld1 { v0.h }[2], [x8]
+; NONEON-NOSVE-NEXT: ld1 { v1.h }[2], [x9]
+; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: mov w8, v0.s[1]
+; NONEON-NOSVE-NEXT: fmov w9, s0
+; NONEON-NOSVE-NEXT: strh w9, [x0]
+; NONEON-NOSVE-NEXT: strh w8, [x0, #2]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i16>, ptr %a
%op2 = load <2 x i16>, ptr %b
%res = add <2 x i16> %op1, %op2
@@ -91,6 +143,14 @@ define void @add_v4i16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ldr d1, [x1]
+; NONEON-NOSVE-NEXT: add v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i16>, ptr %a
%op2 = load <4 x i16>, ptr %b
%res = add <4 x i16> %op1, %op2
@@ -106,6 +166,14 @@ define void @add_v8i16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i16>, ptr %a
%op2 = load <8 x i16>, ptr %b
%res = add <8 x i16> %op1, %op2
@@ -122,6 +190,15 @@ define void @add_v16i16(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: add z1.h, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: add_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%res = add <16 x i16> %op1, %op2
@@ -137,6 +214,13 @@ define void @abs_v2i32(ptr %a) {
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: abs v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i32>, ptr %a
%res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false)
store <2 x i32> %res, ptr %a
@@ -151,6 +235,13 @@ define void @abs_v4i32(ptr %a) {
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i32>, ptr %a
%res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false)
store <4 x i32> %res, ptr %a
@@ -166,6 +257,14 @@ define void @abs_v8i32(ptr %a) {
; CHECK-NEXT: abs z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: abs v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: abs v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false)
store <8 x i32> %res, ptr %a
@@ -180,6 +279,13 @@ define void @abs_v2i64(ptr %a) {
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x i64>, ptr %a
%res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false)
store <2 x i64> %res, ptr %a
@@ -195,6 +301,14 @@ define void @abs_v4i64(ptr %a) {
; CHECK-NEXT: abs z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: abs_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: abs v0.2d, v0.2d
+; NONEON-NOSVE-NEXT: abs v1.2d, v1.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false)
store <4 x i64> %res, ptr %a
@@ -211,6 +325,17 @@ define void @fadd_v2f16(ptr %a, ptr %b) {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr s0, [x0]
+; NONEON-NOSVE-NEXT: ldr s1, [x1]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: str s0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x half>, ptr %a
%op2 = load <2 x half>, ptr %b
%res = fadd <2 x half> %op1, %op2
@@ -227,6 +352,17 @@ define void @fadd_v4f16(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ldr d1, [x1]
+; NONEON-NOSVE-NEXT: fcvtl v1.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v0.4s, v0.4h
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x half>, ptr %a
%op2 = load <4 x half>, ptr %b
%res = fadd <4 x half> %op1, %op2
@@ -243,6 +379,21 @@ define void @fadd_v8f16(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fcvtl v2.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v3.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fadd v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v2.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: str q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x half>, ptr %a
%op2 = load <8 x half>, ptr %b
%res = fadd <8 x half> %op1, %op2
@@ -261,6 +412,29 @@ define void @fadd_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fcvtl v4.4s, v0.4h
+; NONEON-NOSVE-NEXT: fcvtl v6.4s, v3.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v0.4s, v0.8h
+; NONEON-NOSVE-NEXT: fcvtl v5.4s, v1.4h
+; NONEON-NOSVE-NEXT: fcvtl v7.4s, v2.4h
+; NONEON-NOSVE-NEXT: fcvtl2 v1.4s, v1.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v3.4s, v3.8h
+; NONEON-NOSVE-NEXT: fcvtl2 v2.4s, v2.8h
+; NONEON-NOSVE-NEXT: fadd v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: fadd v5.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fadd v2.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: fcvtn v1.4h, v4.4s
+; NONEON-NOSVE-NEXT: fcvtn v3.4h, v5.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v1.8h, v0.4s
+; NONEON-NOSVE-NEXT: fcvtn2 v3.8h, v2.4s
+; NONEON-NOSVE-NEXT: stp q1, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%res = fadd <16 x half> %op1, %op2
@@ -277,6 +451,14 @@ define void @fadd_v2f32(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ldr d1, [x1]
+; NONEON-NOSVE-NEXT: fadd v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x float>, ptr %a
%op2 = load <2 x float>, ptr %b
%res = fadd <2 x float> %op1, %op2
@@ -293,6 +475,14 @@ define void @fadd_v4f32(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fadd v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x float>, ptr %a
%op2 = load <4 x float>, ptr %b
%res = fadd <4 x float> %op1, %op2
@@ -311,6 +501,15 @@ define void @fadd_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fadd v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: fadd v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%res = fadd <8 x float> %op1, %op2
@@ -327,6 +526,14 @@ define void @fadd_v2f64(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: fadd v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <2 x double>, ptr %a
%op2 = load <2 x double>, ptr %b
%res = fadd <2 x double> %op1, %op2
@@ -345,6 +552,15 @@ define void @fadd_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fadd_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: fadd v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%res = fadd <4 x double> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index 6d91253caae58..03bb899c517b4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -15,6 +16,14 @@ define void @test_revbv16i16(ptr %a) {
; CHECK-NEXT: revb z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revbv16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev16 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 undef, i32 24, i32 27, i32 undef, i32 29, i32 28, i32 undef, i32 undef>
store <32 x i8> %tmp2, ptr %a
@@ -31,6 +40,14 @@ define void @test_revbv8i32(ptr %a) {
; CHECK-NEXT: revb z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revbv8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev32 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
store <32 x i8> %tmp2, ptr %a
@@ -47,6 +64,14 @@ define void @test_revbv4i64(ptr %a) {
; CHECK-NEXT: revb z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revbv4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev64 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 undef, i32 27, i32 undef, i32 undef, i32 undef>
store <32 x i8> %tmp2, ptr %a
@@ -63,6 +88,14 @@ define void @test_revhv8i32(ptr %a) {
; CHECK-NEXT: revh z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revhv8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev32 v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: rev32 v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
store <16 x i16> %tmp2, ptr %a
@@ -79,6 +112,14 @@ define void @test_revhv8f32(ptr %a) {
; CHECK-NEXT: revh z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revhv8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev32 v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: rev32 v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <16 x half>, ptr %a
%tmp2 = shufflevector <16 x half> %tmp1, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
store <16 x half> %tmp2, ptr %a
@@ -95,6 +136,14 @@ define void @test_revhv4i64(ptr %a) {
; CHECK-NEXT: revh z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revhv4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: rev64 v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
store <16 x i16> %tmp2, ptr %a
@@ -111,6 +160,14 @@ define void @test_revwv4i64(ptr %a) {
; CHECK-NEXT: revw z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revwv4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: rev64 v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
store <8 x i32> %tmp2, ptr %a
@@ -127,6 +184,14 @@ define void @test_revwv4f64(ptr %a) {
; CHECK-NEXT: revw z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revwv4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: rev64 v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x float>, ptr %a
%tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
store <8 x float> %tmp2, ptr %a
@@ -141,6 +206,12 @@ define <16 x i8> @test_revv16i8(ptr %a) {
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revv16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <16 x i8>, ptr %a
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
ret <16 x i8> %tmp2
@@ -156,6 +227,14 @@ define void @test_revwv8i32v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: revw z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revwv8i32v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: rev64 v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp2 = load <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -176,6 +255,18 @@ define void @test_revhv32i16(ptr %a) {
; CHECK-NEXT: stp q0, q1, [x0, #32]
; CHECK-NEXT: stp q2, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revhv32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: rev64 v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: rev64 v2.8h, v2.8h
+; NONEON-NOSVE-NEXT: rev64 v3.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: stp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <32 x i16>, ptr %a
%tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
store <32 x i16> %tmp2, ptr %a
@@ -191,6 +282,14 @@ define void @test_rev_elts_fail(ptr %a) {
; CHECK-NEXT: tbl z0.d, { z2.d }, z0.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_rev_elts_fail:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x i64>, ptr %a
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
store <4 x i64> %tmp2, ptr %a
@@ -208,6 +307,15 @@ define void @test_revdv4i64_sve2p1(ptr %a) #1 {
; CHECK-NEXT: revd z1.q, p0/m, z1.q
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revdv4i64_sve2p1:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q
+; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x i64>, ptr %a
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
store <4 x i64> %tmp2, ptr %a
@@ -223,6 +331,15 @@ define void @test_revdv4f64_sve2p1(ptr %a) #1 {
; CHECK-NEXT: revd z1.q, p0/m, z1.q
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revdv4f64_sve2p1:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ptrue p0.d
+; NONEON-NOSVE-NEXT: revd z0.q, p0/m, z0.q
+; NONEON-NOSVE-NEXT: revd z1.q, p0/m, z1.q
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x double>, ptr %a
%tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
store <4 x double> %tmp2, ptr %a
@@ -238,6 +355,16 @@ define void @test_revv8i32(ptr %a) {
; CHECK-NEXT: tbl z0.s, { z2.s }, z0.s
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_revv8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: rev64 v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
store <8 x i32> %tmp2, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
index 8808ad9a23d7c..f254a1f9098f2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -68,6 +69,18 @@ define void @zip1_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip1_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: zip2 v2.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q2, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <32 x i8>, ptr %a
%tmp2 = load volatile <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
@@ -196,6 +209,28 @@ define void @zip_v32i16(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip_v32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q4, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q5, q1, [x0]
+; NONEON-NOSVE-NEXT: ldp q6, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: ldp q7, q3, [x1]
+; NONEON-NOSVE-NEXT: zip1 v17.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: zip2 v0.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: zip1 v16.8h, v1.8h, v3.8h
+; NONEON-NOSVE-NEXT: zip2 v1.8h, v1.8h, v3.8h
+; NONEON-NOSVE-NEXT: zip1 v2.8h, v5.8h, v7.8h
+; NONEON-NOSVE-NEXT: zip1 v3.8h, v4.8h, v6.8h
+; NONEON-NOSVE-NEXT: zip2 v5.8h, v5.8h, v7.8h
+; NONEON-NOSVE-NEXT: zip2 v4.8h, v4.8h, v6.8h
+; NONEON-NOSVE-NEXT: add v6.8h, v16.8h, v17.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: add v2.8h, v5.8h, v4.8h
+; NONEON-NOSVE-NEXT: stp q6, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: stp q1, q2, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <32 x i16>, ptr %a
%tmp2 = load <32 x i16>, ptr %b
%tmp3 = shufflevector <32 x i16> %tmp1, <32 x i16> %tmp2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
@@ -244,6 +279,18 @@ define void @zip1_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip1_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: zip2 v2.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: zip1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: str q2, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <16 x i16>, ptr %a
%tmp2 = load volatile <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -276,6 +323,18 @@ define void @zip1_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip1_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: zip2 v2.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: str q2, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = load volatile <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -298,6 +357,19 @@ define void @zip_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
+; NONEON-NOSVE-NEXT: zip1 v4.2d, v1.2d, v3.2d
+; NONEON-NOSVE-NEXT: zip1 v5.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: zip2 v1.2d, v1.2d, v3.2d
+; NONEON-NOSVE-NEXT: zip2 v0.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: fadd v2.2d, v4.2d, v5.2d
+; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: stp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x double>, ptr %a
%tmp2 = load <4 x double>, ptr %b
%tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -330,6 +402,16 @@ define void @zip_v4i32(ptr %a, ptr %b) {
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: zip1 v2.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: zip2 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x i32>, ptr %a
%tmp2 = load <4 x i32>, ptr %b
%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -351,6 +433,16 @@ define void @zip1_v8i32_undef(ptr %a) {
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip1_v8i32_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: zip2 v1.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: str q1, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
store volatile <8 x i32> %tmp2, ptr %a
@@ -370,6 +462,19 @@ define void @trn_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.b, z1.b, z2.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trn_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: trn1 v4.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: trn2 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: trn1 v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: trn2 v2.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v4.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = load <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
@@ -392,6 +497,19 @@ define void @trn_v8i16(ptr %a, ptr %b) {
; CHECK-NEXT: add z0.h, z1.h, z0.h
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trn_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: adrp x8, .LCPI8_0
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI8_1
+; NONEON-NOSVE-NEXT: ldr q1, [x0]
+; NONEON-NOSVE-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; NONEON-NOSVE-NEXT: ldr q2, [x9, :lo12:.LCPI8_1]
+; NONEON-NOSVE-NEXT: tbl v0.16b, { v1.16b }, v0.16b
+; NONEON-NOSVE-NEXT: tbl v1.16b, { v1.16b }, v2.16b
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i16>, ptr %a
%tmp2 = load <8 x i16>, ptr %b
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 2, i32 6, i32 4, i32 5, i32 1, i32 3>
@@ -414,6 +532,19 @@ define void @trn_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.h, z1.h, z2.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trn_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: trn1 v4.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: trn2 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: trn1 v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: trn2 v2.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v4.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v2.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = load <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -436,6 +567,19 @@ define void @trn_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: add z1.s, z1.s, z2.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trn_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: zip1 v4.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: trn2 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: trn1 v1.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: trn2 v2.4s, v2.4s, v3.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v4.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp2 = load <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
@@ -459,6 +603,19 @@ define void @trn_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z2.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trn_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: zip1 v4.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: zip2 v0.2d, v0.2d, v1.2d
+; NONEON-NOSVE-NEXT: zip1 v1.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: zip2 v2.2d, v2.2d, v3.2d
+; NONEON-NOSVE-NEXT: fadd v0.2d, v4.2d, v0.2d
+; NONEON-NOSVE-NEXT: fadd v1.2d, v1.2d, v2.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x double>, ptr %a
%tmp2 = load <4 x double>, ptr %b
%tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -479,6 +636,16 @@ define void @trn_v4f32(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trn_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: trn1 v2.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: trn2 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: fadd v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x float>, ptr %a
%tmp2 = load <4 x float>, ptr %b
%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -500,6 +667,18 @@ define void @trn_v8i32_undef(ptr %a) {
; CHECK-NEXT: add z1.s, z3.s, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trn_v8i32_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: trn1 v2.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: trn2 v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: trn1 v3.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: trn2 v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v3.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@@ -571,6 +750,18 @@ define void @zip2_v32i8(ptr %a, ptr %b) #0{
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip2_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: zip2 v2.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: zip1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: str q2, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <32 x i8>, ptr %a
%tmp2 = load volatile <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
@@ -617,6 +808,18 @@ define void @zip2_v16i16(ptr %a, ptr %b) #0{
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip2_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: zip2 v2.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: zip1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: str q2, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <16 x i16>, ptr %a
%tmp2 = load volatile <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -649,6 +852,18 @@ define void @zip2_v8i32(ptr %a, ptr %b) #0{
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip2_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: zip2 v2.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v1.4s
+; NONEON-NOSVE-NEXT: str q2, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = load volatile <8 x i32>, ptr %b
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -668,6 +883,16 @@ define void @zip2_v8i32_undef(ptr %a) #0{
; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip2_v8i32_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: zip2 v1.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: zip1 v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: str q1, [x0, #16]
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
store volatile <8 x i32> %tmp2, ptr %a
@@ -869,6 +1094,19 @@ define void @uzp_v32i8(ptr %a, ptr %b) #0{
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uzp_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
+; NONEON-NOSVE-NEXT: uzp1 v4.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: uzp2 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b
+; NONEON-NOSVE-NEXT: uzp2 v2.16b, v3.16b, v2.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v4.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v2.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = load <32 x i8>, ptr %b
%tmp3 = shufflevector <32 x i8> %tmp1, <32 x i8> %tmp2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
@@ -891,6 +1129,17 @@ define void @uzp_v4i16(ptr %a, ptr %b) #0{
; CHECK-NEXT: add z0.h, z1.h, z0.h
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uzp_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: ext v1.8b, v0.8b, v0.8b, #6
+; NONEON-NOSVE-NEXT: ext v2.8b, v0.8b, v0.8b, #2
+; NONEON-NOSVE-NEXT: trn1 v1.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: zip1 v0.4h, v2.4h, v0.4h
+; NONEON-NOSVE-NEXT: add v0.4h, v1.4h, v0.4h
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x i16>, ptr %a
%tmp2 = load <4 x i16>, ptr %b
%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
@@ -1008,6 +1257,19 @@ define void @uzp_v16i16(ptr %a, ptr %b) #0{
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uzp_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
+; NONEON-NOSVE-NEXT: uzp1 v4.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp2 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp2 v2.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v4.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v2.8h
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = load <16 x i16>, ptr %b
%tmp3 = shufflevector <16 x i16> %tmp1, <16 x i16> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1047,6 +1309,19 @@ define void @uzp_v8f32(ptr %a, ptr %b) #0{
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uzp_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
+; NONEON-NOSVE-NEXT: uzp1 v4.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp2 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp2 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: fadd v0.4s, v4.4s, v0.4s
+; NONEON-NOSVE-NEXT: fadd v1.4s, v1.4s, v2.4s
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x float>, ptr %a
%tmp2 = load <8 x float>, ptr %b
%tmp3 = shufflevector <8 x float> %tmp1, <8 x float> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 6, i32 undef, i32 10, i32 12, i32 14>
@@ -1069,6 +1344,19 @@ define void @uzp_v4i64(ptr %a, ptr %b) #0{
; CHECK-NEXT: add z1.d, z1.d, z2.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uzp_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
+; NONEON-NOSVE-NEXT: zip1 v4.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: zip2 v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: zip1 v1.2d, v3.2d, v2.2d
+; NONEON-NOSVE-NEXT: zip2 v2.2d, v3.2d, v2.2d
+; NONEON-NOSVE-NEXT: add v0.2d, v4.2d, v0.2d
+; NONEON-NOSVE-NEXT: add v1.2d, v1.2d, v2.2d
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x i64>, ptr %a
%tmp2 = load <4 x i64>, ptr %b
%tmp3 = shufflevector <4 x i64> %tmp1, <4 x i64> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1136,6 +1424,16 @@ define void @uzp_v8i16(ptr %a, ptr %b) #0{
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uzp_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v2.8h, v0.8h
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i16>, ptr %a
%tmp2 = load <8 x i16>, ptr %b
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -1174,6 +1472,15 @@ define void @uzp_v8i32_undef(ptr %a) #0{
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: uzp_v8i32_undef:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp2 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v2.4s, v0.4s
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
%tmp4 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 1, i32 3, i32 5, i32 7>
@@ -1197,6 +1504,19 @@ define void @zip_vscale2_4(ptr %a, ptr %b) {
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: zip_vscale2_4:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x1]
+; NONEON-NOSVE-NEXT: zip1 v4.2d, v1.2d, v3.2d
+; NONEON-NOSVE-NEXT: zip1 v5.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: zip2 v1.2d, v1.2d, v3.2d
+; NONEON-NOSVE-NEXT: zip2 v0.2d, v0.2d, v2.2d
+; NONEON-NOSVE-NEXT: fadd v2.2d, v4.2d, v5.2d
+; NONEON-NOSVE-NEXT: fadd v0.2d, v1.2d, v0.2d
+; NONEON-NOSVE-NEXT: stp q2, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%tmp1 = load <4 x double>, ptr %a
%tmp2 = load <4 x double>, ptr %b
%tmp3 = shufflevector <4 x double> %tmp1, <4 x double> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index 8039bd096bcb8..41d2cb8a2c756 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -35,6 +36,23 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ptest_v16i1:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v1.4s, v1.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v3.4s, v3.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v2.4s, v2.4s, #0.0
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: umaxv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: and w0, w8, #0x1
+; NONEON-NOSVE-NEXT: ret
%v0 = bitcast ptr %a to ptr
%v1 = load <16 x float>, ptr %v0, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
@@ -92,6 +110,33 @@ define i1 @ptest_or_v16i1(ptr %a, ptr %b) {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ptest_or_v16i1:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x1, #32]
+; NONEON-NOSVE-NEXT: fcmeq v1.4s, v1.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v3.4s, v3.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v2.4s, v2.4s, #0.0
+; NONEON-NOSVE-NEXT: ldp q6, q7, [x1]
+; NONEON-NOSVE-NEXT: fcmeq v4.4s, v4.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v5.4s, v5.4s, #0.0
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: fcmeq v7.4s, v7.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v6.4s, v6.4s, #0.0
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v5.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp1 v3.8h, v6.8h, v7.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b
+; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: orn v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: umaxv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: and w0, w8, #0x1
+; NONEON-NOSVE-NEXT: ret
%v0 = bitcast ptr %a to ptr
%v1 = load <16 x float>, ptr %v0, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
@@ -159,6 +204,33 @@ define i1 @ptest_and_v16i1(ptr %a, ptr %b) {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: ptest_and_v16i1:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q2, q3, [x0]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x1, #32]
+; NONEON-NOSVE-NEXT: fcmeq v1.4s, v1.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v0.4s, v0.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v3.4s, v3.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v2.4s, v2.4s, #0.0
+; NONEON-NOSVE-NEXT: ldp q6, q7, [x1]
+; NONEON-NOSVE-NEXT: fcmeq v4.4s, v4.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v5.4s, v5.4s, #0.0
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; NONEON-NOSVE-NEXT: fcmeq v7.4s, v7.4s, #0.0
+; NONEON-NOSVE-NEXT: fcmeq v6.4s, v6.4s, #0.0
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v3.8h
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v5.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp1 v3.8h, v6.8h, v7.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b
+; NONEON-NOSVE-NEXT: mvn v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: bic v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: uminv b0, v0.16b
+; NONEON-NOSVE-NEXT: fmov w8, s0
+; NONEON-NOSVE-NEXT: and w0, w8, #0x1
+; NONEON-NOSVE-NEXT: ret
%v0 = bitcast ptr %a to ptr
%v1 = load <16 x float>, ptr %v0, align 4
%v2 = fcmp une <16 x float> %v1, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index 726fd28c90ae2..5626f77c684f2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -18,6 +19,13 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) {
; CHECK-NEXT: lsr z0.h, z0.h, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev16 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ushr v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %op)
ret <4 x i8> %res
}
@@ -30,6 +38,11 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) {
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op)
ret <8 x i8> %res
}
@@ -42,6 +55,11 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) {
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op)
ret <16 x i8> %res
}
@@ -55,6 +73,14 @@ define void @bitreverse_v32i8(ptr %a) {
; CHECK-NEXT: rbit z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <32 x i8>, ptr %a
%res = call <32 x i8> @llvm.bitreverse.v32i8(<32 x i8> %op)
store <32 x i8> %res, ptr %a
@@ -70,6 +96,13 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) {
; CHECK-NEXT: lsr z0.s, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev32 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %op)
ret <2 x i16> %res
}
@@ -82,6 +115,12 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) {
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev16 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op)
ret <4 x i16> %res
}
@@ -94,6 +133,12 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) {
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op)
ret <8 x i16> %res
}
@@ -107,6 +152,16 @@ define void @bitreverse_v16i16(ptr %a) {
; CHECK-NEXT: rbit z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev16 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %op)
store <16 x i16> %res, ptr %a
@@ -121,6 +176,12 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) {
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev32 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op)
ret <2 x i32> %res
}
@@ -133,6 +194,12 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) {
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op)
ret <4 x i32> %res
}
@@ -146,6 +213,16 @@ define void @bitreverse_v8i32(ptr %a) {
; CHECK-NEXT: rbit z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev32 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %op)
store <8 x i32> %res, ptr %a
@@ -160,6 +237,12 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev64 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: rbit v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)
ret <1 x i64> %res
}
@@ -172,6 +255,12 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) {
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op)
ret <2 x i64> %res
}
@@ -185,6 +274,16 @@ define void @bitreverse_v4i64(ptr %a) {
; CHECK-NEXT: rbit z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bitreverse_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev64 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: rbit v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rbit v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %op)
store <4 x i64> %res, ptr %a
@@ -204,6 +303,12 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) {
; CHECK-NEXT: lsr z0.s, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev32 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ushr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %op)
ret <2 x i16> %res
}
@@ -216,6 +321,11 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) {
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev16 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op)
ret <4 x i16> %res
}
@@ -228,6 +338,11 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) {
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op)
ret <8 x i16> %res
}
@@ -241,6 +356,14 @@ define void @bswap_v16i16(ptr %a) {
; CHECK-NEXT: revb z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev16 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev16 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <16 x i16>, ptr %a
%res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op)
store <16 x i16> %res, ptr %a
@@ -255,6 +378,11 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) {
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev32 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op)
ret <2 x i32> %res
}
@@ -267,6 +395,11 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) {
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op)
ret <4 x i32> %res
}
@@ -280,6 +413,14 @@ define void @bswap_v8i32(ptr %a) {
; CHECK-NEXT: revb z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev32 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev32 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <8 x i32>, ptr %a
%res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op)
store <8 x i32> %res, ptr %a
@@ -294,6 +435,11 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) {
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev64 v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: ret
%res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op)
ret <1 x i64> %res
}
@@ -306,6 +452,11 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) {
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op)
ret <2 x i64> %res
}
@@ -319,6 +470,14 @@ define void @bswap_v4i64(ptr %a) {
; CHECK-NEXT: revb z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: bswap_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: rev64 v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: rev64 v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op = load <4 x i64>, ptr %a
%res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op)
store <4 x i64> %res, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index c022bf85e67e9..55f4f5bae641e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -14,6 +15,19 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) {
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v1.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: movi d2, #0xff00ff00ff00ff
+; NONEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8
+; NONEON-NOSVE-NEXT: ushr v1.4h, v1.4h, #7
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: usra v0.4h, v1.4h, #3
+; NONEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <4 x i8> %op1, shufflevector (<4 x i8> insertelement (<4 x i8> poison, i8 32, i32 0), <4 x i8> poison, <4 x i32> zeroinitializer)
ret <4 x i8> %res
}
@@ -26,6 +40,13 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) {
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt v1.8b, v0.8b, #0
+; NONEON-NOSVE-NEXT: usra v0.8b, v1.8b, #3
+; NONEON-NOSVE-NEXT: sshr v0.8b, v0.8b, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 32, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)
ret <8 x i8> %res
}
@@ -38,6 +59,13 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) {
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt v1.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: usra v0.16b, v1.16b, #3
+; NONEON-NOSVE-NEXT: sshr v0.16b, v0.16b, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)
ret <16 x i8> %res
}
@@ -51,6 +79,18 @@ define void @sdiv_v32i8(ptr %a) {
; CHECK-NEXT: asrd z1.b, p0/m, z1.b, #5
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v2.16b, v0.16b, #0
+; NONEON-NOSVE-NEXT: cmlt v3.16b, v1.16b, #0
+; NONEON-NOSVE-NEXT: usra v0.16b, v2.16b, #3
+; NONEON-NOSVE-NEXT: usra v1.16b, v3.16b, #3
+; NONEON-NOSVE-NEXT: sshr v0.16b, v0.16b, #5
+; NONEON-NOSVE-NEXT: sshr v1.16b, v1.16b, #5
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)
store <32 x i8> %res, ptr %a
@@ -66,6 +106,20 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) {
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: shl v1.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: mov w8, #31 // =0x1f
+; NONEON-NOSVE-NEXT: dup v2.2s, w8
+; NONEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16
+; NONEON-NOSVE-NEXT: ushr v1.2s, v1.2s, #26
+; NONEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NONEON-NOSVE-NEXT: add v0.2s, v0.2s, v1.2s
+; NONEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <2 x i16> %op1, shufflevector (<2 x i16> insertelement (<2 x i16> poison, i16 32, i32 0), <2 x i16> poison, <2 x i32> zeroinitializer)
ret <2 x i16> %res
}
@@ -78,6 +132,13 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) {
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt v1.4h, v0.4h, #0
+; NONEON-NOSVE-NEXT: usra v0.4h, v1.4h, #11
+; NONEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)
ret <4 x i16> %res
}
@@ -90,6 +151,13 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) {
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt v1.8h, v0.8h, #0
+; NONEON-NOSVE-NEXT: usra v0.8h, v1.8h, #11
+; NONEON-NOSVE-NEXT: sshr v0.8h, v0.8h, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 32, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)
ret <8 x i16> %res
}
@@ -103,6 +171,18 @@ define void @sdiv_v16i16(ptr %a) {
; CHECK-NEXT: asrd z1.h, p0/m, z1.h, #5
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v2.8h, v0.8h, #0
+; NONEON-NOSVE-NEXT: cmlt v3.8h, v1.8h, #0
+; NONEON-NOSVE-NEXT: usra v0.8h, v2.8h, #11
+; NONEON-NOSVE-NEXT: usra v1.8h, v3.8h, #11
+; NONEON-NOSVE-NEXT: sshr v0.8h, v0.8h, #5
+; NONEON-NOSVE-NEXT: sshr v1.8h, v1.8h, #5
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)
store <16 x i16> %res, ptr %a
@@ -117,6 +197,13 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) {
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt v1.2s, v0.2s, #0
+; NONEON-NOSVE-NEXT: usra v0.2s, v1.2s, #27
+; NONEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)
ret <2 x i32> %res
}
@@ -129,6 +216,13 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) {
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt v1.4s, v0.4s, #0
+; NONEON-NOSVE-NEXT: usra v0.4s, v1.4s, #27
+; NONEON-NOSVE-NEXT: sshr v0.4s, v0.4s, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)
ret <4 x i32> %res
}
@@ -142,6 +236,18 @@ define void @sdiv_v8i32(ptr %a) {
; CHECK-NEXT: asrd z1.s, p0/m, z1.s, #5
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v2.4s, v0.4s, #0
+; NONEON-NOSVE-NEXT: cmlt v3.4s, v1.4s, #0
+; NONEON-NOSVE-NEXT: usra v0.4s, v2.4s, #27
+; NONEON-NOSVE-NEXT: usra v1.4s, v3.4s, #27
+; NONEON-NOSVE-NEXT: sshr v0.4s, v0.4s, #5
+; NONEON-NOSVE-NEXT: sshr v1.4s, v1.4s, #5
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 32, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)
store <8 x i32> %res, ptr %a
@@ -156,6 +262,13 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) {
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt d1, d0, #0
+; NONEON-NOSVE-NEXT: usra d0, d1, #59
+; NONEON-NOSVE-NEXT: sshr d0, d0, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
ret <1 x i64> %res
}
@@ -169,6 +282,13 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) {
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: cmlt v1.2d, v0.2d, #0
+; NONEON-NOSVE-NEXT: usra v0.2d, v1.2d, #59
+; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #5
+; NONEON-NOSVE-NEXT: ret
%res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)
ret <2 x i64> %res
}
@@ -182,6 +302,18 @@ define void @sdiv_v4i64(ptr %a) {
; CHECK-NEXT: asrd z1.d, p0/m, z1.d, #5
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: sdiv_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: cmlt v2.2d, v0.2d, #0
+; NONEON-NOSVE-NEXT: cmlt v3.2d, v1.2d, #0
+; NONEON-NOSVE-NEXT: usra v0.2d, v2.2d, #59
+; NONEON-NOSVE-NEXT: usra v1.2d, v3.2d, #59
+; NONEON-NOSVE-NEXT: sshr v0.2d, v0.2d, #5
+; NONEON-NOSVE-NEXT: sshr v1.2d, v1.2d, #5
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 32, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)
store <4 x i64> %res, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
index 649b13fa8a1e3..e15529e1926ac 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -15,6 +16,11 @@ define <4 x i8> @splat_v4i8(i8 %a) {
; CHECK-NEXT: mov z0.h, w0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.4h, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x i8> undef, i8 %a, i64 0
%splat = shufflevector <4 x i8> %insert, <4 x i8> undef, <4 x i32> zeroinitializer
ret <4 x i8> %splat
@@ -26,6 +32,11 @@ define <8 x i8> @splat_v8i8(i8 %a) {
; CHECK-NEXT: mov z0.b, w0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.8b, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x i8> undef, i8 %a, i64 0
%splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %splat
@@ -37,6 +48,11 @@ define <16 x i8> @splat_v16i8(i8 %a) {
; CHECK-NEXT: mov z0.b, w0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.16b, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <16 x i8> undef, i8 %a, i64 0
%splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %splat
@@ -48,6 +64,12 @@ define void @splat_v32i8(i8 %a, ptr %b) {
; CHECK-NEXT: mov z0.b, w0
; CHECK-NEXT: stp q0, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.16b, w0
+; NONEON-NOSVE-NEXT: stp q0, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <32 x i8> undef, i8 %a, i64 0
%splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %splat, ptr %b
@@ -60,6 +82,11 @@ define <2 x i16> @splat_v2i16(i16 %a) {
; CHECK-NEXT: mov z0.s, w0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.2s, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <2 x i16> undef, i16 %a, i64 0
%splat = shufflevector <2 x i16> %insert, <2 x i16> undef, <2 x i32> zeroinitializer
ret <2 x i16> %splat
@@ -71,6 +98,11 @@ define <4 x i16> @splat_v4i16(i16 %a) {
; CHECK-NEXT: mov z0.h, w0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.4h, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x i16> undef, i16 %a, i64 0
%splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %splat
@@ -82,6 +114,11 @@ define <8 x i16> @splat_v8i16(i16 %a) {
; CHECK-NEXT: mov z0.h, w0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.8h, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x i16> undef, i16 %a, i64 0
%splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %splat
@@ -93,6 +130,12 @@ define void @splat_v16i16(i16 %a, ptr %b) {
; CHECK-NEXT: mov z0.h, w0
; CHECK-NEXT: stp q0, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.8h, w0
+; NONEON-NOSVE-NEXT: stp q0, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <16 x i16> undef, i16 %a, i64 0
%splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %splat, ptr %b
@@ -105,6 +148,11 @@ define <2 x i32> @splat_v2i32(i32 %a) {
; CHECK-NEXT: mov z0.s, w0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.2s, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <2 x i32> undef, i32 %a, i64 0
%splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
ret <2 x i32> %splat
@@ -116,6 +164,11 @@ define <4 x i32> @splat_v4i32(i32 %a) {
; CHECK-NEXT: mov z0.s, w0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.4s, w0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x i32> undef, i32 %a, i64 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %splat
@@ -127,6 +180,12 @@ define void @splat_v8i32(i32 %a, ptr %b) {
; CHECK-NEXT: mov z0.s, w0
; CHECK-NEXT: stp q0, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.4s, w0
+; NONEON-NOSVE-NEXT: stp q0, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x i32> undef, i32 %a, i64 0
%splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %splat, ptr %b
@@ -139,6 +198,11 @@ define <1 x i64> @splat_v1i64(i64 %a) {
; CHECK-NEXT: mov z0.d, x0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov d0, x0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <1 x i64> undef, i64 %a, i64 0
%splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
ret <1 x i64> %splat
@@ -150,6 +214,11 @@ define <2 x i64> @splat_v2i64(i64 %a) {
; CHECK-NEXT: mov z0.d, x0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.2d, x0
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <2 x i64> undef, i64 %a, i64 0
%splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %splat
@@ -161,6 +230,12 @@ define void @splat_v4i64(i64 %a, ptr %b) {
; CHECK-NEXT: mov z0.d, x0
; CHECK-NEXT: stp q0, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: dup v0.2d, x0
+; NONEON-NOSVE-NEXT: stp q0, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x i64> undef, i64 %a, i64 0
%splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %splat, ptr %b
@@ -178,6 +253,12 @@ define <2 x half> @splat_v2f16(half %a) {
; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.4h, v0.h[0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <2 x half> undef, half %a, i64 0
%splat = shufflevector <2 x half> %insert, <2 x half> undef, <2 x i32> zeroinitializer
ret <2 x half> %splat
@@ -190,6 +271,12 @@ define <4 x half> @splat_v4f16(half %a) {
; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.4h, v0.h[0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x half> undef, half %a, i64 0
%splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
ret <4 x half> %splat
@@ -202,6 +289,12 @@ define <8 x half> @splat_v8f16(half %a) {
; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.8h, v0.h[0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x half> undef, half %a, i64 0
%splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
ret <8 x half> %splat
@@ -214,6 +307,13 @@ define void @splat_v16f16(half %a, ptr %b) {
; CHECK-NEXT: mov z0.h, h0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $h0 killed $h0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.8h, v0.h[0]
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <16 x half> undef, half %a, i64 0
%splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
store <16 x half> %splat, ptr %b
@@ -227,6 +327,12 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) {
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $s0 killed $s0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.2s, v0.s[0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <2 x float> undef, float %a, i64 0
%splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
ret <2 x float> %splat
@@ -239,6 +345,12 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) {
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $s0 killed $s0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.4s, v0.s[0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x float> undef, float %a, i64 0
%splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %splat
@@ -251,6 +363,13 @@ define void @splat_v8f32(float %a, ptr %b) {
; CHECK-NEXT: mov z0.s, s0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $s0 killed $s0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.4s, v0.s[0]
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x float> undef, float %a, i64 0
%splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
store <8 x float> %splat, ptr %b
@@ -261,6 +380,10 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) {
; CHECK-LABEL: splat_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <1 x double> undef, double %a, i64 0
%splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer
ret <1 x double> %splat
@@ -273,6 +396,12 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) {
; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.2d, v0.d[0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <2 x double> undef, double %a, i64 0
%splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %splat
@@ -285,6 +414,13 @@ define void @splat_v4f64(double %a, ptr %b) {
; CHECK-NEXT: mov z0.d, d0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $q0
+; NONEON-NOSVE-NEXT: dup v0.2d, v0.d[0]
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x double> undef, double %a, i64 0
%splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
store <4 x double> %splat, ptr %b
@@ -301,6 +437,12 @@ define void @splat_imm_v32i8(ptr %a) {
; CHECK-NEXT: mov z0.b, #1 // =0x1
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_imm_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.16b, #1
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <32 x i8> undef, i8 1, i64 0
%splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
store <32 x i8> %splat, ptr %a
@@ -313,6 +455,13 @@ define void @splat_imm_v16i16(ptr %a) {
; CHECK-NEXT: mov z0.h, #2 // =0x2
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_imm_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #2 // =0x2
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <16 x i16> undef, i16 2, i64 0
%splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
store <16 x i16> %splat, ptr %a
@@ -325,6 +474,13 @@ define void @splat_imm_v8i32(ptr %a) {
; CHECK-NEXT: mov z0.s, #3 // =0x3
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_imm_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #3 // =0x3
+; NONEON-NOSVE-NEXT: dup v0.4s, w8
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x i32> undef, i32 3, i64 0
%splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
store <8 x i32> %splat, ptr %a
@@ -337,6 +493,13 @@ define void @splat_imm_v4i64(ptr %a) {
; CHECK-NEXT: mov z0.d, #4 // =0x4
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_imm_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #4 // =0x4
+; NONEON-NOSVE-NEXT: dup v0.2d, x8
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x i64> undef, i64 4, i64 0
%splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
store <4 x i64> %splat, ptr %a
@@ -353,6 +516,13 @@ define void @splat_imm_v16f16(ptr %a) {
; CHECK-NEXT: fmov z0.h, #5.00000000
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_imm_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov w8, #17664 // =0x4500
+; NONEON-NOSVE-NEXT: dup v0.8h, w8
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <16 x half> undef, half 5.0, i64 0
%splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
store <16 x half> %splat, ptr %a
@@ -365,6 +535,12 @@ define void @splat_imm_v8f32(ptr %a) {
; CHECK-NEXT: fmov z0.s, #6.00000000
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_imm_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov v0.4s, #6.00000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <8 x float> undef, float 6.0, i64 0
%splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
store <8 x float> %splat, ptr %a
@@ -377,6 +553,12 @@ define void @splat_imm_v4f64(ptr %a) {
; CHECK-NEXT: fmov z0.d, #7.00000000
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: splat_imm_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov v0.2d, #7.00000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%insert = insertelement <4 x double> undef, double 7.0, i64 0
%splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
store <4 x double> %splat, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index c7435bdbec949..f055061b13bed 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -12,6 +13,11 @@ define void @store_v4i8(ptr %a) {
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: st1b { z0.h }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str wzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x i8> zeroinitializer, ptr %a
ret void
}
@@ -22,6 +28,12 @@ define void @store_v8i8(ptr %a) {
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <8 x i8> zeroinitializer, ptr %a
ret void
}
@@ -32,6 +44,12 @@ define void @store_v16i8(ptr %a) {
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <16 x i8> zeroinitializer, ptr %a
ret void
}
@@ -42,6 +60,12 @@ define void @store_v32i8(ptr %a) {
; CHECK-NEXT: mov z0.b, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <32 x i8> zeroinitializer, ptr %a
ret void
}
@@ -53,6 +77,11 @@ define void @store_v2i16(ptr %a) {
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str wzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <2 x i16> zeroinitializer, ptr %a
ret void
}
@@ -64,6 +93,11 @@ define void @store_v2f16(ptr %a) {
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v2f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str wzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <2 x half> zeroinitializer, ptr %a
ret void
}
@@ -74,6 +108,12 @@ define void @store_v4i16(ptr %a) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x i16> zeroinitializer, ptr %a
ret void
}
@@ -84,6 +124,12 @@ define void @store_v4f16(ptr %a) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d0, #0000000000000000
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x half> zeroinitializer, ptr %a
ret void
}
@@ -94,6 +140,12 @@ define void @store_v8i16(ptr %a) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <8 x i16> zeroinitializer, ptr %a
ret void
}
@@ -104,6 +156,12 @@ define void @store_v8f16(ptr %a) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: str q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <8 x half> zeroinitializer, ptr %a
ret void
}
@@ -114,6 +172,12 @@ define void @store_v16i16(ptr %a) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <16 x i16> zeroinitializer, ptr %a
ret void
}
@@ -124,6 +188,12 @@ define void @store_v16f16(ptr %a) {
; CHECK-NEXT: mov z0.h, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <16 x half> zeroinitializer, ptr %a
ret void
}
@@ -133,6 +203,11 @@ define void @store_v2i32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: str xzr, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str xzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <2 x i32> zeroinitializer, ptr %a
ret void
}
@@ -142,6 +217,11 @@ define void @store_v2f32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: str xzr, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: str xzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <2 x float> zeroinitializer, ptr %a
ret void
}
@@ -151,6 +231,11 @@ define void @store_v4i32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: stp xzr, xzr, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x i32> zeroinitializer, ptr %a
ret void
}
@@ -160,6 +245,11 @@ define void @store_v4f32(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: stp xzr, xzr, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x float> zeroinitializer, ptr %a
ret void
}
@@ -170,6 +260,12 @@ define void @store_v8i32(ptr %a) {
; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <8 x i32> zeroinitializer, ptr %a
ret void
}
@@ -180,6 +276,12 @@ define void @store_v8f32(ptr %a) {
; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <8 x float> zeroinitializer, ptr %a
ret void
}
@@ -190,6 +292,12 @@ define void @store_v1i64(ptr %a) {
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v1i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <1 x i64> zeroinitializer, ptr %a
ret void
}
@@ -200,6 +308,12 @@ define void @store_v1f64(ptr %a) {
; CHECK-NEXT: fmov d0, xzr
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v1f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi d0, #0000000000000000
+; NONEON-NOSVE-NEXT: str d0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <1 x double> zeroinitializer, ptr %a
ret void
}
@@ -209,6 +323,11 @@ define void @store_v2i64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: stp xzr, xzr, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <2 x i64> zeroinitializer, ptr %a
ret void
}
@@ -218,6 +337,11 @@ define void @store_v2f64(ptr %a) {
; CHECK: // %bb.0:
; CHECK-NEXT: stp xzr, xzr, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp xzr, xzr, [x0]
+; NONEON-NOSVE-NEXT: ret
store <2 x double> zeroinitializer, ptr %a
ret void
}
@@ -228,6 +352,12 @@ define void @store_v4i64(ptr %a) {
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x i64> zeroinitializer, ptr %a
ret void
}
@@ -238,6 +368,12 @@ define void @store_v4f64(ptr %a) {
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: stp q0, q0, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: stp q0, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
store <4 x double> zeroinitializer, ptr %a
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
index 9e04fc236836c..80c9ef87e9b91 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
; Test we can code generater patterns of the form:
@@ -23,6 +24,12 @@ define void @subvector_v4i8(ptr %in, ptr %out) {
; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT: st1b { z0.h }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v4i8:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: str w8, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i8>, ptr %in
br label %bb1
@@ -37,6 +44,12 @@ define void @subvector_v8i8(ptr %in, ptr %out) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v8i8:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i8>, ptr %in
br label %bb1
@@ -51,6 +64,12 @@ define void @subvector_v16i8(ptr %in, ptr %out) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v16i8:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i8>, ptr %in
br label %bb1
@@ -65,6 +84,12 @@ define void @subvector_v32i8(ptr %in, ptr %out) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v32i8:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i8>, ptr %in
br label %bb1
@@ -81,6 +106,12 @@ define void @subvector_v2i16(ptr %in, ptr %out) {
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v2i16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: str w8, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i16>, ptr %in
br label %bb1
@@ -95,6 +126,12 @@ define void @subvector_v4i16(ptr %in, ptr %out) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v4i16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i16>, ptr %in
br label %bb1
@@ -109,6 +146,12 @@ define void @subvector_v8i16(ptr %in, ptr %out) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v8i16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i16>, ptr %in
br label %bb1
@@ -123,6 +166,12 @@ define void @subvector_v16i16(ptr %in, ptr %out) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v16i16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
br label %bb1
@@ -138,6 +187,12 @@ define void @subvector_v2i32(ptr %in, ptr %out) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v2i32:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i32>, ptr %in
br label %bb1
@@ -152,6 +207,12 @@ define void @subvector_v4i32(ptr %in, ptr %out) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v4i32:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i32>, ptr %in
br label %bb1
@@ -166,6 +227,12 @@ define void @subvector_v8i32(ptr %in, ptr %out) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v8i32:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i32>, ptr %in
br label %bb1
@@ -181,6 +248,12 @@ define void @subvector_v2i64(ptr %in, ptr %out) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v2i64:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i64>, ptr %in
br label %bb1
@@ -195,6 +268,12 @@ define void @subvector_v4i64(ptr %in, ptr %out) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v4i64:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i64>, ptr %in
br label %bb1
@@ -210,6 +289,12 @@ define void @subvector_v2f16(ptr %in, ptr %out) {
; CHECK-NEXT: ldr w8, [x0]
; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v2f16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr w8, [x0]
+; NONEON-NOSVE-NEXT: str w8, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x half>, ptr %in
br label %bb1
@@ -224,6 +309,12 @@ define void @subvector_v4f16(ptr %in, ptr %out) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v4f16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x half>, ptr %in
br label %bb1
@@ -238,6 +329,12 @@ define void @subvector_v8f16(ptr %in, ptr %out) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v8f16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x half>, ptr %in
br label %bb1
@@ -252,6 +349,12 @@ define void @subvector_v16f16(ptr %in, ptr %out) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v16f16:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x half>, ptr %in
br label %bb1
@@ -267,6 +370,12 @@ define void @subvector_v2f32(ptr %in, ptr %out) {
; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v2f32:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr d0, [x0]
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x float>, ptr %in
br label %bb1
@@ -281,6 +390,12 @@ define void @subvector_v4f32(ptr %in, ptr %out) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v4f32:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x float>, ptr %in
br label %bb1
@@ -295,6 +410,12 @@ define void @subvector_v8f32(ptr %in, ptr %out) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v8f32:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x float>,ptr %in
br label %bb1
@@ -310,6 +431,12 @@ define void @subvector_v2f64(ptr %in, ptr %out) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: str q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v2f64:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: str q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x double>, ptr %in
br label %bb1
@@ -324,6 +451,12 @@ define void @subvector_v4f64(ptr %in, ptr %out) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: subvector_v4f64:
+; NONEON-NOSVE: // %bb.0: // %bb1
+; NONEON-NOSVE-NEXT: ldp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x double>, ptr %in
br label %bb1
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index b34fe438a063a..41b68e10e75de 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -12,6 +13,13 @@ define void @store_trunc_v8i16i8(ptr %ap, ptr %dest) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1b { z0.h }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_trunc_v8i16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: xtn v0.8b, v0.8h
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i16>, ptr %ap
%val = trunc <8 x i16> %a to <8 x i8>
store <8 x i8> %val, ptr %dest
@@ -25,6 +33,14 @@ define void @store_trunc_v4i32i8(ptr %ap, ptr %dest) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1b { z0.s }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_trunc_v4i32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; NONEON-NOSVE-NEXT: str s0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i32>, ptr %ap
%val = trunc <4 x i32> %a to <4 x i8>
store <4 x i8> %val, ptr %dest
@@ -38,6 +54,13 @@ define void @store_trunc_v4i32i16(ptr %ap, ptr %dest) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_trunc_v4i32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i32>, ptr %ap
%val = trunc <4 x i32> %a to <4 x i16>
store <4 x i16> %val, ptr %dest
@@ -51,6 +74,13 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) {
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_trunc_v2i64i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: xtn v0.2s, v0.2d
+; NONEON-NOSVE-NEXT: str d0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i64>, ptr %ap
%val = trunc <2 x i64> %a to <2 x i32>
store <2 x i32> %val, ptr %dest
@@ -66,6 +96,14 @@ define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d
; CHECK-NEXT: str q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: store_trunc_v2i256i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr d0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldr d1, [x0]
+; NONEON-NOSVE-NEXT: mov v1.d[1], v0.d[0]
+; NONEON-NOSVE-NEXT: str q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <2 x i256>, ptr %ap
%val = trunc <2 x i256> %a to <2 x i64>
store <2 x i64> %val, ptr %dest
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
index 9e56462df3889..8242b4e26d505 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -19,6 +20,12 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i16>, ptr %in
%b = trunc <16 x i16> %a to <16 x i8>
ret <16 x i8> %b
@@ -41,6 +48,17 @@ define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: add z1.b, z2.b, z2.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v2.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i16>, ptr %in
%b = trunc <32 x i16> %a to <32 x i8>
%c = add <32 x i8> %b, %b
@@ -76,6 +94,24 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v64i16_v64i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: ldp q6, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v2.16b, v3.16b, v2.16b
+; NONEON-NOSVE-NEXT: uzp1 v3.16b, v5.16b, v4.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v6.16b, v1.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v2.16b, v2.16b, v2.16b
+; NONEON-NOSVE-NEXT: add v3.16b, v3.16b, v3.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <64 x i16>, ptr %in
%b = trunc <64 x i16> %a to <64 x i8>
%c = add <64 x i8> %b, %b
@@ -133,6 +169,38 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q2, q3, [x1, #32]
; CHECK-NEXT: stp q4, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #192]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #224]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #128]
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: ldp q16, q1, [x0, #160]
+; NONEON-NOSVE-NEXT: uzp1 v4.16b, v5.16b, v4.16b
+; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: uzp1 v6.16b, v7.16b, v6.16b
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q18, q7, [x0, #96]
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v16.16b, v1.16b
+; NONEON-NOSVE-NEXT: uzp1 v5.16b, v17.16b, v5.16b
+; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v2.16b, v3.16b, v2.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v4.16b, v4.16b, v4.16b
+; NONEON-NOSVE-NEXT: uzp1 v7.16b, v18.16b, v7.16b
+; NONEON-NOSVE-NEXT: add v3.16b, v6.16b, v6.16b
+; NONEON-NOSVE-NEXT: uzp1 v6.16b, v17.16b, v16.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #96]
+; NONEON-NOSVE-NEXT: add v0.16b, v5.16b, v5.16b
+; NONEON-NOSVE-NEXT: add v2.16b, v2.16b, v2.16b
+; NONEON-NOSVE-NEXT: add v4.16b, v7.16b, v7.16b
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #64]
+; NONEON-NOSVE-NEXT: add v1.16b, v6.16b, v6.16b
+; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q2, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <128 x i16>, ptr %in
%b = trunc <128 x i16> %a to <128 x i8>
%c = add <128 x i8> %b, %b
@@ -155,6 +223,13 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind {
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: xtn v0.8b, v0.8h
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i32>, ptr %in
%b = trunc <8 x i32> %a to <8 x i8>
ret <8 x i8> %b
@@ -178,6 +253,15 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i32>, ptr %in
%b = trunc <16 x i32> %a to <16 x i8>
ret <16 x i8> %b
@@ -215,6 +299,23 @@ define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: add z1.b, z3.b, z3.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v32i32_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v3.8h, v5.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v7.8h, v6.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v3.16b, v1.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i32>, ptr %in
%b = trunc <32 x i32> %a to <32 x i8>
%c = add <32 x i8> %b, %b
@@ -279,6 +380,36 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #128]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #160]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #192]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #224]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v4.8h, v5.8h, v4.8h
+; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: uzp1 v6.8h, v7.8h, v6.8h
+; NONEON-NOSVE-NEXT: ldp q16, q7, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #96]
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v1.8h
+; NONEON-NOSVE-NEXT: uzp1 v5.8h, v17.8h, v5.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; NONEON-NOSVE-NEXT: uzp1 v7.8h, v16.8h, v7.8h
+; NONEON-NOSVE-NEXT: uzp1 v3.8h, v19.8h, v18.8h
+; NONEON-NOSVE-NEXT: uzp1 v2.16b, v4.16b, v6.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v1.16b, v7.16b
+; NONEON-NOSVE-NEXT: uzp1 v3.16b, v5.16b, v3.16b
+; NONEON-NOSVE-NEXT: add v2.16b, v2.16b, v2.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add v3.16b, v3.16b, v3.16b
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i8>
%c = add <64 x i8> %b, %b
@@ -300,6 +431,12 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i32>, ptr %in
%b = trunc <8 x i32> %a to <8 x i16>
ret <8 x i16> %b
@@ -322,6 +459,17 @@ define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: add z1.h, z2.h, z2.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i32>, ptr %in
%b = trunc <16 x i32> %a to <16 x i16>
%c = add <16 x i16> %b, %b
@@ -357,6 +505,24 @@ define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v32i32_v32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: ldp q6, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v3.8h, v5.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v6.8h, v1.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v2.8h, v2.8h, v2.8h
+; NONEON-NOSVE-NEXT: add v3.8h, v3.8h, v3.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i32>, ptr %in
%b = trunc <32 x i32> %a to <32 x i16>
%c = add <32 x i16> %b, %b
@@ -414,6 +580,38 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q2, q3, [x1, #32]
; CHECK-NEXT: stp q4, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #192]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #224]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #128]
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: ldp q16, q1, [x0, #160]
+; NONEON-NOSVE-NEXT: uzp1 v4.8h, v5.8h, v4.8h
+; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: uzp1 v6.8h, v7.8h, v6.8h
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q18, q7, [x0, #96]
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v16.8h, v1.8h
+; NONEON-NOSVE-NEXT: uzp1 v5.8h, v17.8h, v5.8h
+; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v4.8h, v4.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp1 v7.8h, v18.8h, v7.8h
+; NONEON-NOSVE-NEXT: add v3.8h, v6.8h, v6.8h
+; NONEON-NOSVE-NEXT: uzp1 v6.8h, v17.8h, v16.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #96]
+; NONEON-NOSVE-NEXT: add v0.8h, v5.8h, v5.8h
+; NONEON-NOSVE-NEXT: add v2.8h, v2.8h, v2.8h
+; NONEON-NOSVE-NEXT: add v4.8h, v7.8h, v7.8h
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #64]
+; NONEON-NOSVE-NEXT: add v1.8h, v6.8h, v6.8h
+; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q2, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i16>
%c = add <64 x i16> %b, %b
@@ -437,6 +635,13 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i8>
ret <4 x i8> %b
@@ -461,6 +666,16 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: xtn v0.8b, v0.8h
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i8>
ret <8 x i8> %b
@@ -499,6 +714,21 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #64]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: uzp1 v3.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v4.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i8>
ret <16 x i8> %b
@@ -565,6 +795,35 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: add z0.b, z0.b, z0.b
; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #224]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #192]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #96]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #128]
+; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #160]
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q21, q20, [x0, #64]
+; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: uzp1 v16.4s, v17.4s, v16.4s
+; NONEON-NOSVE-NEXT: uzp1 v5.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v7.4s, v19.4s, v18.4s
+; NONEON-NOSVE-NEXT: uzp1 v6.4s, v21.4s, v20.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v4.8h, v16.8h
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v2.8h, v7.8h
+; NONEON-NOSVE-NEXT: uzp1 v3.8h, v6.8h, v5.8h
+; NONEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NONEON-NOSVE-NEXT: uzp1 v1.16b, v2.16b, v3.16b
+; NONEON-NOSVE-NEXT: add v0.16b, v0.16b, v0.16b
+; NONEON-NOSVE-NEXT: add v1.16b, v1.16b, v1.16b
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i8>
%c = add <32 x i8> %b, %b
@@ -587,6 +846,13 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i16>
ret <4 x i16> %b
@@ -610,6 +876,15 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i16>
ret <8 x i16> %b
@@ -647,6 +922,23 @@ define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: add z1.h, z3.h, z3.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v16i64_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp1 v3.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v1.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i16>
%c = add <16 x i16> %b, %b
@@ -711,6 +1003,36 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q1, q2, [x1, #32]
; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #128]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #160]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #192]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #224]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: ldp q3, q1, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: uzp1 v6.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: ldp q16, q7, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q19, q18, [x0, #96]
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v3.4s, v1.4s
+; NONEON-NOSVE-NEXT: uzp1 v5.4s, v17.4s, v5.4s
+; NONEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NONEON-NOSVE-NEXT: uzp1 v7.4s, v16.4s, v7.4s
+; NONEON-NOSVE-NEXT: uzp1 v3.4s, v19.4s, v18.4s
+; NONEON-NOSVE-NEXT: uzp1 v2.8h, v4.8h, v6.8h
+; NONEON-NOSVE-NEXT: add v0.8h, v0.8h, v0.8h
+; NONEON-NOSVE-NEXT: uzp1 v1.8h, v1.8h, v7.8h
+; NONEON-NOSVE-NEXT: uzp1 v3.8h, v5.8h, v3.8h
+; NONEON-NOSVE-NEXT: add v2.8h, v2.8h, v2.8h
+; NONEON-NOSVE-NEXT: add v1.8h, v1.8h, v1.8h
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: add v3.8h, v3.8h, v3.8h
+; NONEON-NOSVE-NEXT: stp q1, q3, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i16>
%c = add <32 x i16> %b, %b
@@ -732,6 +1054,12 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: ret
%a = load <4 x i64>, ptr %in
%b = trunc <4 x i64> %a to <4 x i32>
ret <4 x i32> %b
@@ -754,6 +1082,17 @@ define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: add z1.s, z2.s, z2.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #32]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q1, q0, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <8 x i64>, ptr %in
%b = trunc <8 x i64> %a to <8 x i32>
%c = add <8 x i32> %b, %b
@@ -789,6 +1128,24 @@ define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q0, q1, [x1, #32]
; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v16i64_v16i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #64]
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0, #96]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: ldp q6, q1, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: uzp1 v3.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v6.4s, v1.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v2.4s, v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: add v3.4s, v3.4s, v3.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q2, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i32>
%c = add <16 x i32> %b, %b
@@ -846,6 +1203,38 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind {
; CHECK-NEXT: stp q2, q3, [x1, #32]
; CHECK-NEXT: stp q4, q0, [x1]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q0, [x0, #192]
+; NONEON-NOSVE-NEXT: ldp q5, q4, [x0, #224]
+; NONEON-NOSVE-NEXT: ldp q7, q6, [x0, #128]
+; NONEON-NOSVE-NEXT: uzp1 v0.4s, v1.4s, v0.4s
+; NONEON-NOSVE-NEXT: ldp q16, q1, [x0, #160]
+; NONEON-NOSVE-NEXT: uzp1 v4.4s, v5.4s, v4.4s
+; NONEON-NOSVE-NEXT: ldp q17, q5, [x0, #64]
+; NONEON-NOSVE-NEXT: uzp1 v6.4s, v7.4s, v6.4s
+; NONEON-NOSVE-NEXT: ldp q3, q2, [x0]
+; NONEON-NOSVE-NEXT: ldp q18, q7, [x0, #96]
+; NONEON-NOSVE-NEXT: uzp1 v1.4s, v16.4s, v1.4s
+; NONEON-NOSVE-NEXT: uzp1 v5.4s, v17.4s, v5.4s
+; NONEON-NOSVE-NEXT: ldp q17, q16, [x0, #32]
+; NONEON-NOSVE-NEXT: uzp1 v2.4s, v3.4s, v2.4s
+; NONEON-NOSVE-NEXT: add v0.4s, v0.4s, v0.4s
+; NONEON-NOSVE-NEXT: add v4.4s, v4.4s, v4.4s
+; NONEON-NOSVE-NEXT: uzp1 v7.4s, v18.4s, v7.4s
+; NONEON-NOSVE-NEXT: add v3.4s, v6.4s, v6.4s
+; NONEON-NOSVE-NEXT: uzp1 v6.4s, v17.4s, v16.4s
+; NONEON-NOSVE-NEXT: add v1.4s, v1.4s, v1.4s
+; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #96]
+; NONEON-NOSVE-NEXT: add v0.4s, v5.4s, v5.4s
+; NONEON-NOSVE-NEXT: add v2.4s, v2.4s, v2.4s
+; NONEON-NOSVE-NEXT: add v4.4s, v7.4s, v7.4s
+; NONEON-NOSVE-NEXT: stp q3, q1, [x1, #64]
+; NONEON-NOSVE-NEXT: add v1.4s, v6.4s, v6.4s
+; NONEON-NOSVE-NEXT: stp q0, q4, [x1, #32]
+; NONEON-NOSVE-NEXT: stp q2, q1, [x1]
+; NONEON-NOSVE-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i32>
%c = add <32 x i32> %b, %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
index 304823c9e6414..874af15e21117 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -14,6 +15,12 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v1.8b, v0.8b, v0.8b, #6
+; NONEON-NOSVE-NEXT: trn1 v0.4h, v0.4h, v1.4h
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <4 x i8> %op1, <4 x i8> %op2, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
ret <4 x i8> %ret
}
@@ -28,6 +35,11 @@ define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: insr z1.b, w8
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #7
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <8 x i8> %ret
}
@@ -42,6 +54,11 @@ define <16 x i8> @shuffle_ext_byone_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: insr z1.b, w8
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #15
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <16 x i8> %op1, <16 x i8> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
ret <16 x i8> %ret
@@ -60,6 +77,15 @@ define void @shuffle_ext_byone_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.b, w8
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v32i8:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #15
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #15
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <32 x i8>, ptr %a
%op2 = load <32 x i8>, ptr %b
%ret = shufflevector <32 x i8> %op1, <32 x i8> %op2, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,
@@ -78,6 +104,11 @@ define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: revw z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: rev64 v0.2s, v0.2s
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <2 x i16> %op1, <2 x i16> %op2, <2 x i32> <i32 1, i32 0>
ret <2 x i16> %ret
}
@@ -92,6 +123,11 @@ define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: insr z1.h, w8
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #6
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <4 x i16> %op1, <4 x i16> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i16> %ret
}
@@ -106,6 +142,11 @@ define <8 x i16> @shuffle_ext_byone_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: insr z1.h, w8
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #14
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <8 x i16> %op1, <8 x i16> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <8 x i16> %ret
}
@@ -123,6 +164,15 @@ define void @shuffle_ext_byone_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.h, w8
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16i16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #14
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #14
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x i16>, ptr %a
%op2 = load <16 x i16>, ptr %b
%ret = shufflevector <16 x i16> %op1, <16 x i16> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
@@ -141,6 +191,11 @@ define <2 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: insr z1.s, w8
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #4
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <2 x i32> %op1, <2 x i32> %op2, <2 x i32> <i32 1, i32 2>
ret <2 x i32> %ret
}
@@ -155,6 +210,11 @@ define <4 x i32> @shuffle_ext_byone_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: insr z1.s, w8
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <4 x i32> %op1, <4 x i32> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %ret
}
@@ -172,6 +232,15 @@ define void @shuffle_ext_byone_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.s, w8
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8i32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #12
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x i32>, ptr %a
%op2 = load <8 x i32>, ptr %b
%ret = shufflevector <8 x i32> %op1, <8 x i32> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
@@ -189,6 +258,11 @@ define <2 x i64> @shuffle_ext_byone_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: insr z1.d, x8
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <2 x i64> %op1, <2 x i64> %op2, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %ret
}
@@ -206,6 +280,15 @@ define void @shuffle_ext_byone_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.d, x8
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x i64>, ptr %a
%op2 = load <4 x i64>, ptr %b
%ret = shufflevector <4 x i64> %op1, <4 x i64> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -223,6 +306,11 @@ define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) {
; CHECK-NEXT: insr z0.h, h2
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #6
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <4 x half> %op1, <4 x half> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x half> %ret
}
@@ -236,6 +324,11 @@ define <8 x half> @shuffle_ext_byone_v8f16(<8 x half> %op1, <8 x half> %op2) {
; CHECK-NEXT: insr z0.h, h2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #14
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <8 x half> %op1, <8 x half> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <8 x half> %ret
}
@@ -251,6 +344,15 @@ define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.h, h2
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v16f16:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #14
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #14
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <16 x half>, ptr %a
%op2 = load <16 x half>, ptr %b
%ret = shufflevector <16 x half> %op1, <16 x half> %op2, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22,
@@ -268,6 +370,11 @@ define <2 x float> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2)
; CHECK-NEXT: insr z0.s, s2
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.8b, v0.8b, v1.8b, #4
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <2 x float> %op1, <2 x float> %op2, <2 x i32> <i32 1, i32 2>
ret <2 x float> %ret
}
@@ -281,6 +388,11 @@ define <4 x float> @shuffle_ext_byone_v4f32(<4 x float> %op1, <4 x float> %op2)
; CHECK-NEXT: insr z0.s, s2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <4 x float> %op1, <4 x float> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x float> %ret
}
@@ -296,6 +408,15 @@ define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.s, s2
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f32:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #12
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #12
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <8 x float>, ptr %a
%op2 = load <8 x float>, ptr %b
%ret = shufflevector <8 x float> %op1, <8 x float> %op2, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
@@ -312,6 +433,11 @@ define <2 x double> @shuffle_ext_byone_v2f64(<2 x double> %op1, <2 x double> %op
; CHECK-NEXT: insr z0.d, d2
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: ret
%ret = shufflevector <2 x double> %op1, <2 x double> %op2, <2 x i32> <i32 1, i32 2>
ret <2 x double> %ret
}
@@ -327,6 +453,15 @@ define void @shuffle_ext_byone_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.d, d2
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q1, q2, [x1]
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v1.16b, #8
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v2.16b, #8
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -345,6 +480,15 @@ define void @shuffle_ext_byone_reverse(ptr %a, ptr %b) {
; CHECK-NEXT: insr z3.d, d2
; CHECK-NEXT: stp q1, q3, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_byone_reverse:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldp q0, q2, [x0]
+; NONEON-NOSVE-NEXT: ldr q1, [x1, #16]
+; NONEON-NOSVE-NEXT: ext v1.16b, v1.16b, v0.16b, #8
+; NONEON-NOSVE-NEXT: ext v0.16b, v0.16b, v2.16b, #8
+; NONEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
@@ -359,6 +503,13 @@ define void @shuffle_ext_invalid(ptr %a, ptr %b) {
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: shuffle_ext_invalid:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NONEON-NOSVE-NEXT: ret
%op1 = load <4 x double>, ptr %a
%op2 = load <4 x double>, ptr %b
%ret = shufflevector <4 x double> %op1, <4 x double> %op2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
index 6c9c055605668..e69f59aedc026 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -11,6 +12,11 @@ define fp128 @test_streaming_compatible_register_mov(fp128 %q0, fp128 %q1) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: test_streaming_compatible_register_mov:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: mov v0.16b, v1.16b
+; NONEON-NOSVE-NEXT: ret
ret fp128 %q1
}
@@ -20,6 +26,11 @@ define double @fp_zero_constant() {
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d0, xzr
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fp_zero_constant:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: fmov d0, xzr
+; NONEON-NOSVE-NEXT: ret
ret double 0.0
}
@@ -29,6 +40,11 @@ define <2 x i64> @fixed_vec_zero_constant() {
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fixed_vec_zero_constant:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: ret
ret <2 x i64> zeroinitializer
}
@@ -38,5 +54,10 @@ define <2 x double> @fixed_vec_fp_zero_constant() {
; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: fixed_vec_fp_zero_constant:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: movi v0.2d, #0000000000000000
+; NONEON-NOSVE-NEXT: ret
ret <2 x double> <double 0.0, double 0.0>
}
More information about the llvm-commits
mailing list