[llvm] 5dde495 - [AArch64] NFC: Precommit some tests for SME
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 19 03:48:44 PDT 2024
Author: Sander de Smalen
Date: 2024-06-19T10:46:47Z
New Revision: 5dde4951ae16283fffad40f84bc8ae4149766782
URL: https://github.com/llvm/llvm-project/commit/5dde4951ae16283fffad40f84bc8ae4149766782
DIFF: https://github.com/llvm/llvm-project/commit/5dde4951ae16283fffad40f84bc8ae4149766782.diff
LOG: [AArch64] NFC: Precommit some tests for SME
This shows that when compiling for +sme only, the code generator
doesn't consider the streaming mode when deciding whether to use
(streaming-compatible) SVE instructions.
A follow-up patch will fix these issues.
Added:
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
Modified:
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 233939f7285fa..43c67382c9d82 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -2,6 +2,8 @@
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
+; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefixes=NEON-NOSVE
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
target triple = "aarch64-unknown-linux-gnu"
@@ -26,6 +28,19 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v4i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8
+; NEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8
+; NEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8
+; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -78,6 +93,21 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v8i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT: xtn v0.8b, v0.8h
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -164,6 +194,30 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v16i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: sshll2 v2.8h, v1.16b, #0
+; NEON-NOSVE-NEXT: sshll2 v3.8h, v0.16b, #0
+; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: sshll2 v4.4s, v2.8h, #0
+; NEON-NOSVE-NEXT: sshll2 v5.4s, v3.8h, #0
+; NEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT: sshll2 v5.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: sshll2 v3.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
+; NEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -313,6 +367,53 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v32i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q6, q3, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NEON-NOSVE-NEXT: sshll2 v1.8h, v3.16b, #0
+; NEON-NOSVE-NEXT: sshll2 v4.8h, v2.16b, #0
+; NEON-NOSVE-NEXT: sshll v3.8h, v3.8b, #0
+; NEON-NOSVE-NEXT: sshll v2.8h, v2.8b, #0
+; NEON-NOSVE-NEXT: sshll2 v7.8h, v6.16b, #0
+; NEON-NOSVE-NEXT: sshll v6.8h, v6.8b, #0
+; NEON-NOSVE-NEXT: sshll2 v0.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: sshll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT: sshll2 v17.4s, v7.8h, #0
+; NEON-NOSVE-NEXT: sshll v7.4s, v7.4h, #0
+; NEON-NOSVE-NEXT: sdivr z0.s, p0/m, z0.s, z5.s
+; NEON-NOSVE-NEXT: sshll2 v5.4s, v2.8h, #0
+; NEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT: sdivr z1.s, p0/m, z1.s, z4.s
+; NEON-NOSVE-NEXT: sshll2 v4.4s, v3.8h, #0
+; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT: ldr q5, [x0]
+; NEON-NOSVE-NEXT: sshll2 v16.8h, v5.16b, #0
+; NEON-NOSVE-NEXT: sshll v5.8h, v5.8b, #0
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NEON-NOSVE-NEXT: sshll2 v18.4s, v16.8h, #0
+; NEON-NOSVE-NEXT: sshll v16.4s, v16.4h, #0
+; NEON-NOSVE-NEXT: sdivr z17.s, p0/m, z17.s, z18.s
+; NEON-NOSVE-NEXT: sshll2 v18.4s, v5.8h, #0
+; NEON-NOSVE-NEXT: sshll v5.4s, v5.4h, #0
+; NEON-NOSVE-NEXT: sdivr z7.s, p0/m, z7.s, z16.s
+; NEON-NOSVE-NEXT: sshll2 v16.4s, v6.8h, #0
+; NEON-NOSVE-NEXT: sshll v6.4s, v6.4h, #0
+; NEON-NOSVE-NEXT: sdivr z16.s, p0/m, z16.s, z18.s
+; NEON-NOSVE-NEXT: sdiv z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT: sdiv z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: uzp1 v3.8h, v7.8h, v17.8h
+; NEON-NOSVE-NEXT: uzp1 v5.8h, v5.8h, v16.8h
+; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT: uzp1 v2.16b, v5.16b, v3.16b
+; NEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NEON-NOSVE-NEXT: stp q2, q0, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -472,6 +573,17 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v2i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: shl v1.2s, v1.2s, #16
+; NEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16
+; NEON-NOSVE-NEXT: ptrue p0.s, vl2
+; NEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16
+; NEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v2i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -504,6 +616,15 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v4i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -553,6 +674,18 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v8i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -629,6 +762,29 @@ define void @sdiv_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q2, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v16i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q4, q1, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: sshll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: ldr q3, [x0]
+; NEON-NOSVE-NEXT: sshll2 v6.4s, v3.8h, #0
+; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v5.8h
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -722,6 +878,15 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v2i32:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.s, vl2
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -750,6 +915,15 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v4i32:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -785,6 +959,17 @@ define void @sdiv_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v8i32:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT: sdivr z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: movprfx z1, z2
+; NEON-NOSVE-NEXT: sdiv z1.s, p0/m, z1.s, z3.s
+; NEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -838,6 +1023,15 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v1i64:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.d, vl1
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
@@ -863,6 +1057,15 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v2i64:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -892,6 +1095,17 @@ define void @sdiv_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: sdiv_v4i64:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT: sdivr z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT: movprfx z1, z2
+; NEON-NOSVE-NEXT: sdiv z1.d, p0/m, z1.d, z3.d
+; NEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: sdiv_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -942,6 +1156,17 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v4i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: bic v0.4h, #255, lsl #8
+; NEON-NOSVE-NEXT: bic v1.4h, #255, lsl #8
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v4i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -994,6 +1219,21 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v8i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT: xtn v0.8b, v0.8h
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1080,6 +1320,30 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v16i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ushll2 v2.8h, v1.16b, #0
+; NEON-NOSVE-NEXT: ushll2 v3.8h, v0.16b, #0
+; NEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ushll2 v4.4s, v2.8h, #0
+; NEON-NOSVE-NEXT: ushll2 v5.4s, v3.8h, #0
+; NEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT: udivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT: ushll2 v5.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: ushll2 v3.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: udivr z3.s, p0/m, z3.s, z5.s
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h
+; NEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -1229,6 +1493,53 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v32i8:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q6, q3, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ldr q2, [x0, #16]
+; NEON-NOSVE-NEXT: ushll2 v1.8h, v3.16b, #0
+; NEON-NOSVE-NEXT: ushll2 v4.8h, v2.16b, #0
+; NEON-NOSVE-NEXT: ushll v3.8h, v3.8b, #0
+; NEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0
+; NEON-NOSVE-NEXT: ushll2 v7.8h, v6.16b, #0
+; NEON-NOSVE-NEXT: ushll v6.8h, v6.8b, #0
+; NEON-NOSVE-NEXT: ushll2 v0.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: ushll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT: ushll2 v17.4s, v7.8h, #0
+; NEON-NOSVE-NEXT: ushll v7.4s, v7.4h, #0
+; NEON-NOSVE-NEXT: udivr z0.s, p0/m, z0.s, z5.s
+; NEON-NOSVE-NEXT: ushll2 v5.4s, v2.8h, #0
+; NEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT: udivr z1.s, p0/m, z1.s, z4.s
+; NEON-NOSVE-NEXT: ushll2 v4.4s, v3.8h, #0
+; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT: udivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT: ldr q5, [x0]
+; NEON-NOSVE-NEXT: ushll2 v16.8h, v5.16b, #0
+; NEON-NOSVE-NEXT: ushll v5.8h, v5.8b, #0
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h
+; NEON-NOSVE-NEXT: ushll2 v18.4s, v16.8h, #0
+; NEON-NOSVE-NEXT: ushll v16.4s, v16.4h, #0
+; NEON-NOSVE-NEXT: udivr z17.s, p0/m, z17.s, z18.s
+; NEON-NOSVE-NEXT: ushll2 v18.4s, v5.8h, #0
+; NEON-NOSVE-NEXT: ushll v5.4s, v5.4h, #0
+; NEON-NOSVE-NEXT: udivr z7.s, p0/m, z7.s, z16.s
+; NEON-NOSVE-NEXT: ushll2 v16.4s, v6.8h, #0
+; NEON-NOSVE-NEXT: ushll v6.4s, v6.4h, #0
+; NEON-NOSVE-NEXT: udivr z16.s, p0/m, z16.s, z18.s
+; NEON-NOSVE-NEXT: udiv z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT: udiv z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: uzp1 v3.8h, v7.8h, v17.8h
+; NEON-NOSVE-NEXT: uzp1 v5.8h, v5.8h, v16.8h
+; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT: uzp1 v2.16b, v5.16b, v3.16b
+; NEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b
+; NEON-NOSVE-NEXT: stp q2, q0, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -1388,6 +1699,16 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v2i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff
+; NEON-NOSVE-NEXT: ptrue p0.s, vl2
+; NEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b
+; NEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v2i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1420,6 +1741,15 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v4i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1469,6 +1799,18 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v8i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -1545,6 +1887,29 @@ define void @udiv_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: stp q1, q2, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v16i16:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q4, q1, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ldr q0, [x0, #16]
+; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT: ushll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT: ldr q3, [x0]
+; NEON-NOSVE-NEXT: ushll2 v6.4s, v3.8h, #0
+; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT: udivr z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT: udiv z3.s, p0/m, z3.s, z4.s
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v5.8h
+; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -1638,6 +2003,15 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v2i32:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.s, vl2
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #32
@@ -1666,6 +2040,15 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v4i32:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -1701,6 +2084,17 @@ define void @udiv_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v8i32:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.s, vl4
+; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT: udivr z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT: movprfx z1, z2
+; NEON-NOSVE-NEXT: udiv z1.s, p0/m, z1.s, z3.s
+; NEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -1754,6 +2148,15 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v1i64:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.d, vl1
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT: udiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v1i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
@@ -1779,6 +2182,15 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v2i64:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT: udiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
@@ -1808,6 +2220,17 @@ define void @udiv_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_v4i64:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT: udivr z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT: movprfx z1, z2
+; NEON-NOSVE-NEXT: udiv z1.d, p0/m, z1.d, z3.d
+; NEON-NOSVE-NEXT: stp q0, q1, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #96
@@ -1878,6 +2301,27 @@ define void @udiv_constantsplat_v8i32(ptr %a) {
; SVE2-NEXT: stp q1, q0, [x0]
; SVE2-NEXT: ret
;
+; NEON-NOSVE-LABEL: udiv_constantsplat_v8i32:
+; NEON-NOSVE: // %bb.0:
+; NEON-NOSVE-NEXT: mov w8, #8969 // =0x2309
+; NEON-NOSVE-NEXT: ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT: movk w8, #22765, lsl #16
+; NEON-NOSVE-NEXT: dup v0.4s, w8
+; NEON-NOSVE-NEXT: umull2 v3.2d, v1.4s, v0.4s
+; NEON-NOSVE-NEXT: umull v4.2d, v1.2s, v0.2s
+; NEON-NOSVE-NEXT: umull2 v5.2d, v2.4s, v0.4s
+; NEON-NOSVE-NEXT: umull v0.2d, v2.2s, v0.2s
+; NEON-NOSVE-NEXT: uzp2 v3.4s, v4.4s, v3.4s
+; NEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v5.4s
+; NEON-NOSVE-NEXT: sub v1.4s, v1.4s, v3.4s
+; NEON-NOSVE-NEXT: sub v2.4s, v2.4s, v0.4s
+; NEON-NOSVE-NEXT: usra v3.4s, v1.4s, #1
+; NEON-NOSVE-NEXT: usra v0.4s, v2.4s, #1
+; NEON-NOSVE-NEXT: ushr v1.4s, v3.4s, #6
+; NEON-NOSVE-NEXT: ushr v0.4s, v0.4s, #6
+; NEON-NOSVE-NEXT: stp q1, q0, [x0]
+; NEON-NOSVE-NEXT: ret
+;
; NONEON-NOSVE-LABEL: udiv_constantsplat_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
new file mode 100644
index 0000000000000..ba479fc3bbe2d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=SVE2
+; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=SVE2
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; MUL
+;
+
+define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
+; SVE2-LABEL: mul_v2i64:
+; SVE2: // %bb.0:
+; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT: mul z0.d, z0.d, z1.d
+; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: mul_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]!
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT: ldp x9, x10, [sp]
+; NONEON-NOSVE-NEXT: ldr x8, [sp, #24]
+; NONEON-NOSVE-NEXT: mul x11, x10, x8
+; NONEON-NOSVE-NEXT: ldr x8, [sp, #16]
+; NONEON-NOSVE-NEXT: mul x8, x9, x8
+; NONEON-NOSVE-NEXT: stp x8, x11, [sp, #32]
+; NONEON-NOSVE-NEXT: ldr q0, [sp, #32]
+; NONEON-NOSVE-NEXT: add sp, sp, #48
+; NONEON-NOSVE-NEXT: ret
+ %res = mul <2 x i64> %op1, %op2
+ ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
new file mode 100644
index 0000000000000..75c4536c97306
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -0,0 +1,246 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
+; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
+; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefix=SME
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-SVE-NOGATHER
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
+; SVE-LABEL: masked_gather_v2i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldr q0, [x0]
+; SVE-NEXT: ptrue p0.d, vl2
+; SVE-NEXT: cmeq v0.2d, v0.2d, #0
+; SVE-NEXT: cmpne p0.d, p0/z, z0.d, #0
+; SVE-NEXT: ldr q0, [x1]
+; SVE-NEXT: ld1d { z0.d }, p0/z, [z0.d]
+; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; SVE-NEXT: ret
+;
+; SME-LABEL: masked_gather_v2i64:
+; SME: // %bb.0:
+; SME-NEXT: ldr q0, [x0]
+; SME-NEXT: adrp x8, .LCPI0_0
+; SME-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
+; SME-NEXT: cmeq v0.2d, v0.2d, #0
+; SME-NEXT: and v0.16b, v0.16b, v1.16b
+; SME-NEXT: ldr q1, [x1]
+; SME-NEXT: addp d0, v0.2d
+; SME-NEXT: fmov x8, d0
+; SME-NEXT: // implicit-def: $q0
+; SME-NEXT: tbnz w8, #0, .LBB0_3
+; SME-NEXT: // %bb.1: // %else
+; SME-NEXT: tbnz w8, #1, .LBB0_4
+; SME-NEXT: .LBB0_2: // %else2
+; SME-NEXT: ret
+; SME-NEXT: .LBB0_3: // %cond.load
+; SME-NEXT: fmov x9, d1
+; SME-NEXT: ldr d0, [x9]
+; SME-NEXT: tbz w8, #1, .LBB0_2
+; SME-NEXT: .LBB0_4: // %cond.load1
+; SME-NEXT: mov x8, v1.d[1]
+; SME-NEXT: ld1 { v0.d }[1], [x8]
+; SME-NEXT: ret
+;
+; NONEON-SVE-NOGATHER-LABEL: masked_gather_v2i64:
+; NONEON-SVE-NOGATHER: // %bb.0:
+; NONEON-SVE-NOGATHER-NEXT: sub sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT: .cfi_def_cfa_offset 16
+; NONEON-SVE-NOGATHER-NEXT: ptrue p0.d, vl2
+; NONEON-SVE-NOGATHER-NEXT: ldr q0, [x0]
+; NONEON-SVE-NOGATHER-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-SVE-NOGATHER-NEXT: index z0.d, #1, #1
+; NONEON-SVE-NOGATHER-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-SVE-NOGATHER-NEXT: and z0.d, z1.d, z0.d
+; NONEON-SVE-NOGATHER-NEXT: ldr q1, [x1]
+; NONEON-SVE-NOGATHER-NEXT: uaddv d0, p0, z0.d
+; NONEON-SVE-NOGATHER-NEXT: ptrue p0.d
+; NONEON-SVE-NOGATHER-NEXT: fmov x8, d0
+; NONEON-SVE-NOGATHER-NEXT: strb w8, [sp, #12]
+; NONEON-SVE-NOGATHER-NEXT: and w8, w8, #0xff
+; NONEON-SVE-NOGATHER-NEXT: tbz w8, #0, .LBB0_2
+; NONEON-SVE-NOGATHER-NEXT: // %bb.1: // %cond.load
+; NONEON-SVE-NOGATHER-NEXT: fmov x9, d1
+; NONEON-SVE-NOGATHER-NEXT: ld1rd { z0.d }, p0/z, [x9]
+; NONEON-SVE-NOGATHER-NEXT: tbnz w8, #1, .LBB0_3
+; NONEON-SVE-NOGATHER-NEXT: b .LBB0_4
+; NONEON-SVE-NOGATHER-NEXT: .LBB0_2:
+; NONEON-SVE-NOGATHER-NEXT: adrp x9, .LCPI0_0
+; NONEON-SVE-NOGATHER-NEXT: ldr q0, [x9, :lo12:.LCPI0_0]
+; NONEON-SVE-NOGATHER-NEXT: tbz w8, #1, .LBB0_4
+; NONEON-SVE-NOGATHER-NEXT: .LBB0_3: // %cond.load1
+; NONEON-SVE-NOGATHER-NEXT: mov w8, #1 // =0x1
+; NONEON-SVE-NOGATHER-NEXT: index z2.d, #0, #1
+; NONEON-SVE-NOGATHER-NEXT: mov z1.d, z1.d[1]
+; NONEON-SVE-NOGATHER-NEXT: mov z3.d, x8
+; NONEON-SVE-NOGATHER-NEXT: fmov x8, d1
+; NONEON-SVE-NOGATHER-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; NONEON-SVE-NOGATHER-NEXT: ldr x8, [x8]
+; NONEON-SVE-NOGATHER-NEXT: mov z0.d, p0/m, x8
+; NONEON-SVE-NOGATHER-NEXT: .LBB0_4: // %else2
+; NONEON-SVE-NOGATHER-NEXT: // kill: def $q0 killed $q0 killed $z0
+; NONEON-SVE-NOGATHER-NEXT: add sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_gather_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #16
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-NOSVE-NEXT: index z0.d, #1, #1
+; NONEON-NOSVE-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-NOSVE-NEXT: and z0.d, z1.d, z0.d
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: uaddv d0, p0, z0.d
+; NONEON-NOSVE-NEXT: ptrue p0.d
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: tbz w8, #0, .LBB0_2
+; NONEON-NOSVE-NEXT: // %bb.1: // %cond.load
+; NONEON-NOSVE-NEXT: fmov x9, d1
+; NONEON-NOSVE-NEXT: ld1rd { z0.d }, p0/z, [x9]
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB0_3
+; NONEON-NOSVE-NEXT: b .LBB0_4
+; NONEON-NOSVE-NEXT: .LBB0_2:
+; NONEON-NOSVE-NEXT: adrp x9, .LCPI0_0
+; NONEON-NOSVE-NEXT: ldr q0, [x9, :lo12:.LCPI0_0]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB0_4
+; NONEON-NOSVE-NEXT: .LBB0_3: // %cond.load1
+; NONEON-NOSVE-NEXT: mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT: index z2.d, #0, #1
+; NONEON-NOSVE-NEXT: mov z1.d, z1.d[1]
+; NONEON-NOSVE-NEXT: mov z3.d, x8
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
+; NONEON-NOSVE-NEXT: ldr x8, [x8]
+; NONEON-NOSVE-NEXT: mov z0.d, p0/m, x8
+; NONEON-NOSVE-NEXT: .LBB0_4: // %else2
+; NONEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
+ %vals = load <2 x i64>, ptr %a ; loaded only to derive the lane mask below
+ %ptrs = load <2 x ptr>, ptr %b ; one source address per lane
+ %mask = icmp eq <2 x i64> %vals, zeroinitializer ; a lane is active where its %vals element is zero
+ %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> poison) ; alignment operand is 8; poison is the passthru for inactive lanes
+ ret <2 x i64> %res
+}
+
+define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
+; SVE-LABEL: masked_scatter_v2i64:
+; SVE: // %bb.0:
+; SVE-NEXT: ldr q0, [x0]
+; SVE-NEXT: ptrue p0.d, vl2
+; SVE-NEXT: cmeq v1.2d, v0.2d, #0
+; SVE-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; SVE-NEXT: ldr q1, [x1]
+; SVE-NEXT: st1d { z0.d }, p0, [z1.d]
+; SVE-NEXT: ret
+;
+; SME-LABEL: masked_scatter_v2i64:
+; SME: // %bb.0:
+; SME-NEXT: ldr q0, [x0]
+; SME-NEXT: adrp x8, .LCPI1_0
+; SME-NEXT: ldr q2, [x8, :lo12:.LCPI1_0]
+; SME-NEXT: cmeq v1.2d, v0.2d, #0
+; SME-NEXT: and v1.16b, v1.16b, v2.16b
+; SME-NEXT: addp d2, v1.2d
+; SME-NEXT: ldr q1, [x1]
+; SME-NEXT: fmov x8, d2
+; SME-NEXT: tbnz w8, #0, .LBB1_3
+; SME-NEXT: // %bb.1: // %else
+; SME-NEXT: tbnz w8, #1, .LBB1_4
+; SME-NEXT: .LBB1_2: // %else2
+; SME-NEXT: ret
+; SME-NEXT: .LBB1_3: // %cond.store
+; SME-NEXT: fmov x9, d1
+; SME-NEXT: str d0, [x9]
+; SME-NEXT: tbz w8, #1, .LBB1_2
+; SME-NEXT: .LBB1_4: // %cond.store1
+; SME-NEXT: mov x8, v1.d[1]
+; SME-NEXT: st1 { v0.d }[1], [x8]
+; SME-NEXT: ret
+;
+; NONEON-SVE-NOGATHER-LABEL: masked_scatter_v2i64:
+; NONEON-SVE-NOGATHER: // %bb.0:
+; NONEON-SVE-NOGATHER-NEXT: sub sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT: .cfi_def_cfa_offset 16
+; NONEON-SVE-NOGATHER-NEXT: ptrue p0.d, vl2
+; NONEON-SVE-NOGATHER-NEXT: ldr q0, [x0]
+; NONEON-SVE-NOGATHER-NEXT: index z1.d, #1, #1
+; NONEON-SVE-NOGATHER-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-SVE-NOGATHER-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-SVE-NOGATHER-NEXT: and z1.d, z2.d, z1.d
+; NONEON-SVE-NOGATHER-NEXT: uaddv d1, p0, z1.d
+; NONEON-SVE-NOGATHER-NEXT: fmov x8, d1
+; NONEON-SVE-NOGATHER-NEXT: ldr q1, [x1]
+; NONEON-SVE-NOGATHER-NEXT: strb w8, [sp, #12]
+; NONEON-SVE-NOGATHER-NEXT: and w8, w8, #0xff
+; NONEON-SVE-NOGATHER-NEXT: tbnz w8, #0, .LBB1_3
+; NONEON-SVE-NOGATHER-NEXT: // %bb.1: // %else
+; NONEON-SVE-NOGATHER-NEXT: tbnz w8, #1, .LBB1_4
+; NONEON-SVE-NOGATHER-NEXT: .LBB1_2: // %else2
+; NONEON-SVE-NOGATHER-NEXT: add sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT: ret
+; NONEON-SVE-NOGATHER-NEXT: .LBB1_3: // %cond.store
+; NONEON-SVE-NOGATHER-NEXT: fmov x9, d0
+; NONEON-SVE-NOGATHER-NEXT: fmov x10, d1
+; NONEON-SVE-NOGATHER-NEXT: str x9, [x10]
+; NONEON-SVE-NOGATHER-NEXT: tbz w8, #1, .LBB1_2
+; NONEON-SVE-NOGATHER-NEXT: .LBB1_4: // %cond.store1
+; NONEON-SVE-NOGATHER-NEXT: mov z0.d, z0.d[1]
+; NONEON-SVE-NOGATHER-NEXT: mov z1.d, z1.d[1]
+; NONEON-SVE-NOGATHER-NEXT: fmov x8, d0
+; NONEON-SVE-NOGATHER-NEXT: fmov x9, d1
+; NONEON-SVE-NOGATHER-NEXT: str x8, [x9]
+; NONEON-SVE-NOGATHER-NEXT: add sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT: ret
+;
+; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
+; NONEON-NOSVE: // %bb.0:
+; NONEON-NOSVE-NEXT: sub sp, sp, #16
+; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT: ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT: ldr q0, [x0]
+; NONEON-NOSVE-NEXT: index z1.d, #1, #1
+; NONEON-NOSVE-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-NOSVE-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-NOSVE-NEXT: and z1.d, z2.d, z1.d
+; NONEON-NOSVE-NEXT: uaddv d1, p0, z1.d
+; NONEON-NOSVE-NEXT: fmov x8, d1
+; NONEON-NOSVE-NEXT: ldr q1, [x1]
+; NONEON-NOSVE-NEXT: strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT: and w8, w8, #0xff
+; NONEON-NOSVE-NEXT: tbnz w8, #0, .LBB1_3
+; NONEON-NOSVE-NEXT: // %bb.1: // %else
+; NONEON-NOSVE-NEXT: tbnz w8, #1, .LBB1_4
+; NONEON-NOSVE-NEXT: .LBB1_2: // %else2
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
+; NONEON-NOSVE-NEXT: .LBB1_3: // %cond.store
+; NONEON-NOSVE-NEXT: fmov x9, d0
+; NONEON-NOSVE-NEXT: fmov x10, d1
+; NONEON-NOSVE-NEXT: str x9, [x10]
+; NONEON-NOSVE-NEXT: tbz w8, #1, .LBB1_2
+; NONEON-NOSVE-NEXT: .LBB1_4: // %cond.store1
+; NONEON-NOSVE-NEXT: mov z0.d, z0.d[1]
+; NONEON-NOSVE-NEXT: mov z1.d, z1.d[1]
+; NONEON-NOSVE-NEXT: fmov x8, d0
+; NONEON-NOSVE-NEXT: fmov x9, d1
+; NONEON-NOSVE-NEXT: str x8, [x9]
+; NONEON-NOSVE-NEXT: add sp, sp, #16
+; NONEON-NOSVE-NEXT: ret
+ %vals = load <2 x i64>, ptr %a ; used both as the data to store and as the source of the mask
+ %ptrs = load <2 x ptr>, ptr %b ; one destination address per lane
+ %mask = icmp eq <2 x i64> %vals, zeroinitializer ; a lane is active where its %vals element is zero
+ call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask) ; alignment operand is 8; only active lanes are stored
+ ret void
+}
+
+declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x ptr>, i32, <2 x i1>) ; operands: value, pointers, alignment, mask
+declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>) ; operands: pointers, alignment, mask, passthru
More information about the llvm-commits mailing list