[llvm] 5dde495 - [AArch64] NFC: Precommit some tests for SME

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 19 03:48:44 PDT 2024


Author: Sander de Smalen
Date: 2024-06-19T10:46:47Z
New Revision: 5dde4951ae16283fffad40f84bc8ae4149766782

URL: https://github.com/llvm/llvm-project/commit/5dde4951ae16283fffad40f84bc8ae4149766782
DIFF: https://github.com/llvm/llvm-project/commit/5dde4951ae16283fffad40f84bc8ae4149766782.diff

LOG: [AArch64] NFC: Precommit some tests for SME

These tests show that, when compiling with only +sme, the code generator
does not take streaming mode into account when deciding whether it may use
(streaming-compatible) SVE instructions.

A follow-up patch will fix these issues.

Added: 
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll

Modified: 
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 233939f7285fa..43c67382c9d82 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
 ; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
 ; RUN: llc -mattr=+sme -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
+; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefixes=NEON-NOSVE
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -26,6 +28,19 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v4i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    shl v0.4h, v0.4h, #8
+; NEON-NOSVE-NEXT:    shl v1.4h, v1.4h, #8
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    sshr v0.4h, v0.4h, #8
+; NEON-NOSVE-NEXT:    sshr v1.4h, v1.4h, #8
+; NEON-NOSVE-NEXT:    sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v4i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -78,6 +93,21 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v8i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    sshll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT:    sshll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    sshll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    sshll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT:    xtn v0.8b, v0.8h
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v8i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -164,6 +194,30 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v16i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    sshll2 v2.8h, v1.16b, #0
+; NEON-NOSVE-NEXT:    sshll2 v3.8h, v0.16b, #0
+; NEON-NOSVE-NEXT:    sshll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT:    sshll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    sshll2 v4.4s, v2.8h, #0
+; NEON-NOSVE-NEXT:    sshll2 v5.4s, v3.8h, #0
+; NEON-NOSVE-NEXT:    sshll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT:    sshll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT:    sshll2 v5.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    sshll2 v3.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v3.8h
+; NEON-NOSVE-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v16i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -313,6 +367,53 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v32i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q6, q3, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ldr q2, [x0, #16]
+; NEON-NOSVE-NEXT:    sshll2 v1.8h, v3.16b, #0
+; NEON-NOSVE-NEXT:    sshll2 v4.8h, v2.16b, #0
+; NEON-NOSVE-NEXT:    sshll v3.8h, v3.8b, #0
+; NEON-NOSVE-NEXT:    sshll v2.8h, v2.8b, #0
+; NEON-NOSVE-NEXT:    sshll2 v7.8h, v6.16b, #0
+; NEON-NOSVE-NEXT:    sshll v6.8h, v6.8b, #0
+; NEON-NOSVE-NEXT:    sshll2 v0.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    sshll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT:    sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    sshll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT:    sshll2 v17.4s, v7.8h, #0
+; NEON-NOSVE-NEXT:    sshll v7.4s, v7.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z0.s, p0/m, z0.s, z5.s
+; NEON-NOSVE-NEXT:    sshll2 v5.4s, v2.8h, #0
+; NEON-NOSVE-NEXT:    sshll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z1.s, p0/m, z1.s, z4.s
+; NEON-NOSVE-NEXT:    sshll2 v4.4s, v3.8h, #0
+; NEON-NOSVE-NEXT:    sshll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT:    ldr q5, [x0]
+; NEON-NOSVE-NEXT:    sshll2 v16.8h, v5.16b, #0
+; NEON-NOSVE-NEXT:    sshll v5.8h, v5.8b, #0
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; NEON-NOSVE-NEXT:    sshll2 v18.4s, v16.8h, #0
+; NEON-NOSVE-NEXT:    sshll v16.4s, v16.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z17.s, p0/m, z17.s, z18.s
+; NEON-NOSVE-NEXT:    sshll2 v18.4s, v5.8h, #0
+; NEON-NOSVE-NEXT:    sshll v5.4s, v5.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
+; NEON-NOSVE-NEXT:    sshll2 v16.4s, v6.8h, #0
+; NEON-NOSVE-NEXT:    sshll v6.4s, v6.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z16.s, p0/m, z16.s, z18.s
+; NEON-NOSVE-NEXT:    sdiv z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT:    sdiv z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    uzp1 v3.8h, v7.8h, v17.8h
+; NEON-NOSVE-NEXT:    uzp1 v5.8h, v5.8h, v16.8h
+; NEON-NOSVE-NEXT:    uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT:    uzp1 v2.16b, v5.16b, v3.16b
+; NEON-NOSVE-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
+; NEON-NOSVE-NEXT:    stp q2, q0, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v32i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -472,6 +573,17 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v2i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    shl v1.2s, v1.2s, #16
+; NEON-NOSVE-NEXT:    shl v0.2s, v0.2s, #16
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl2
+; NEON-NOSVE-NEXT:    sshr v1.2s, v1.2s, #16
+; NEON-NOSVE-NEXT:    sshr v0.2s, v0.2s, #16
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v2i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -504,6 +616,15 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v4i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v4i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -553,6 +674,18 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v8i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    sshll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    sshll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v8i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -629,6 +762,29 @@ define void @sdiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    stp q1, q2, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v16i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q4, q1, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ldr q0, [x0, #16]
+; NEON-NOSVE-NEXT:    sshll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    sshll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    sshll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT:    sshll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT:    sshll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    sshll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    ldr q3, [x0]
+; NEON-NOSVE-NEXT:    sshll2 v6.4s, v3.8h, #0
+; NEON-NOSVE-NEXT:    sshll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT:    sdiv z3.s, p0/m, z3.s, z4.s
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v1.8h, v3.8h, v5.8h
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT:    stp q1, q0, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v16i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -722,6 +878,15 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v2i32:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl2
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v2i32:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -750,6 +915,15 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v4i32:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v4i32:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -785,6 +959,17 @@ define void @sdiv_v8i32(ptr %a, ptr %b)  {
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v8i32:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT:    sdivr z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    movprfx z1, z2
+; NEON-NOSVE-NEXT:    sdiv z1.s, p0/m, z1.s, z3.s
+; NEON-NOSVE-NEXT:    stp q0, q1, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -838,6 +1023,15 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v1i64:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.d, vl1
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v1i64:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
@@ -863,6 +1057,15 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v2i64:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.d, vl2
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v2i64:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -892,6 +1095,17 @@ define void @sdiv_v4i64(ptr %a, ptr %b)  {
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: sdiv_v4i64:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.d, vl2
+; NEON-NOSVE-NEXT:    ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT:    sdivr z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT:    movprfx z1, z2
+; NEON-NOSVE-NEXT:    sdiv z1.d, p0/m, z1.d, z3.d
+; NEON-NOSVE-NEXT:    stp q0, q1, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: sdiv_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -942,6 +1156,17 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v4i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    bic v0.4h, #255, lsl #8
+; NEON-NOSVE-NEXT:    bic v1.4h, #255, lsl #8
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v4i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -994,6 +1219,21 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v8i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ushll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT:    ushll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ushll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    ushll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT:    xtn v0.8b, v0.8h
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v8i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -1080,6 +1320,30 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v16i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ushll2 v2.8h, v1.16b, #0
+; NEON-NOSVE-NEXT:    ushll2 v3.8h, v0.16b, #0
+; NEON-NOSVE-NEXT:    ushll v1.8h, v1.8b, #0
+; NEON-NOSVE-NEXT:    ushll v0.8h, v0.8b, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ushll2 v4.4s, v2.8h, #0
+; NEON-NOSVE-NEXT:    ushll2 v5.4s, v3.8h, #0
+; NEON-NOSVE-NEXT:    ushll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT:    ushll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT:    ushll2 v5.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    ushll2 v3.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v3.8h
+; NEON-NOSVE-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v16i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -1229,6 +1493,53 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v32i8:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q6, q3, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ldr q2, [x0, #16]
+; NEON-NOSVE-NEXT:    ushll2 v1.8h, v3.16b, #0
+; NEON-NOSVE-NEXT:    ushll2 v4.8h, v2.16b, #0
+; NEON-NOSVE-NEXT:    ushll v3.8h, v3.8b, #0
+; NEON-NOSVE-NEXT:    ushll v2.8h, v2.8b, #0
+; NEON-NOSVE-NEXT:    ushll2 v7.8h, v6.16b, #0
+; NEON-NOSVE-NEXT:    ushll v6.8h, v6.8b, #0
+; NEON-NOSVE-NEXT:    ushll2 v0.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    ushll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT:    ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    ushll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT:    ushll2 v17.4s, v7.8h, #0
+; NEON-NOSVE-NEXT:    ushll v7.4s, v7.4h, #0
+; NEON-NOSVE-NEXT:    udivr z0.s, p0/m, z0.s, z5.s
+; NEON-NOSVE-NEXT:    ushll2 v5.4s, v2.8h, #0
+; NEON-NOSVE-NEXT:    ushll v2.4s, v2.4h, #0
+; NEON-NOSVE-NEXT:    udivr z1.s, p0/m, z1.s, z4.s
+; NEON-NOSVE-NEXT:    ushll2 v4.4s, v3.8h, #0
+; NEON-NOSVE-NEXT:    ushll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT:    udivr z4.s, p0/m, z4.s, z5.s
+; NEON-NOSVE-NEXT:    ldr q5, [x0]
+; NEON-NOSVE-NEXT:    ushll2 v16.8h, v5.16b, #0
+; NEON-NOSVE-NEXT:    ushll v5.8h, v5.8b, #0
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v1.8h, v0.8h
+; NEON-NOSVE-NEXT:    ushll2 v18.4s, v16.8h, #0
+; NEON-NOSVE-NEXT:    ushll v16.4s, v16.4h, #0
+; NEON-NOSVE-NEXT:    udivr z17.s, p0/m, z17.s, z18.s
+; NEON-NOSVE-NEXT:    ushll2 v18.4s, v5.8h, #0
+; NEON-NOSVE-NEXT:    ushll v5.4s, v5.4h, #0
+; NEON-NOSVE-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
+; NEON-NOSVE-NEXT:    ushll2 v16.4s, v6.8h, #0
+; NEON-NOSVE-NEXT:    ushll v6.4s, v6.4h, #0
+; NEON-NOSVE-NEXT:    udivr z16.s, p0/m, z16.s, z18.s
+; NEON-NOSVE-NEXT:    udiv z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT:    udiv z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    uzp1 v3.8h, v7.8h, v17.8h
+; NEON-NOSVE-NEXT:    uzp1 v5.8h, v5.8h, v16.8h
+; NEON-NOSVE-NEXT:    uzp1 v1.8h, v2.8h, v4.8h
+; NEON-NOSVE-NEXT:    uzp1 v2.16b, v5.16b, v3.16b
+; NEON-NOSVE-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
+; NEON-NOSVE-NEXT:    stp q2, q0, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v32i8:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -1388,6 +1699,16 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v2i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    movi d2, #0x00ffff0000ffff
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl2
+; NEON-NOSVE-NEXT:    and v1.8b, v1.8b, v2.8b
+; NEON-NOSVE-NEXT:    and v0.8b, v0.8b, v2.8b
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v2i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -1420,6 +1741,15 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v4i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    xtn v0.4h, v0.4s
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v4i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -1469,6 +1799,18 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v8i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ushll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    ushll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v8i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -1545,6 +1887,29 @@ define void @udiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    stp q1, q2, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v16i16:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q4, q1, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ldr q0, [x0, #16]
+; NEON-NOSVE-NEXT:    ushll2 v2.4s, v1.8h, #0
+; NEON-NOSVE-NEXT:    ushll2 v3.4s, v0.8h, #0
+; NEON-NOSVE-NEXT:    ushll2 v5.4s, v4.8h, #0
+; NEON-NOSVE-NEXT:    ushll v4.4s, v4.4h, #0
+; NEON-NOSVE-NEXT:    ushll v1.4s, v1.4h, #0
+; NEON-NOSVE-NEXT:    ushll v0.4s, v0.4h, #0
+; NEON-NOSVE-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
+; NEON-NOSVE-NEXT:    ldr q3, [x0]
+; NEON-NOSVE-NEXT:    ushll2 v6.4s, v3.8h, #0
+; NEON-NOSVE-NEXT:    ushll v3.4s, v3.4h, #0
+; NEON-NOSVE-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
+; NEON-NOSVE-NEXT:    udiv z3.s, p0/m, z3.s, z4.s
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    uzp1 v1.8h, v3.8h, v5.8h
+; NEON-NOSVE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; NEON-NOSVE-NEXT:    stp q1, q0, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v16i16:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -1638,6 +2003,15 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v2i32:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl2
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v2i32:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
@@ -1666,6 +2040,15 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v4i32:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v4i32:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -1701,6 +2084,17 @@ define void @udiv_v8i32(ptr %a, ptr %b)  {
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v8i32:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.s, vl4
+; NEON-NOSVE-NEXT:    ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT:    udivr z0.s, p0/m, z0.s, z1.s
+; NEON-NOSVE-NEXT:    movprfx z1, z2
+; NEON-NOSVE-NEXT:    udiv z1.s, p0/m, z1.s, z3.s
+; NEON-NOSVE-NEXT:    stp q0, q1, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -1754,6 +2148,15 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v1i64:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.d, vl1
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; NEON-NOSVE-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v1i64:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
@@ -1779,6 +2182,15 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v2i64:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ptrue p0.d, vl2
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; NEON-NOSVE-NEXT:    // kill: def $q1 killed $q1 def $z1
+; NEON-NOSVE-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v2i64:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
@@ -1808,6 +2220,17 @@ define void @udiv_v4i64(ptr %a, ptr %b)  {
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_v4i64:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    ldp q0, q3, [x1]
+; NEON-NOSVE-NEXT:    ptrue p0.d, vl2
+; NEON-NOSVE-NEXT:    ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT:    udivr z0.d, p0/m, z0.d, z1.d
+; NEON-NOSVE-NEXT:    movprfx z1, z2
+; NEON-NOSVE-NEXT:    udiv z1.d, p0/m, z1.d, z3.d
+; NEON-NOSVE-NEXT:    stp q0, q1, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_v4i64:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #96
@@ -1878,6 +2301,27 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; SVE2-NEXT:    stp q1, q0, [x0]
 ; SVE2-NEXT:    ret
 ;
+; NEON-NOSVE-LABEL: udiv_constantsplat_v8i32:
+; NEON-NOSVE:       // %bb.0:
+; NEON-NOSVE-NEXT:    mov w8, #8969 // =0x2309
+; NEON-NOSVE-NEXT:    ldp q1, q2, [x0]
+; NEON-NOSVE-NEXT:    movk w8, #22765, lsl #16
+; NEON-NOSVE-NEXT:    dup v0.4s, w8
+; NEON-NOSVE-NEXT:    umull2 v3.2d, v1.4s, v0.4s
+; NEON-NOSVE-NEXT:    umull v4.2d, v1.2s, v0.2s
+; NEON-NOSVE-NEXT:    umull2 v5.2d, v2.4s, v0.4s
+; NEON-NOSVE-NEXT:    umull v0.2d, v2.2s, v0.2s
+; NEON-NOSVE-NEXT:    uzp2 v3.4s, v4.4s, v3.4s
+; NEON-NOSVE-NEXT:    uzp2 v0.4s, v0.4s, v5.4s
+; NEON-NOSVE-NEXT:    sub v1.4s, v1.4s, v3.4s
+; NEON-NOSVE-NEXT:    sub v2.4s, v2.4s, v0.4s
+; NEON-NOSVE-NEXT:    usra v3.4s, v1.4s, #1
+; NEON-NOSVE-NEXT:    usra v0.4s, v2.4s, #1
+; NEON-NOSVE-NEXT:    ushr v1.4s, v3.4s, #6
+; NEON-NOSVE-NEXT:    ushr v0.4s, v0.4s, #6
+; NEON-NOSVE-NEXT:    stp q1, q0, [x0]
+; NEON-NOSVE-NEXT:    ret
+;
 ; NONEON-NOSVE-LABEL: udiv_constantsplat_v8i32:
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
new file mode 100644
index 0000000000000..ba479fc3bbe2d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefix=SVE2
+; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=SVE2
+; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; MUL
+;
+
+define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
+; SVE2-LABEL: mul_v2i64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
+; SVE2-NEXT:    mul z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE2-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: mul_v2i64:
+; NONEON-NOSVE:       // %bb.0:
+; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
+; NONEON-NOSVE-NEXT:    ldp x9, x10, [sp]
+; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
+; NONEON-NOSVE-NEXT:    mul x11, x10, x8
+; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
+; NONEON-NOSVE-NEXT:    mul x8, x9, x8
+; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
+; NONEON-NOSVE-NEXT:    add sp, sp, #48
+; NONEON-NOSVE-NEXT:    ret
+  %res = mul <2 x i64> %op1, %op2
+  ret <2 x i64> %res
+}

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
new file mode 100644
index 0000000000000..75c4536c97306
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -0,0 +1,246 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve < %s | FileCheck %s --check-prefix=SVE
+; FIXME: We shouldn't ever be emitting any SVE instructions when +sme is set but the function is not in streaming mode.
+; RUN: llc -mattr=+sme < %s | FileCheck %s --check-prefix=SME
+; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-SVE-NOGATHER
+; RUN: llc -mattr=+sme -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
+; SVE-LABEL: masked_gather_v2i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldr q0, [x0]
+; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    cmeq v0.2d, v0.2d, #0
+; SVE-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; SVE-NEXT:    ldr q0, [x1]
+; SVE-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; SVE-NEXT:    ret
+;
+; SME-LABEL: masked_gather_v2i64:
+; SME:       // %bb.0:
+; SME-NEXT:    ldr q0, [x0]
+; SME-NEXT:    adrp x8, .LCPI0_0
+; SME-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
+; SME-NEXT:    cmeq v0.2d, v0.2d, #0
+; SME-NEXT:    and v0.16b, v0.16b, v1.16b
+; SME-NEXT:    ldr q1, [x1]
+; SME-NEXT:    addp d0, v0.2d
+; SME-NEXT:    fmov x8, d0
+; SME-NEXT:    // implicit-def: $q0
+; SME-NEXT:    tbnz w8, #0, .LBB0_3
+; SME-NEXT:  // %bb.1: // %else
+; SME-NEXT:    tbnz w8, #1, .LBB0_4
+; SME-NEXT:  .LBB0_2: // %else2
+; SME-NEXT:    ret
+; SME-NEXT:  .LBB0_3: // %cond.load
+; SME-NEXT:    fmov x9, d1
+; SME-NEXT:    ldr d0, [x9]
+; SME-NEXT:    tbz w8, #1, .LBB0_2
+; SME-NEXT:  .LBB0_4: // %cond.load1
+; SME-NEXT:    mov x8, v1.d[1]
+; SME-NEXT:    ld1 { v0.d }[1], [x8]
+; SME-NEXT:    ret
+;
+; NONEON-SVE-NOGATHER-LABEL: masked_gather_v2i64:
+; NONEON-SVE-NOGATHER:       // %bb.0:
+; NONEON-SVE-NOGATHER-NEXT:    sub sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT:    .cfi_def_cfa_offset 16
+; NONEON-SVE-NOGATHER-NEXT:    ptrue p0.d, vl2
+; NONEON-SVE-NOGATHER-NEXT:    ldr q0, [x0]
+; NONEON-SVE-NOGATHER-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-SVE-NOGATHER-NEXT:    index z0.d, #1, #1
+; NONEON-SVE-NOGATHER-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-SVE-NOGATHER-NEXT:    and z0.d, z1.d, z0.d
+; NONEON-SVE-NOGATHER-NEXT:    ldr q1, [x1]
+; NONEON-SVE-NOGATHER-NEXT:    uaddv d0, p0, z0.d
+; NONEON-SVE-NOGATHER-NEXT:    ptrue p0.d
+; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d0
+; NONEON-SVE-NOGATHER-NEXT:    strb w8, [sp, #12]
+; NONEON-SVE-NOGATHER-NEXT:    and w8, w8, #0xff
+; NONEON-SVE-NOGATHER-NEXT:    tbz w8, #0, .LBB0_2
+; NONEON-SVE-NOGATHER-NEXT:  // %bb.1: // %cond.load
+; NONEON-SVE-NOGATHER-NEXT:    fmov x9, d1
+; NONEON-SVE-NOGATHER-NEXT:    ld1rd { z0.d }, p0/z, [x9]
+; NONEON-SVE-NOGATHER-NEXT:    tbnz w8, #1, .LBB0_3
+; NONEON-SVE-NOGATHER-NEXT:    b .LBB0_4
+; NONEON-SVE-NOGATHER-NEXT:  .LBB0_2:
+; NONEON-SVE-NOGATHER-NEXT:    adrp x9, .LCPI0_0
+; NONEON-SVE-NOGATHER-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
+; NONEON-SVE-NOGATHER-NEXT:    tbz w8, #1, .LBB0_4
+; NONEON-SVE-NOGATHER-NEXT:  .LBB0_3: // %cond.load1
+; NONEON-SVE-NOGATHER-NEXT:    mov w8, #1 // =0x1
+; NONEON-SVE-NOGATHER-NEXT:    index z2.d, #0, #1
+; NONEON-SVE-NOGATHER-NEXT:    mov z1.d, z1.d[1]
+; NONEON-SVE-NOGATHER-NEXT:    mov z3.d, x8
+; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d1
+; NONEON-SVE-NOGATHER-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
+; NONEON-SVE-NOGATHER-NEXT:    ldr x8, [x8]
+; NONEON-SVE-NOGATHER-NEXT:    mov z0.d, p0/m, x8
+; NONEON-SVE-NOGATHER-NEXT:  .LBB0_4: // %else2
+; NONEON-SVE-NOGATHER-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; NONEON-SVE-NOGATHER-NEXT:    add sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: masked_gather_v2i64:
+; NONEON-NOSVE:       // %bb.0:
+; NONEON-NOSVE-NEXT:    sub sp, sp, #16
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT:    ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT:    ldr q0, [x0]
+; NONEON-NOSVE-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-NOSVE-NEXT:    index z0.d, #1, #1
+; NONEON-NOSVE-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-NOSVE-NEXT:    and z0.d, z1.d, z0.d
+; NONEON-NOSVE-NEXT:    ldr q1, [x1]
+; NONEON-NOSVE-NEXT:    uaddv d0, p0, z0.d
+; NONEON-NOSVE-NEXT:    ptrue p0.d
+; NONEON-NOSVE-NEXT:    fmov x8, d0
+; NONEON-NOSVE-NEXT:    strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB0_2
+; NONEON-NOSVE-NEXT:  // %bb.1: // %cond.load
+; NONEON-NOSVE-NEXT:    fmov x9, d1
+; NONEON-NOSVE-NEXT:    ld1rd { z0.d }, p0/z, [x9]
+; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB0_3
+; NONEON-NOSVE-NEXT:    b .LBB0_4
+; NONEON-NOSVE-NEXT:  .LBB0_2:
+; NONEON-NOSVE-NEXT:    adrp x9, .LCPI0_0
+; NONEON-NOSVE-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
+; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB0_4
+; NONEON-NOSVE-NEXT:  .LBB0_3: // %cond.load1
+; NONEON-NOSVE-NEXT:    mov w8, #1 // =0x1
+; NONEON-NOSVE-NEXT:    index z2.d, #0, #1
+; NONEON-NOSVE-NEXT:    mov z1.d, z1.d[1]
+; NONEON-NOSVE-NEXT:    mov z3.d, x8
+; NONEON-NOSVE-NEXT:    fmov x8, d1
+; NONEON-NOSVE-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
+; NONEON-NOSVE-NEXT:    ldr x8, [x8]
+; NONEON-NOSVE-NEXT:    mov z0.d, p0/m, x8
+; NONEON-NOSVE-NEXT:  .LBB0_4: // %else2
+; NONEON-NOSVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
+; NONEON-NOSVE-NEXT:    ret
+  %vals = load <2 x i64>, ptr %a
+  %ptrs = load <2 x ptr>, ptr %b
+  %mask = icmp eq <2 x i64> %vals, zeroinitializer
+  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> poison)
+  ret <2 x i64> %res
+}
+
+define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
+; SVE-LABEL: masked_scatter_v2i64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    ldr q0, [x0]
+; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    cmeq v1.2d, v0.2d, #0
+; SVE-NEXT:    cmpne p0.d, p0/z, z1.d, #0
+; SVE-NEXT:    ldr q1, [x1]
+; SVE-NEXT:    st1d { z0.d }, p0, [z1.d]
+; SVE-NEXT:    ret
+;
+; SME-LABEL: masked_scatter_v2i64:
+; SME:       // %bb.0:
+; SME-NEXT:    ldr q0, [x0]
+; SME-NEXT:    adrp x8, .LCPI1_0
+; SME-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
+; SME-NEXT:    cmeq v1.2d, v0.2d, #0
+; SME-NEXT:    and v1.16b, v1.16b, v2.16b
+; SME-NEXT:    addp d2, v1.2d
+; SME-NEXT:    ldr q1, [x1]
+; SME-NEXT:    fmov x8, d2
+; SME-NEXT:    tbnz w8, #0, .LBB1_3
+; SME-NEXT:  // %bb.1: // %else
+; SME-NEXT:    tbnz w8, #1, .LBB1_4
+; SME-NEXT:  .LBB1_2: // %else2
+; SME-NEXT:    ret
+; SME-NEXT:  .LBB1_3: // %cond.store
+; SME-NEXT:    fmov x9, d1
+; SME-NEXT:    str d0, [x9]
+; SME-NEXT:    tbz w8, #1, .LBB1_2
+; SME-NEXT:  .LBB1_4: // %cond.store1
+; SME-NEXT:    mov x8, v1.d[1]
+; SME-NEXT:    st1 { v0.d }[1], [x8]
+; SME-NEXT:    ret
+;
+; NONEON-SVE-NOGATHER-LABEL: masked_scatter_v2i64:
+; NONEON-SVE-NOGATHER:       // %bb.0:
+; NONEON-SVE-NOGATHER-NEXT:    sub sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT:    .cfi_def_cfa_offset 16
+; NONEON-SVE-NOGATHER-NEXT:    ptrue p0.d, vl2
+; NONEON-SVE-NOGATHER-NEXT:    ldr q0, [x0]
+; NONEON-SVE-NOGATHER-NEXT:    index z1.d, #1, #1
+; NONEON-SVE-NOGATHER-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-SVE-NOGATHER-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-SVE-NOGATHER-NEXT:    and z1.d, z2.d, z1.d
+; NONEON-SVE-NOGATHER-NEXT:    uaddv d1, p0, z1.d
+; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d1
+; NONEON-SVE-NOGATHER-NEXT:    ldr q1, [x1]
+; NONEON-SVE-NOGATHER-NEXT:    strb w8, [sp, #12]
+; NONEON-SVE-NOGATHER-NEXT:    and w8, w8, #0xff
+; NONEON-SVE-NOGATHER-NEXT:    tbnz w8, #0, .LBB1_3
+; NONEON-SVE-NOGATHER-NEXT:  // %bb.1: // %else
+; NONEON-SVE-NOGATHER-NEXT:    tbnz w8, #1, .LBB1_4
+; NONEON-SVE-NOGATHER-NEXT:  .LBB1_2: // %else2
+; NONEON-SVE-NOGATHER-NEXT:    add sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT:    ret
+; NONEON-SVE-NOGATHER-NEXT:  .LBB1_3: // %cond.store
+; NONEON-SVE-NOGATHER-NEXT:    fmov x9, d0
+; NONEON-SVE-NOGATHER-NEXT:    fmov x10, d1
+; NONEON-SVE-NOGATHER-NEXT:    str x9, [x10]
+; NONEON-SVE-NOGATHER-NEXT:    tbz w8, #1, .LBB1_2
+; NONEON-SVE-NOGATHER-NEXT:  .LBB1_4: // %cond.store1
+; NONEON-SVE-NOGATHER-NEXT:    mov z0.d, z0.d[1]
+; NONEON-SVE-NOGATHER-NEXT:    mov z1.d, z1.d[1]
+; NONEON-SVE-NOGATHER-NEXT:    fmov x8, d0
+; NONEON-SVE-NOGATHER-NEXT:    fmov x9, d1
+; NONEON-SVE-NOGATHER-NEXT:    str x8, [x9]
+; NONEON-SVE-NOGATHER-NEXT:    add sp, sp, #16
+; NONEON-SVE-NOGATHER-NEXT:    ret
+;
+; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
+; NONEON-NOSVE:       // %bb.0:
+; NONEON-NOSVE-NEXT:    sub sp, sp, #16
+; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT:    ptrue p0.d, vl2
+; NONEON-NOSVE-NEXT:    ldr q0, [x0]
+; NONEON-NOSVE-NEXT:    index z1.d, #1, #1
+; NONEON-NOSVE-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
+; NONEON-NOSVE-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; NONEON-NOSVE-NEXT:    and z1.d, z2.d, z1.d
+; NONEON-NOSVE-NEXT:    uaddv d1, p0, z1.d
+; NONEON-NOSVE-NEXT:    fmov x8, d1
+; NONEON-NOSVE-NEXT:    ldr q1, [x1]
+; NONEON-NOSVE-NEXT:    strb w8, [sp, #12]
+; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
+; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB1_3
+; NONEON-NOSVE-NEXT:  // %bb.1: // %else
+; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB1_4
+; NONEON-NOSVE-NEXT:  .LBB1_2: // %else2
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
+; NONEON-NOSVE-NEXT:    ret
+; NONEON-NOSVE-NEXT:  .LBB1_3: // %cond.store
+; NONEON-NOSVE-NEXT:    fmov x9, d0
+; NONEON-NOSVE-NEXT:    fmov x10, d1
+; NONEON-NOSVE-NEXT:    str x9, [x10]
+; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB1_2
+; NONEON-NOSVE-NEXT:  .LBB1_4: // %cond.store1
+; NONEON-NOSVE-NEXT:    mov z0.d, z0.d[1]
+; NONEON-NOSVE-NEXT:    mov z1.d, z1.d[1]
+; NONEON-NOSVE-NEXT:    fmov x8, d0
+; NONEON-NOSVE-NEXT:    fmov x9, d1
+; NONEON-NOSVE-NEXT:    str x8, [x9]
+; NONEON-NOSVE-NEXT:    add sp, sp, #16
+; NONEON-NOSVE-NEXT:    ret
+  %vals = load <2 x i64>, ptr %a
+  %ptrs = load <2 x ptr>, ptr %b
+  %mask = icmp eq <2 x i64> %vals, zeroinitializer
+  call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
+declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)


        


More information about the llvm-commits mailing list