[llvm] 9c14132 - [AArch64][SME]: Add missing Ops that need custom-lowering in streaming mode.

Hassnaa Hamdi via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 14 07:10:37 PST 2023


Author: Hassnaa Hamdi
Date: 2023-02-14T15:00:25Z
New Revision: 9c14132d7b6c9e6bc69cabd0e81a371731f2376c

URL: https://github.com/llvm/llvm-project/commit/9c14132d7b6c9e6bc69cabd0e81a371731f2376c
DIFF: https://github.com/llvm/llvm-project/commit/9c14132d7b6c9e6bc69cabd0e81a371731f2376c.diff

LOG: [AArch64][SME]: Add missing Ops that need custom-lowering in streaming mode.

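Mark BITREVERSE, BSWAP, CTTZ, VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR,
VECTOR_SPLICE and VSELECT as Custom in addTypeForStreamingSVE, and update the
affected streaming-mode fixed-length test files (fp-vselect, int-vselect, rev).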

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D141595

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4db2b10ed8bb..4db5a04bb0a8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1756,10 +1756,13 @@ void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
   setOperationAction(ISD::ADD, VT, Custom);
   setOperationAction(ISD::AND, VT, Custom);
   setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+  setOperationAction(ISD::BITREVERSE, VT, Custom);
+  setOperationAction(ISD::BSWAP, VT, Custom);
   setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
   setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
   setOperationAction(ISD::CTLZ, VT, Custom);
   setOperationAction(ISD::CTPOP, VT, Custom);
+  setOperationAction(ISD::CTTZ, VT, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
   setOperationAction(ISD::FABS, VT, Custom);
@@ -1809,15 +1812,20 @@ void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
   setOperationAction(ISD::UMAX, VT, Custom);
   setOperationAction(ISD::UMIN, VT, Custom);
   setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+  setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
   setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
   setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
   setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+  setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
   setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
   setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
   setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
   setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
   setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+  setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+  setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+  setOperationAction(ISD::VSELECT, VT, Custom);
   setOperationAction(ISD::XOR, VT, Custom);
   setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
 }

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index e6bae72d5871..008ff7d90b5e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -12,16 +12,17 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
 ; CHECK-NEXT:    mov z3.s, z2.s[1]
 ; CHECK-NEXT:    fmov w8, s2
 ; CHECK-NEXT:    fmov w9, s3
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    strh w8, [sp, #8]
 ; CHECK-NEXT:    strh w9, [sp, #10]
 ; CHECK-NEXT:    ldr d2, [sp, #8]
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x1
+; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
@@ -33,13 +34,14 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
 ; CHECK-LABEL: select_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x1
+; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <4 x i1> %mask, <4 x half> %op1, <4 x half> %op2
@@ -50,14 +52,15 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask
 ; CHECK-LABEL: select_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x1
+; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <8 x i1> %mask, <8 x half> %op1, <8 x half> %op2
@@ -69,17 +72,11 @@ define void @select_v16f16(ptr %a, ptr %b) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    ldp q3, q2, [x1]
-; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    fcmeq p0.h, p0/z, z1.h, z3.h
-; CHECK-NEXT:    mov z4.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z5.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    bic z2.d, z2.d, z4.d
-; CHECK-NEXT:    bic z3.d, z3.d, z5.d
-; CHECK-NEXT:    and z1.d, z1.d, z5.d
-; CHECK-NEXT:    and z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    fcmeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT:    sel z1.h, p1, z1.h, z2.h
+; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z3.h
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z3.h
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
   %op1 = load <16 x half>, ptr %a
@@ -94,13 +91,14 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
 ; CHECK-LABEL: select_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.s, z2.s, #0x1
+; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <2 x i1> %mask, <2 x float> %op1, <2 x float> %op2
@@ -111,14 +109,15 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %m
 ; CHECK-LABEL: select_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.s, z2.s, #0x1
+; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <4 x i1> %mask, <4 x float> %op1, <4 x float> %op2
@@ -130,17 +129,11 @@ define void @select_v8f32(ptr %a, ptr %b) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ldp q3, q2, [x1]
-; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z2.s
-; CHECK-NEXT:    fcmeq p0.s, p0/z, z1.s, z3.s
-; CHECK-NEXT:    mov z4.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z5.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    bic z2.d, z2.d, z4.d
-; CHECK-NEXT:    bic z3.d, z3.d, z5.d
-; CHECK-NEXT:    and z1.d, z1.d, z5.d
-; CHECK-NEXT:    and z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    fcmeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    sel z1.s, p1, z1.s, z2.s
+; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z3.s
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z3.s
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
   %op1 = load <8 x float>, ptr %a
@@ -174,14 +167,15 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.d, z2.d, #0x1
+; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <2 x i1> %mask, <2 x double> %op1, <2 x double> %op2
@@ -193,17 +187,11 @@ define void @select_v4f64(ptr %a, ptr %b) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    ldp q3, q2, [x1]
-; CHECK-NEXT:    fcmeq p1.d, p0/z, z0.d, z2.d
-; CHECK-NEXT:    fcmeq p0.d, p0/z, z1.d, z3.d
-; CHECK-NEXT:    mov z4.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z5.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    bic z2.d, z2.d, z4.d
-; CHECK-NEXT:    bic z3.d, z3.d, z5.d
-; CHECK-NEXT:    and z1.d, z1.d, z5.d
-; CHECK-NEXT:    and z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    fcmeq p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    sel z1.d, p1, z1.d, z2.d
+; CHECK-NEXT:    fcmeq p0.d, p0/z, z0.d, z3.d
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z3.d
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index e9e96cc622a6..7d8fe130b402 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -7,13 +7,14 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) #0 {
 ; CHECK-LABEL: select_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x1
+; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <4 x i1> %mask, <4 x i8> %op1, <4 x i8> %op2
@@ -24,13 +25,14 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #0 {
 ; CHECK-LABEL: select_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.b, z2.b, #7
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.b, z2.b, #0x1
+; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
+; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
@@ -41,13 +43,14 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
 ; CHECK-LABEL: select_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z2.b, z2.b, #7
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.b, z2.b, #0x1
+; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
+; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
@@ -59,17 +62,11 @@ define void @select_v32i8(ptr %a, ptr %b) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    ldp q3, q2, [x1]
-; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z2.b
-; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z3.b
-; CHECK-NEXT:    mov z4.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z5.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    bic z2.d, z2.d, z4.d
-; CHECK-NEXT:    bic z3.d, z3.d, z5.d
-; CHECK-NEXT:    and z1.d, z1.d, z5.d
-; CHECK-NEXT:    and z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    cmpeq p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT:    sel z1.b, p1, z1.b, z2.b
+; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z3.b
+; CHECK-NEXT:    sel z0.b, p0, z0.b, z3.b
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
   %op1 = load <32 x i8>, ptr %a
@@ -84,13 +81,14 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) #
 ; CHECK-LABEL: select_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.s, z2.s, #0x1
+; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <2 x i1> %mask, <2 x i16> %op1, <2 x i16> %op2
@@ -101,13 +99,14 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) #
 ; CHECK-LABEL: select_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x1
+; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
@@ -118,14 +117,15 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #
 ; CHECK-LABEL: select_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.h, z2.h, #0x1
+; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
@@ -137,17 +137,11 @@ define void @select_v16i16(ptr %a, ptr %b) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    ldp q3, q2, [x1]
-; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z2.h
-; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z3.h
-; CHECK-NEXT:    mov z4.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z5.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    bic z2.d, z2.d, z4.d
-; CHECK-NEXT:    bic z3.d, z3.d, z5.d
-; CHECK-NEXT:    and z1.d, z1.d, z5.d
-; CHECK-NEXT:    and z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    cmpeq p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT:    sel z1.h, p1, z1.h, z2.h
+; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z3.h
+; CHECK-NEXT:    sel z0.h, p0, z0.h, z3.h
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
   %op1 = load <16 x i16>, ptr %a
@@ -162,13 +156,14 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) #
 ; CHECK-LABEL: select_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.s, z2.s, #0x1
+; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
@@ -179,14 +174,15 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) #
 ; CHECK-LABEL: select_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.s, z2.s, #0x1
+; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
@@ -198,17 +194,11 @@ define void @select_v8i32(ptr %a, ptr %b) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ldp q3, q2, [x1]
-; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z2.s
-; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z3.s
-; CHECK-NEXT:    mov z4.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z5.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    bic z2.d, z2.d, z4.d
-; CHECK-NEXT:    bic z3.d, z3.d, z5.d
-; CHECK-NEXT:    and z1.d, z1.d, z5.d
-; CHECK-NEXT:    and z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    cmpeq p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT:    sel z1.s, p1, z1.s, z2.s
+; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z3.s
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z3.s
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i32>, ptr %a
@@ -242,14 +232,15 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) #
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
-; CHECK-NEXT:    bic z1.d, z1.d, z2.d
-; CHECK-NEXT:    and z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    and z2.d, z2.d, #0x1
+; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
@@ -261,17 +252,11 @@ define void @select_v4i64(ptr %a, ptr %b) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    ldp q3, q2, [x1]
-; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z2.d
-; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z3.d
-; CHECK-NEXT:    mov z4.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z5.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    bic z2.d, z2.d, z4.d
-; CHECK-NEXT:    bic z3.d, z3.d, z5.d
-; CHECK-NEXT:    and z1.d, z1.d, z5.d
-; CHECK-NEXT:    and z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEXT:    cmpeq p1.d, p0/z, z1.d, z2.d
+; CHECK-NEXT:    sel z1.d, p1, z1.d, z2.d
+; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z3.d
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z3.d
 ; CHECK-NEXT:    stp q1, q0, [x0]
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index 03d4118cce28..1d034e4475c9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -197,15 +197,8 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) #0 {
 ; CHECK-LABEL: bswap_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    lsr z1.s, z0.s, #24
-; CHECK-NEXT:    lsr z2.s, z0.s, #8
-; CHECK-NEXT:    lsl z3.s, z0.s, #24
-; CHECK-NEXT:    and z0.s, z0.s, #0xff00
-; CHECK-NEXT:    and z2.s, z2.s, #0xff00
-; CHECK-NEXT:    lsl z0.s, z0.s, #8
-; CHECK-NEXT:    orr z1.d, z2.d, z1.d
-; CHECK-NEXT:    orr z0.d, z3.d, z0.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    revb z0.s, p0/m, z0.s
 ; CHECK-NEXT:    lsr z0.s, z0.s, #16
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -217,9 +210,8 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) #0 {
 ; CHECK-LABEL: bswap_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    lsr z1.h, z0.h, #8
-; CHECK-NEXT:    lsl z0.h, z0.h, #8
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    revb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %op)
@@ -230,9 +222,8 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 {
 ; CHECK-LABEL: bswap_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    lsr z1.h, z0.h, #8
-; CHECK-NEXT:    lsl z0.h, z0.h, #8
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    revb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %op)
@@ -242,14 +233,11 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) #0 {
 define void @bswap_v16i16(ptr %a) #0 {
 ; CHECK-LABEL: bswap_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    lsr z3.h, z1.h, #8
-; CHECK-NEXT:    lsl z1.h, z1.h, #8
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    lsr z2.h, z0.h, #8
-; CHECK-NEXT:    lsl z0.h, z0.h, #8
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
-; CHECK-NEXT:    stp q1, q0, [x0]
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    revb z0.h, p0/m, z0.h
+; CHECK-NEXT:    revb z1.h, p0/m, z1.h
+; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
   %op = load <16 x i16>, ptr %a
   %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %op)
@@ -261,15 +249,8 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) #0 {
 ; CHECK-LABEL: bswap_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    lsr z1.s, z0.s, #24
-; CHECK-NEXT:    lsr z2.s, z0.s, #8
-; CHECK-NEXT:    lsl z3.s, z0.s, #24
-; CHECK-NEXT:    and z0.s, z0.s, #0xff00
-; CHECK-NEXT:    and z2.s, z2.s, #0xff00
-; CHECK-NEXT:    lsl z0.s, z0.s, #8
-; CHECK-NEXT:    orr z1.d, z2.d, z1.d
-; CHECK-NEXT:    orr z0.d, z3.d, z0.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    revb z0.s, p0/m, z0.s
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %op)
@@ -280,15 +261,8 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 {
 ; CHECK-LABEL: bswap_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    lsr z1.s, z0.s, #24
-; CHECK-NEXT:    lsr z2.s, z0.s, #8
-; CHECK-NEXT:    lsl z3.s, z0.s, #24
-; CHECK-NEXT:    and z0.s, z0.s, #0xff00
-; CHECK-NEXT:    and z2.s, z2.s, #0xff00
-; CHECK-NEXT:    lsl z0.s, z0.s, #8
-; CHECK-NEXT:    orr z1.d, z2.d, z1.d
-; CHECK-NEXT:    orr z0.d, z3.d, z0.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    revb z0.s, p0/m, z0.s
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %op)
@@ -298,26 +272,11 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) #0 {
 define void @bswap_v8i32(ptr %a) #0 {
 ; CHECK-LABEL: bswap_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    lsr z5.s, z1.s, #8
-; CHECK-NEXT:    lsr z4.s, z1.s, #24
-; CHECK-NEXT:    and z5.s, z5.s, #0xff00
-; CHECK-NEXT:    lsr z3.s, z0.s, #8
-; CHECK-NEXT:    lsr z2.s, z0.s, #24
-; CHECK-NEXT:    and z3.s, z3.s, #0xff00
-; CHECK-NEXT:    orr z4.d, z5.d, z4.d
-; CHECK-NEXT:    orr z2.d, z3.d, z2.d
-; CHECK-NEXT:    lsl z3.s, z0.s, #24
-; CHECK-NEXT:    lsl z5.s, z1.s, #24
-; CHECK-NEXT:    and z1.s, z1.s, #0xff00
-; CHECK-NEXT:    and z0.s, z0.s, #0xff00
-; CHECK-NEXT:    lsl z1.s, z1.s, #8
-; CHECK-NEXT:    lsl z0.s, z0.s, #8
-; CHECK-NEXT:    orr z1.d, z5.d, z1.d
-; CHECK-NEXT:    orr z0.d, z3.d, z0.d
-; CHECK-NEXT:    orr z1.d, z1.d, z4.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
-; CHECK-NEXT:    stp q1, q0, [x0]
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    revb z0.s, p0/m, z0.s
+; CHECK-NEXT:    revb z1.s, p0/m, z1.s
+; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
   %op = load <8 x i32>, ptr %a
   %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %op)
@@ -329,29 +288,8 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) #0 {
 ; CHECK-LABEL: bswap_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    lsr z2.d, z0.d, #40
-; CHECK-NEXT:    lsr z1.d, z0.d, #56
-; CHECK-NEXT:    lsr z3.d, z0.d, #24
-; CHECK-NEXT:    lsr z4.d, z0.d, #8
-; CHECK-NEXT:    and z2.d, z2.d, #0xff00
-; CHECK-NEXT:    mov z5.d, z0.d
-; CHECK-NEXT:    orr z1.d, z2.d, z1.d
-; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    and z3.d, z3.d, #0xff0000
-; CHECK-NEXT:    and z4.d, z4.d, #0xff000000
-; CHECK-NEXT:    orr z3.d, z4.d, z3.d
-; CHECK-NEXT:    and z5.d, z5.d, #0xff000000
-; CHECK-NEXT:    and z2.d, z2.d, #0xff0000
-; CHECK-NEXT:    lsl z4.d, z0.d, #56
-; CHECK-NEXT:    and z0.d, z0.d, #0xff00
-; CHECK-NEXT:    lsl z5.d, z5.d, #8
-; CHECK-NEXT:    lsl z2.d, z2.d, #24
-; CHECK-NEXT:    lsl z0.d, z0.d, #40
-; CHECK-NEXT:    orr z2.d, z2.d, z5.d
-; CHECK-NEXT:    orr z0.d, z4.d, z0.d
-; CHECK-NEXT:    orr z1.d, z3.d, z1.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    revb z0.d, p0/m, z0.d
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.bswap.v1i64(<1 x i64> %op)
@@ -362,29 +300,8 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 {
 ; CHECK-LABEL: bswap_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    lsr z2.d, z0.d, #40
-; CHECK-NEXT:    lsr z1.d, z0.d, #56
-; CHECK-NEXT:    lsr z3.d, z0.d, #24
-; CHECK-NEXT:    lsr z4.d, z0.d, #8
-; CHECK-NEXT:    and z2.d, z2.d, #0xff00
-; CHECK-NEXT:    mov z5.d, z0.d
-; CHECK-NEXT:    orr z1.d, z2.d, z1.d
-; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    and z3.d, z3.d, #0xff0000
-; CHECK-NEXT:    and z4.d, z4.d, #0xff000000
-; CHECK-NEXT:    orr z3.d, z4.d, z3.d
-; CHECK-NEXT:    and z5.d, z5.d, #0xff000000
-; CHECK-NEXT:    and z2.d, z2.d, #0xff0000
-; CHECK-NEXT:    lsl z4.d, z0.d, #56
-; CHECK-NEXT:    and z0.d, z0.d, #0xff00
-; CHECK-NEXT:    lsl z5.d, z5.d, #8
-; CHECK-NEXT:    lsl z2.d, z2.d, #24
-; CHECK-NEXT:    lsl z0.d, z0.d, #40
-; CHECK-NEXT:    orr z2.d, z2.d, z5.d
-; CHECK-NEXT:    orr z0.d, z4.d, z0.d
-; CHECK-NEXT:    orr z1.d, z3.d, z1.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
-; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    revb z0.d, p0/m, z0.d
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %op)
@@ -394,54 +311,11 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) #0 {
 define void @bswap_v4i64(ptr %a) #0 {
 ; CHECK-LABEL: bswap_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldp q1, q0, [x0]
-; CHECK-NEXT:    lsr z3.d, z0.d, #40
-; CHECK-NEXT:    lsr z4.d, z0.d, #24
-; CHECK-NEXT:    lsr z5.d, z0.d, #8
-; CHECK-NEXT:    mov z6.d, z0.d
-; CHECK-NEXT:    mov z7.d, z0.d
-; CHECK-NEXT:    lsr z2.d, z0.d, #56
-; CHECK-NEXT:    and z3.d, z3.d, #0xff00
-; CHECK-NEXT:    and z4.d, z4.d, #0xff0000
-; CHECK-NEXT:    and z5.d, z5.d, #0xff000000
-; CHECK-NEXT:    and z6.d, z6.d, #0xff000000
-; CHECK-NEXT:    and z7.d, z7.d, #0xff0000
-; CHECK-NEXT:    orr z2.d, z3.d, z2.d
-; CHECK-NEXT:    lsr z3.d, z1.d, #40
-; CHECK-NEXT:    orr z4.d, z5.d, z4.d
-; CHECK-NEXT:    lsl z5.d, z6.d, #8
-; CHECK-NEXT:    lsl z6.d, z7.d, #24
-; CHECK-NEXT:    lsl z16.d, z0.d, #56
-; CHECK-NEXT:    and z0.d, z0.d, #0xff00
-; CHECK-NEXT:    orr z2.d, z4.d, z2.d
-; CHECK-NEXT:    orr z4.d, z6.d, z5.d
-; CHECK-NEXT:    lsr z5.d, z1.d, #56
-; CHECK-NEXT:    and z3.d, z3.d, #0xff00
-; CHECK-NEXT:    lsl z0.d, z0.d, #40
-; CHECK-NEXT:    orr z3.d, z3.d, z5.d
-; CHECK-NEXT:    lsr z5.d, z1.d, #24
-; CHECK-NEXT:    lsr z7.d, z1.d, #8
-; CHECK-NEXT:    orr z0.d, z16.d, z0.d
-; CHECK-NEXT:    mov z6.d, z1.d
-; CHECK-NEXT:    mov z16.d, z1.d
-; CHECK-NEXT:    and z5.d, z5.d, #0xff0000
-; CHECK-NEXT:    and z7.d, z7.d, #0xff000000
-; CHECK-NEXT:    and z6.d, z6.d, #0xff000000
-; CHECK-NEXT:    orr z5.d, z7.d, z5.d
-; CHECK-NEXT:    and z16.d, z16.d, #0xff0000
-; CHECK-NEXT:    lsl z7.d, z1.d, #56
-; CHECK-NEXT:    and z1.d, z1.d, #0xff00
-; CHECK-NEXT:    lsl z6.d, z6.d, #8
-; CHECK-NEXT:    lsl z16.d, z16.d, #24
-; CHECK-NEXT:    lsl z1.d, z1.d, #40
-; CHECK-NEXT:    orr z6.d, z16.d, z6.d
-; CHECK-NEXT:    orr z1.d, z7.d, z1.d
-; CHECK-NEXT:    orr z3.d, z5.d, z3.d
-; CHECK-NEXT:    orr z1.d, z1.d, z6.d
-; CHECK-NEXT:    orr z0.d, z0.d, z4.d
-; CHECK-NEXT:    orr z1.d, z1.d, z3.d
-; CHECK-NEXT:    orr z0.d, z0.d, z2.d
-; CHECK-NEXT:    stp q1, q0, [x0]
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    revb z0.d, p0/m, z0.d
+; CHECK-NEXT:    revb z1.d, p0/m, z1.d
+; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
   %op = load <4 x i64>, ptr %a
   %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %op)


        


More information about the llvm-commits mailing list