[llvm] 1b2d58b - [ARM] Extra testing for v2i1 types. NFC

David Green via llvm-commits <llvm-commits at lists.llvm.org>
Sat Nov 27 05:21:14 PST 2021


Author: David Green
Date: 2021-11-27T13:21:09Z
New Revision: 1b2d58ba90cd72a8dfe1535b5fbecbefacc30016

URL: https://github.com/llvm/llvm-project/commit/1b2d58ba90cd72a8dfe1535b5fbecbefacc30016
DIFF: https://github.com/llvm/llvm-project/commit/1b2d58ba90cd72a8dfe1535b5fbecbefacc30016.diff

LOG: [ARM] Extra testing for v2i1 types. NFC

This adds extra tests for various operations that will be affected by making
the v2i1 type legal.
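
The new coverage spans the active lane mask intrinsic, int<->fp conversions
through <2 x i1>, predicate shuffles, predicate spills across calls, compares,
immediate masks and vector selects, all operating on <2 x i64> or <2 x double>
values. As a representative shape of what is being tested, the vpsel_i64 case
selects between two <2 x i64> values under a <2 x i1> mask (IR excerpt
reproduced from the diff below); with v2i1 not yet legal, the checked output
lowers this to scalar orrs/csetm compares and a vbic/vand/vorr blend rather
than a single vpsel:

  %0 = icmp ne <2 x i64> %mask, zeroinitializer
  %1 = select <2 x i1> %0, <2 x i64> %src1, <2 x i64> %src2
  ret <2 x i64> %1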

Added: 
    

Modified: 
    llvm/test/CodeGen/Thumb2/active_lane_mask.ll
    llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
    llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll
    llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
    llvm/test/CodeGen/Thumb2/mve-vcmp.ll
    llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
    llvm/test/CodeGen/Thumb2/mve-vpsel.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
index c39d9226bb9e4..6127fdd40ea15 100644
--- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
+++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll
@@ -1,10 +1,82 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve %s -o - | FileCheck %s
 
+define <2 x i64> @v2i64(i32 %index, i32 %TC, <2 x i64> %V1, <2 x i64> %V2) {
+; CHECK-LABEL: v2i64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r0
+; CHECK-NEXT:    vmov.i64 q1, #0xffffffff
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vmov q2[2], q2[0], r1, r1
+; CHECK-NEXT:    vmov r0, r12, d1
+; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    adds.w r8, r0, #1
+; CHECK-NEXT:    adc lr, r12, #0
+; CHECK-NEXT:    vmov r12, s0
+; CHECK-NEXT:    vmov q0[2], q0[0], r12, r8
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vand q1, q2, q1
+; CHECK-NEXT:    vmov r4, r5, d1
+; CHECK-NEXT:    vmov.i32 q2, #0x1
+; CHECK-NEXT:    vmov r1, r6, d3
+; CHECK-NEXT:    subs r1, r4, r1
+; CHECK-NEXT:    sbcs.w r1, r5, r6
+; CHECK-NEXT:    vmov r5, r6, d0
+; CHECK-NEXT:    mov.w r1, #0
+; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    vmov r0, r1, d2
+; CHECK-NEXT:    csetm r9, ne
+; CHECK-NEXT:    subs r0, r5, r0
+; CHECK-NEXT:    sbcs.w r0, r6, r1
+; CHECK-NEXT:    it lo
+; CHECK-NEXT:    movlo r7, #1
+; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r9
+; CHECK-NEXT:    eor.w r0, r4, r8
+; CHECK-NEXT:    orrs.w r0, r0, lr
+; CHECK-NEXT:    cset r0, ne
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    teq.w r5, r12
+; CHECK-NEXT:    cset r1, ne
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
+; CHECK-NEXT:    veor q1, q1, q2
+; CHECK-NEXT:    vldr d5, [sp, #32]
+; CHECK-NEXT:    vand q0, q1, q0
+; CHECK-NEXT:    vmov d4, r2, r3
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    add r0, sp, #40
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vbic q1, q1, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
+  %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
+  %select = select <2 x i1> %active.lane.mask, <2 x i64> %V1, <2 x i64> %V2
+  ret <2 x i64> %select
+}
+
 define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) {
 ; CHECK-LABEL: v4i32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    adr.w r12, .LCPI0_0
+; CHECK-NEXT:    adr.w r12, .LCPI1_0
 ; CHECK-NEXT:    vdup.32 q1, r0
 ; CHECK-NEXT:    vldrw.u32 q0, [r12]
 ; CHECK-NEXT:    vadd.i32 q0, q0, r0
@@ -23,7 +95,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) {
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:  .LCPI1_0:
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 1 @ 0x1
 ; CHECK-NEXT:    .long 2 @ 0x2
@@ -36,7 +108,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %TC, <4 x i32> %V1, <4 x i32> %V2) {
 define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
 ; CHECK-LABEL: v7i32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    adr r3, .LCPI1_0
+; CHECK-NEXT:    adr r3, .LCPI2_0
 ; CHECK-NEXT:    vdup.32 q1, r1
 ; CHECK-NEXT:    vldrw.u32 q0, [r3]
 ; CHECK-NEXT:    ldr r3, [sp, #32]
@@ -57,7 +129,7 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
 ; CHECK-NEXT:    ldr r2, [sp, #12]
 ; CHECK-NEXT:    ldr r3, [sp, #4]
 ; CHECK-NEXT:    vmov q3[3], q3[1], r3, r2
-; CHECK-NEXT:    adr r2, .LCPI1_1
+; CHECK-NEXT:    adr r2, .LCPI2_1
 ; CHECK-NEXT:    vpsel q2, q3, q2
 ; CHECK-NEXT:    vstrw.32 q2, [r0]
 ; CHECK-NEXT:    vldrw.u32 q2, [r2]
@@ -89,12 +161,12 @@ define <7 x i32> @v7i32(i32 %index, i32 %TC, <7 x i32> %V1, <7 x i32> %V2) {
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI1_0:
+; CHECK-NEXT:  .LCPI2_0:
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 1 @ 0x1
 ; CHECK-NEXT:    .long 2 @ 0x2
 ; CHECK-NEXT:    .long 3 @ 0x3
-; CHECK-NEXT:  .LCPI1_1:
+; CHECK-NEXT:  .LCPI2_1:
 ; CHECK-NEXT:    .long 4 @ 0x4
 ; CHECK-NEXT:    .long 5 @ 0x5
 ; CHECK-NEXT:    .long 6 @ 0x6
@@ -108,7 +180,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
 ; CHECK-LABEL: v8i16:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    adr.w r12, .LCPI2_0
+; CHECK-NEXT:    adr.w r12, .LCPI3_0
 ; CHECK-NEXT:    vdup.32 q5, r1
 ; CHECK-NEXT:    vldrw.u32 q0, [r12]
 ; CHECK-NEXT:    vmov.i8 q1, #0x0
@@ -121,7 +193,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
 ; CHECK-NEXT:    vmov.16 q0[1], r12
 ; CHECK-NEXT:    vmov r1, r12, d9
 ; CHECK-NEXT:    vmov.16 q0[2], r1
-; CHECK-NEXT:    adr r1, .LCPI2_1
+; CHECK-NEXT:    adr r1, .LCPI3_1
 ; CHECK-NEXT:    vldrw.u32 q4, [r1]
 ; CHECK-NEXT:    vmov.16 q0[3], r12
 ; CHECK-NEXT:    vadd.i32 q4, q4, r0
@@ -165,12 +237,12 @@ define <8 x i16> @v8i16(i32 %index, i32 %TC, <8 x i16> %V1, <8 x i16> %V2) {
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI2_0:
+; CHECK-NEXT:  .LCPI3_0:
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 1 @ 0x1
 ; CHECK-NEXT:    .long 2 @ 0x2
 ; CHECK-NEXT:    .long 3 @ 0x3
-; CHECK-NEXT:  .LCPI2_1:
+; CHECK-NEXT:  .LCPI3_1:
 ; CHECK-NEXT:    .long 4 @ 0x4
 ; CHECK-NEXT:    .long 5 @ 0x5
 ; CHECK-NEXT:    .long 6 @ 0x6
@@ -185,7 +257,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    sub sp, #16
-; CHECK-NEXT:    adr.w r12, .LCPI3_0
+; CHECK-NEXT:    adr.w r12, .LCPI4_0
 ; CHECK-NEXT:    vdup.32 q7, r1
 ; CHECK-NEXT:    vldrw.u32 q0, [r12]
 ; CHECK-NEXT:    vmov.i8 q5, #0x0
@@ -198,7 +270,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    vmov.16 q2[1], r12
 ; CHECK-NEXT:    vmov r1, r12, d1
 ; CHECK-NEXT:    vmov.16 q2[2], r1
-; CHECK-NEXT:    adr r1, .LCPI3_1
+; CHECK-NEXT:    adr r1, .LCPI4_1
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
 ; CHECK-NEXT:    vmov.16 q2[3], r12
 ; CHECK-NEXT:    vadd.i32 q3, q0, r0
@@ -228,7 +300,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    vmov.8 q2[6], r1
 ; CHECK-NEXT:    vmov.u16 r1, q0[7]
 ; CHECK-NEXT:    vmov.8 q2[7], r1
-; CHECK-NEXT:    adr r1, .LCPI3_2
+; CHECK-NEXT:    adr r1, .LCPI4_2
 ; CHECK-NEXT:    vldrw.u32 q0, [r1]
 ; CHECK-NEXT:    vadd.i32 q0, q0, r0
 ; CHECK-NEXT:    vcmp.u32 hi, q7, q0
@@ -239,7 +311,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    vmov.16 q0[1], r12
 ; CHECK-NEXT:    vmov r1, r12, d13
 ; CHECK-NEXT:    vmov.16 q0[2], r1
-; CHECK-NEXT:    adr r1, .LCPI3_3
+; CHECK-NEXT:    adr r1, .LCPI4_3
 ; CHECK-NEXT:    vldrw.u32 q6, [r1]
 ; CHECK-NEXT:    vmov.16 q0[3], r12
 ; CHECK-NEXT:    vadd.i32 q6, q6, r0
@@ -355,22 +427,22 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) {
 ; CHECK-NEXT:    bx lr
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:  .LCPI3_0:
+; CHECK-NEXT:  .LCPI4_0:
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 1 @ 0x1
 ; CHECK-NEXT:    .long 2 @ 0x2
 ; CHECK-NEXT:    .long 3 @ 0x3
-; CHECK-NEXT:  .LCPI3_1:
+; CHECK-NEXT:  .LCPI4_1:
 ; CHECK-NEXT:    .long 4 @ 0x4
 ; CHECK-NEXT:    .long 5 @ 0x5
 ; CHECK-NEXT:    .long 6 @ 0x6
 ; CHECK-NEXT:    .long 7 @ 0x7
-; CHECK-NEXT:  .LCPI3_2:
+; CHECK-NEXT:  .LCPI4_2:
 ; CHECK-NEXT:    .long 8 @ 0x8
 ; CHECK-NEXT:    .long 9 @ 0x9
 ; CHECK-NEXT:    .long 10 @ 0xa
 ; CHECK-NEXT:    .long 11 @ 0xb
-; CHECK-NEXT:  .LCPI3_3:
+; CHECK-NEXT:  .LCPI4_3:
 ; CHECK-NEXT:    .long 12 @ 0xc
 ; CHECK-NEXT:    .long 13 @ 0xd
 ; CHECK-NEXT:    .long 14 @ 0xe
@@ -388,12 +460,12 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe
 ; CHECK-NEXT:    vpush {d8, d9}
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    cmp r2, #0
-; CHECK-NEXT:    beq.w .LBB4_3
+; CHECK-NEXT:    beq.w .LBB5_3
 ; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
 ; CHECK-NEXT:    adds r0, r2, #1
 ; CHECK-NEXT:    vmov q1[2], q1[0], r2, r2
 ; CHECK-NEXT:    bic r0, r0, #1
-; CHECK-NEXT:    adr r2, .LCPI4_0
+; CHECK-NEXT:    adr r2, .LCPI5_0
 ; CHECK-NEXT:    subs r0, #2
 ; CHECK-NEXT:    movs r3, #1
 ; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
@@ -401,7 +473,7 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe
 ; CHECK-NEXT:    add.w lr, r3, r0, lsr #1
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    vand q1, q1, q0
-; CHECK-NEXT:  .LBB4_2: @ %vector.body
+; CHECK-NEXT:  .LBB5_2: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vmov q3[2], q3[0], r12, r12
 ; CHECK-NEXT:    vmov r6, r7, d3
@@ -474,15 +546,15 @@ define void @test_width2(i32* nocapture readnone %x, i32* nocapture %y, i8 zeroe
 ; CHECK-NEXT:    vmovmi r0, s14
 ; CHECK-NEXT:    strmi r0, [r1, #4]
 ; CHECK-NEXT:    adds r1, #8
-; CHECK-NEXT:    le lr, .LBB4_2
-; CHECK-NEXT:  .LBB4_3: @ %for.cond.cleanup
+; CHECK-NEXT:    le lr, .LBB5_2
+; CHECK-NEXT:  .LBB5_3: @ %for.cond.cleanup
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    vpop {d8, d9}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  @ %bb.4:
-; CHECK-NEXT:  .LCPI4_0:
+; CHECK-NEXT:  .LCPI5_0:
 ; CHECK-NEXT:    .long 1 @ 0x1
 ; CHECK-NEXT:    .long 0 @ 0x0
 ; CHECK-NEXT:    .long 1 @ 0x1

diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
index d5dfecba2630d..eaceb1dd3f800 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
@@ -418,8 +418,6 @@ entry:
   ret <4 x float> %s
 }
 
-
-
 define arm_aapcs_vfpcc <8 x half> @uitofp_v8i1_v8f16(<8 x i16> %src) {
 ; CHECK-LABEL: uitofp_v8i1_v8f16:
 ; CHECK:       @ %bb.0: @ %entry
@@ -475,3 +473,168 @@ entry:
   %s = select <8 x i1> %0, <8 x half> <half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0, half 1.0>, <8 x half> zeroinitializer
   ret <8 x half> %s
 }
+
+
+define arm_aapcs_vfpcc <2 x double> @uitofp_v2i1_v2f64(<2 x i64> %src) {
+; CHECK-LABEL: uitofp_v2i1_v2f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov r0, r1, d1
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmov lr, r12, d0
+; CHECK-NEXT:    adr r2, .LCPI26_0
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    sbcs.w r0, r3, r1
+; CHECK-NEXT:    mov.w r0, #0
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    rsbs.w r1, lr, #0
+; CHECK-NEXT:    sbcs.w r1, r3, r12
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r3, #1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
+; CHECK-NEXT:    vand q4, q1, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl __aeabi_ul2d
+; CHECK-NEXT:    vmov r2, r3, d8
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    bl __aeabi_ul2d
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r7, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI26_0:
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+; CHECK-NEXT:    .long 1 @ 0x1
+; CHECK-NEXT:    .long 0 @ 0x0
+entry:
+  %c = icmp sgt <2 x i64> %src, zeroinitializer
+  %0 = uitofp <2 x i1> %c to <2 x double>
+  ret <2 x double> %0
+}
+
+define arm_aapcs_vfpcc <2 x double> @sitofp_v2i1_v2f64(<2 x i64> %src) {
+; CHECK-LABEL: sitofp_v2i1_v2f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    movs r3, #0
+; CHECK-NEXT:    vmov r2, r12, d1
+; CHECK-NEXT:    movs r4, #0
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    sbcs.w r0, r3, r1
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r4, #1
+; CHECK-NEXT:    rsbs r0, r2, #0
+; CHECK-NEXT:    sbcs.w r0, r3, r12
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r3, #1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    bl __aeabi_l2d
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vmov d9, r0, r1
+; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    mov r0, r2
+; CHECK-NEXT:    mov r1, r2
+; CHECK-NEXT:    bl __aeabi_l2d
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    vmov q0, q4
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r4, pc}
+entry:
+  %c = icmp sgt <2 x i64> %src, zeroinitializer
+  %0 = sitofp <2 x i1> %c to <2 x double>
+  ret <2 x double> %0
+}
+
+define arm_aapcs_vfpcc <2 x double> @fptoui_v2i1_v2f64(<2 x double> %src) {
+; CHECK-LABEL: fptoui_v2i1_v2f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vmov r0, r1, d8
+; CHECK-NEXT:    bl __aeabi_d2ulz
+; CHECK-NEXT:    vmov q1[2], q1[0], r0, r4
+; CHECK-NEXT:    adr r2, .LCPI28_0
+; CHECK-NEXT:    vmov q1[3], q1[1], r1, r5
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vmov r0, s6
+; CHECK-NEXT:    vmov r1, s4
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
+; CHECK-NEXT:    vmov q1[3], q1[1], r1, r0
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI28_0:
+; CHECK-NEXT:    .long 0 @ double 1
+; CHECK-NEXT:    .long 1072693248
+; CHECK-NEXT:    .long 0 @ double 1
+; CHECK-NEXT:    .long 1072693248
+entry:
+  %0 = fptoui <2 x double> %src to <2 x i1>
+  %s = select <2 x i1> %0, <2 x double> <double 1.0, double 1.0>, <2 x double> zeroinitializer
+  ret <2 x double> %s
+}
+
+define arm_aapcs_vfpcc <2 x double> @fptosi_v2i1_v2f64(<2 x double> %src) {
+; CHECK-LABEL: fptosi_v2i1_v2f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    .vsave {d8, d9}
+; CHECK-NEXT:    vpush {d8, d9}
+; CHECK-NEXT:    vmov q4, q0
+; CHECK-NEXT:    vmov r0, r1, d9
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r5, r1
+; CHECK-NEXT:    vmov r0, r1, d8
+; CHECK-NEXT:    bl __aeabi_d2lz
+; CHECK-NEXT:    adr r2, .LCPI29_0
+; CHECK-NEXT:    vmov q1[2], q1[0], r0, r4
+; CHECK-NEXT:    vldrw.u32 q0, [r2]
+; CHECK-NEXT:    vmov q1[3], q1[1], r1, r5
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI29_0:
+; CHECK-NEXT:    .long 0 @ double 1
+; CHECK-NEXT:    .long 1072693248
+; CHECK-NEXT:    .long 0 @ double 1
+; CHECK-NEXT:    .long 1072693248
+entry:
+  %0 = fptosi <2 x double> %src to <2 x i1>
+  %s = select <2 x i1> %0, <2 x double> <double 1.0, double 1.0>, <2 x double> zeroinitializer
+  ret <2 x double> %s
+}

diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll
index 477db0718410e..dcff285e50d6c 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll
@@ -1,6 +1,36 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
 
+define <2 x i64> @shuffle1_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shuffle1_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    orrs.w r1, r2, r3
+; CHECK-NEXT:    cset r1, eq
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
+; CHECK-NEXT:    vbic q1, q1, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 1, i32 0>
+  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
 define <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: shuffle1_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -76,6 +106,36 @@ entry:
   ret <16 x i8> %s
 }
 
+define <2 x i64> @shuffle2_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shuffle2_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    orrs r2, r3
+; CHECK-NEXT:    cset r2, eq
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
+; CHECK-NEXT:    vmov q0[3], q0[1], r0, r2
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
+; CHECK-NEXT:    vbic q1, q1, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
 define <4 x i32> @shuffle2_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: shuffle2_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -139,6 +199,31 @@ entry:
   ret <16 x i8> %s
 }
 
+define <2 x i64> @shuffle3_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shuffle3_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    vdup.32 q0, r0
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
+; CHECK-NEXT:    vbic q1, q1, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 0, i32 0>
+  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
 define <4 x i32> @shuffle3_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: shuffle3_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -220,6 +305,31 @@ entry:
   ret <16 x i8> %s
 }
 
+define <2 x i64> @shuffle4_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shuffle4_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    orrs.w r0, r2, r3
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    vdup.32 q0, r0
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
+; CHECK-NEXT:    vbic q1, q1, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %sh = shufflevector <2 x i1> %c, <2 x i1> undef, <2 x i32> <i32 1, i32 1>
+  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
 define <4 x i32> @shuffle4_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: shuffle4_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -306,6 +416,66 @@ entry:
   ret <16 x i8> %s
 }
 
+define <2 x i64> @shuffle5_b_v2i64(<4 x i32> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shuffle5_b_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    vcmp.i32 eq, q0, zr
+; CHECK-NEXT:    vmrs r0, p0
+; CHECK-NEXT:    ubfx r1, r0, #4, #1
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
+; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
+; CHECK-NEXT:    vbic q1, q1, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <4 x i32> %src, zeroinitializer
+  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
+define <2 x i64> @shuffle5_t_v2i64(<4 x i32> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shuffle5_t_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    vcmp.i32 eq, q0, zr
+; CHECK-NEXT:    vmrs r0, p0
+; CHECK-NEXT:    ubfx r1, r0, #12, #1
+; CHECK-NEXT:    ubfx r0, r0, #8, #1
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
+; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vldrw.u32 q2, [r0]
+; CHECK-NEXT:    vbic q1, q1, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <4 x i32> %src, zeroinitializer
+  %sh = shufflevector <4 x i1> %c, <4 x i1> undef, <2 x i32> <i32 2, i32 3>
+  %s = select <2 x i1> %sh, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
 define <4 x i32> @shuffle5_b_v4i32(<8 x i16> %src, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: shuffle5_b_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
@@ -450,6 +620,61 @@ entry:
   ret <8 x i16> %s
 }
 
+define <4 x i32> @shuffle6_v2i64(<2 x i64> %src1, <2 x i64> %src2, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shuffle6_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    orrs r2, r3
+; CHECK-NEXT:    cset r2, eq
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    csetm r2, ne
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    cset r0, eq
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r2
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    and r0, r0, #1
+; CHECK-NEXT:    rsbs r1, r0, #0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    bfi r0, r1, #0, #4
+; CHECK-NEXT:    and r1, r2, #1
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    bfi r0, r1, #4, #4
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    vldrw.u32 q0, [r1]
+; CHECK-NEXT:    vmov r1, r2, d0
+; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    cset r1, eq
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    bfi r0, r1, #8, #4
+; CHECK-NEXT:    vmov r1, r2, d1
+; CHECK-NEXT:    orrs r1, r2
+; CHECK-NEXT:    cset r1, eq
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    and r1, r1, #1
+; CHECK-NEXT:    rsbs r1, r1, #0
+; CHECK-NEXT:    bfi r0, r1, #12, #4
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    add r0, sp, #32
+; CHECK-NEXT:    vldrw.u32 q0, [r0]
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0]
+; CHECK-NEXT:    vpsel q0, q1, q0
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %c1 = icmp eq <2 x i64> %src1, zeroinitializer
+  %c2 = icmp eq <2 x i64> %src2, zeroinitializer
+  %sh = shufflevector <2 x i1> %c1, <2 x i1> %c2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s = select <4 x i1> %sh, <4 x i32> %a, <4 x i32> %b
+  ret <4 x i32> %s
+}
+
 define <8 x i16> @shuffle6_v4i32(<4 x i32> %src1, <4 x i32> %src2, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: shuffle6_v4i32:
 ; CHECK:       @ %bb.0: @ %entry

diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
index 460e3c09f7e49..30978ee43348a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
@@ -2,10 +2,75 @@
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE
 
+declare arm_aapcs_vfpcc <2 x i64> @ext_i64(<2 x i64> %c)
 declare arm_aapcs_vfpcc <4 x i32> @ext_i32(<4 x i32> %c)
 declare arm_aapcs_vfpcc <8 x i16> @ext_i16(<8 x i16> %c)
 declare arm_aapcs_vfpcc <16 x i8> @ext_i8(<16 x i8> %c)
 
+define arm_aapcs_vfpcc <2 x i64> @shuffle1_v2i64(<2 x i64> %src, <2 x i64> %a) {
+; CHECK-LE-LABEL: shuffle1_v2i64:
+; CHECK-LE:       @ %bb.0: @ %entry
+; CHECK-LE-NEXT:    .save {r7, lr}
+; CHECK-LE-NEXT:    push {r7, lr}
+; CHECK-LE-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-LE-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-LE-NEXT:    vmov r0, r1, d1
+; CHECK-LE-NEXT:    orrs r0, r1
+; CHECK-LE-NEXT:    vmov r1, r2, d0
+; CHECK-LE-NEXT:    cset r0, eq
+; CHECK-LE-NEXT:    cmp r0, #0
+; CHECK-LE-NEXT:    csetm r0, ne
+; CHECK-LE-NEXT:    orrs r1, r2
+; CHECK-LE-NEXT:    cset r1, eq
+; CHECK-LE-NEXT:    cmp r1, #0
+; CHECK-LE-NEXT:    csetm r1, ne
+; CHECK-LE-NEXT:    vmov q5[2], q5[0], r1, r0
+; CHECK-LE-NEXT:    vmov q5[3], q5[1], r1, r0
+; CHECK-LE-NEXT:    vand q4, q1, q5
+; CHECK-LE-NEXT:    vmov q0, q4
+; CHECK-LE-NEXT:    bl ext_i64
+; CHECK-LE-NEXT:    vbic q0, q0, q5
+; CHECK-LE-NEXT:    vorr q0, q4, q0
+; CHECK-LE-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-LE-NEXT:    pop {r7, pc}
+;
+; CHECK-BE-LABEL: shuffle1_v2i64:
+; CHECK-BE:       @ %bb.0: @ %entry
+; CHECK-BE-NEXT:    .save {r7, lr}
+; CHECK-BE-NEXT:    push {r7, lr}
+; CHECK-BE-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-BE-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-BE-NEXT:    vrev64.32 q2, q0
+; CHECK-BE-NEXT:    vmov r0, r1, d5
+; CHECK-BE-NEXT:    orrs r0, r1
+; CHECK-BE-NEXT:    vmov r1, r2, d4
+; CHECK-BE-NEXT:    cset r0, eq
+; CHECK-BE-NEXT:    cmp r0, #0
+; CHECK-BE-NEXT:    csetm r0, ne
+; CHECK-BE-NEXT:    orrs r1, r2
+; CHECK-BE-NEXT:    cset r1, eq
+; CHECK-BE-NEXT:    cmp r1, #0
+; CHECK-BE-NEXT:    csetm r1, ne
+; CHECK-BE-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-BE-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-BE-NEXT:    vrev64.32 q2, q0
+; CHECK-BE-NEXT:    vmov.i8 q0, #0xff
+; CHECK-BE-NEXT:    vand q4, q1, q2
+; CHECK-BE-NEXT:    veor q5, q2, q0
+; CHECK-BE-NEXT:    vmov q0, q4
+; CHECK-BE-NEXT:    bl ext_i64
+; CHECK-BE-NEXT:    vand q0, q0, q5
+; CHECK-BE-NEXT:    vorr q0, q4, q0
+; CHECK-BE-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-BE-NEXT:    pop {r7, pc}
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %s1 = select <2 x i1> %c, <2 x i64> %a, <2 x i64> zeroinitializer
+  %ext = call arm_aapcs_vfpcc <2 x i64> @ext_i64(<2 x i64> %s1)
+  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %ext
+  ret <2 x i64> %s
+}
+
 define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
 ; CHECK-LE-LABEL: shuffle1_v4i32:
 ; CHECK-LE:       @ %bb.0: @ %entry

diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll
index 34fc2bbb86f36..2b23f9a88b1f5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmp.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmp.ll
@@ -395,6 +395,41 @@ entry:
   ret <2 x i64> %s
 }
 
+define arm_aapcs_vfpcc <2 x i64> @vcmp_slt_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vcmp_slt_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vmov r0, r12, d3
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    subs r0, r2, r0
+; CHECK-NEXT:    sbcs.w r0, r3, r12
+; CHECK-NEXT:    vmov lr, r12, d2
+; CHECK-NEXT:    vmov r3, r2, d0
+; CHECK-NEXT:    mov.w r0, #0
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    subs.w r3, r3, lr
+; CHECK-NEXT:    sbcs.w r2, r2, r12
+; CHECK-NEXT:    it lt
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    vbic q1, q3, q0
+; CHECK-NEXT:    vand q0, q2, q0
+; CHECK-NEXT:    vorr q0, q0, q1
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %c = icmp slt <2 x i64> %src, %srcb
+  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
 define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: vcmp_eq_v2i32:
 ; CHECK:       @ %bb.0: @ %entry

diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
index ce4756b17b452..f8069f6967817 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmovimm.ll
@@ -507,3 +507,91 @@ entry:
   %s = select <4 x i1> %l699, <4 x i32> %a, <4 x i32> %b
   ret <4 x i32> %s
 }
+
+define arm_aapcs_vfpcc <2 x i64> @v2i1and_vmov(<2 x i64> %a, <2 x i64> %b, i32 %c) {
+; CHECKBE-LABEL: v2i1and_vmov:
+; CHECKBE:       @ %bb.0: @ %entry
+; CHECKBE-NEXT:    .vsave {d8, d9}
+; CHECKBE-NEXT:    vpush {d8, d9}
+; CHECKBE-NEXT:    cmp r0, #0
+; CHECKBE-NEXT:    adr r1, .LCPI37_0
+; CHECKBE-NEXT:    cset r0, eq
+; CHECKBE-NEXT:    vldrw.u32 q3, [r1]
+; CHECKBE-NEXT:    vmov.32 q4[3], r0
+; CHECKBE-NEXT:    rsbs r0, r0, #0
+; CHECKBE-NEXT:    vand q3, q4, q3
+; CHECKBE-NEXT:    vmov.i8 q2, #0xff
+; CHECKBE-NEXT:    vmov r1, s15
+; CHECKBE-NEXT:    vmov q3[2], q3[0], r0, r1
+; CHECKBE-NEXT:    vmov q3[3], q3[1], r0, r1
+; CHECKBE-NEXT:    vrev64.32 q4, q3
+; CHECKBE-NEXT:    veor q2, q4, q2
+; CHECKBE-NEXT:    vand q0, q0, q4
+; CHECKBE-NEXT:    vand q1, q1, q2
+; CHECKBE-NEXT:    vorr q0, q0, q1
+; CHECKBE-NEXT:    vpop {d8, d9}
+; CHECKBE-NEXT:    bx lr
+; CHECKBE-NEXT:    .p2align 4
+; CHECKBE-NEXT:  @ %bb.1:
+; CHECKBE-NEXT:  .LCPI37_0:
+; CHECKBE-NEXT:    .zero 4
+; CHECKBE-NEXT:    .long 1 @ 0x1
+; CHECKBE-NEXT:    .zero 4
+; CHECKBE-NEXT:    .long 0 @ 0x0
+entry:
+  %c1 = icmp eq i32 %c, zeroinitializer
+  %broadcast.splatinsert1967 = insertelement <2 x i1> undef, i1 %c1, i32 0
+  %broadcast.splat1968 = shufflevector <2 x i1> %broadcast.splatinsert1967, <2 x i1> undef, <2 x i32> zeroinitializer
+  %l699 = and <2 x i1> %broadcast.splat1968, <i1 true, i1 false>
+  %s = select <2 x i1> %l699, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
+define arm_aapcs_vfpcc <2 x i64> @v2i1or_vmov(<2 x i64> %a, <2 x i64> %b, i32 %c) {
+; CHECKLE-LABEL: v2i1or_vmov:
+; CHECKLE:       @ %bb.0: @ %entry
+; CHECKLE-NEXT:    cmp r0, #0
+; CHECKLE-NEXT:    vldr s8, .LCPI38_0
+; CHECKLE-NEXT:    csetm r0, eq
+; CHECKLE-NEXT:    vmov s10, r0
+; CHECKLE-NEXT:    vmov.f32 s9, s8
+; CHECKLE-NEXT:    vmov.f32 s11, s10
+; CHECKLE-NEXT:    vbic q1, q1, q2
+; CHECKLE-NEXT:    vand q0, q0, q2
+; CHECKLE-NEXT:    vorr q0, q0, q1
+; CHECKLE-NEXT:    bx lr
+; CHECKLE-NEXT:    .p2align 2
+; CHECKLE-NEXT:  @ %bb.1:
+; CHECKLE-NEXT:  .LCPI38_0:
+; CHECKLE-NEXT:    .long 0xffffffff @ float NaN
+;
+; CHECKBE-LABEL: v2i1or_vmov:
+; CHECKBE:       @ %bb.0: @ %entry
+; CHECKBE-NEXT:    .vsave {d8, d9}
+; CHECKBE-NEXT:    vpush {d8, d9}
+; CHECKBE-NEXT:    cmp r0, #0
+; CHECKBE-NEXT:    vldr s8, .LCPI38_0
+; CHECKBE-NEXT:    csetm r0, eq
+; CHECKBE-NEXT:    vmov.i8 q3, #0xff
+; CHECKBE-NEXT:    vmov s10, r0
+; CHECKBE-NEXT:    vmov.f32 s9, s8
+; CHECKBE-NEXT:    vmov.f32 s11, s10
+; CHECKBE-NEXT:    vrev64.32 q4, q2
+; CHECKBE-NEXT:    veor q2, q4, q3
+; CHECKBE-NEXT:    vand q0, q0, q4
+; CHECKBE-NEXT:    vand q1, q1, q2
+; CHECKBE-NEXT:    vorr q0, q0, q1
+; CHECKBE-NEXT:    vpop {d8, d9}
+; CHECKBE-NEXT:    bx lr
+; CHECKBE-NEXT:    .p2align 2
+; CHECKBE-NEXT:  @ %bb.1:
+; CHECKBE-NEXT:  .LCPI38_0:
+; CHECKBE-NEXT:    .long 0xffffffff @ float NaN
+entry:
+  %c1 = icmp eq i32 %c, zeroinitializer
+  %broadcast.splatinsert1967 = insertelement <2 x i1> undef, i1 %c1, i32 0
+  %broadcast.splat1968 = shufflevector <2 x i1> %broadcast.splatinsert1967, <2 x i1> undef, <2 x i32> zeroinitializer
+  %l699 = or <2 x i1> %broadcast.splat1968, <i1 true, i1 false>
+  %s = select <2 x i1> %l699, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}

diff --git a/llvm/test/CodeGen/Thumb2/mve-vpsel.ll b/llvm/test/CodeGen/Thumb2/mve-vpsel.ll
index fa897c5fe9d96..fad1647618fe0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vpsel.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vpsel.ll
@@ -37,6 +37,31 @@ entry:
   ret <4 x i32> %1
 }
 
+define arm_aapcs_vfpcc <2 x i64> @vpsel_i64(<2 x i64> %mask, <2 x i64> %src1, <2 x i64> %src2) {
+; CHECK-LABEL: vpsel_i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, r1, d1
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    cset r0, ne
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    orrs.w r1, r2, r3
+; CHECK-NEXT:    cset r1, ne
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    vbic q2, q2, q0
+; CHECK-NEXT:    vand q0, q1, q0
+; CHECK-NEXT:    vorr q0, q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ne <2 x i64> %mask, zeroinitializer
+  %1 = select <2 x i1> %0, <2 x i64> %src1, <2 x i64> %src2
+  ret <2 x i64> %1
+}
+
 define arm_aapcs_vfpcc <8 x half> @vpsel_f16(<8 x i16> %mask, <8 x half> %src1, <8 x half> %src2) {
 ; CHECK-LABEL: vpsel_f16:
 ; CHECK:       @ %bb.0: @ %entry
@@ -61,6 +86,31 @@ entry:
   ret <4 x float> %1
 }
 
+define arm_aapcs_vfpcc <2 x double> @vpsel_f64(<2 x i64> %mask, <2 x double> %src1, <2 x double> %src2) {
+; CHECK-LABEL: vpsel_f64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, r1, d1
+; CHECK-NEXT:    vmov r2, r3, d0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    cset r0, ne
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    csetm r0, ne
+; CHECK-NEXT:    orrs.w r1, r2, r3
+; CHECK-NEXT:    cset r1, ne
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    csetm r1, ne
+; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
+; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
+; CHECK-NEXT:    vbic q2, q2, q0
+; CHECK-NEXT:    vand q0, q1, q0
+; CHECK-NEXT:    vorr q0, q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = icmp ne <2 x i64> %mask, zeroinitializer
+  %1 = select <2 x i1> %0, <2 x double> %src1, <2 x double> %src2
+  ret <2 x double> %1
+}
+
 define arm_aapcs_vfpcc <4 x i32> @foo(<4 x i32> %vec.ind) {
 ; CHECK-LABEL: foo:
 ; CHECK:       @ %bb.0:


        

