[llvm] db1be69 - [DAG] SimplifyDemandedBits - add ISD::VSELECT handling

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 19 07:18:33 PDT 2022


Author: Simon Pilgrim
Date: 2022-06-19T15:18:25+01:00
New Revision: db1be696c406fd4e894883845eea2f030e992626

URL: https://github.com/llvm/llvm-project/commit/db1be696c406fd4e894883845eea2f030e992626
DIFF: https://github.com/llvm/llvm-project/commit/db1be696c406fd4e894883845eea2f030e992626.diff

LOG: [DAG] SimplifyDemandedBits - add ISD::VSELECT handling

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
    llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
    llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
    llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
    llvm/test/CodeGen/X86/extractelement-load.ll
    llvm/test/CodeGen/X86/known-signbits-vector.ll
    llvm/test/CodeGen/X86/select-of-fp-constants.ll
    llvm/test/CodeGen/X86/vselect-zero.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e09f7d664ef04..2957976bcc240 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1536,6 +1536,19 @@ bool TargetLowering::SimplifyDemandedBits(
     if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
       return true;
 
+    // Only known if known in both the LHS and RHS.
+    Known = KnownBits::commonBits(Known, Known2);
+    break;
+  case ISD::VSELECT:
+    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
+                             Known, TLO, Depth + 1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
+                             Known2, TLO, Depth + 1))
+      return true;
+    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
     // Only known if known in both the LHS and RHS.
     Known = KnownBits::commonBits(Known, Known2);
     break;

diff --git a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
index 832beee337666..1173624a21f20 100644
--- a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
@@ -36,7 +36,7 @@ define i32 @f1(<4 x i1>* %a0, <4 x i8> %a1) #0 {
 ; CHECK-NEXT:     r3:2 = combine(#0,#0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r5:4 = vsxtbh(r1)
+; CHECK-NEXT:     r5:4 = vzxtbh(r1)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     p0 = r0

diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 1e9fbe09539c8..7929bba0638a5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -279,42 +279,40 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-NEXT:    vmov.u16 r0, q0[3]
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    bl __fixhfdi
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov.u16 r0, q4[3]
+; CHECK-NEXT:    vmov.u16 r0, q4[2]
 ; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    bl __fixhfdi
-; CHECK-NEXT:    rsbs r2, r4, #0
+; CHECK-NEXT:    rsbs r2, r0, #0
 ; CHECK-NEXT:    mov.w r6, #0
-; CHECK-NEXT:    sbcs.w r2, r6, r5
-; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    csetm r2, lt
-; CHECK-NEXT:    rsbs r0, r0, #0
-; CHECK-NEXT:    mov.w r3, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
-; CHECK-NEXT:    bfi r3, r2, #0, #8
 ; CHECK-NEXT:    csetm r0, lt
-; CHECK-NEXT:    bfi r3, r0, #8, #8
-; CHECK-NEXT:    vmov.u16 r0, q4[0]
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    bfi r1, r0, #0, #8
+; CHECK-NEXT:    rsbs r0, r4, #0
+; CHECK-NEXT:    sbcs.w r0, r6, r5
 ; CHECK-NEXT:    vmov.i32 q5, #0x0
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r1, r0, #8, #8
+; CHECK-NEXT:    vmov.u16 r0, q4[1]
+; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q6, q0, q5
 ; CHECK-NEXT:    bl __fixhfdi
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov.u16 r0, q4[1]
+; CHECK-NEXT:    vmov.u16 r0, q4[0]
 ; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    bl __fixhfdi
-; CHECK-NEXT:    rsbs r2, r4, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    sbcs.w r2, r6, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    csetm r2, lt
-; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    rsbs r2, r0, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
-; CHECK-NEXT:    bfi r6, r2, #0, #8
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    rsbs r1, r4, #0
+; CHECK-NEXT:    sbcs.w r1, r6, r5
+; CHECK-NEXT:    bfi r6, r0, #0, #8
 ; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r6, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r6
@@ -1436,42 +1434,40 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vmov.u16 r0, q0[2]
+; CHECK-NEXT:    vmov.u16 r0, q0[3]
 ; CHECK-NEXT:    vmov q4, q0
 ; CHECK-NEXT:    bl __fixhfdi
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov.u16 r0, q4[3]
+; CHECK-NEXT:    vmov.u16 r0, q4[2]
 ; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    bl __fixhfdi
-; CHECK-NEXT:    rsbs r2, r4, #0
+; CHECK-NEXT:    rsbs r2, r0, #0
 ; CHECK-NEXT:    mov.w r6, #0
-; CHECK-NEXT:    sbcs.w r2, r6, r5
-; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    csetm r2, lt
-; CHECK-NEXT:    rsbs r0, r0, #0
-; CHECK-NEXT:    mov.w r3, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
-; CHECK-NEXT:    bfi r3, r2, #0, #8
 ; CHECK-NEXT:    csetm r0, lt
-; CHECK-NEXT:    bfi r3, r0, #8, #8
-; CHECK-NEXT:    vmov.u16 r0, q4[0]
+; CHECK-NEXT:    movs r1, #0
+; CHECK-NEXT:    bfi r1, r0, #0, #8
+; CHECK-NEXT:    rsbs r0, r4, #0
+; CHECK-NEXT:    sbcs.w r0, r6, r5
 ; CHECK-NEXT:    vmov.i32 q5, #0x0
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    vmsr p0, r3
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    bfi r1, r0, #8, #8
+; CHECK-NEXT:    vmov.u16 r0, q4[1]
+; CHECK-NEXT:    vmsr p0, r1
 ; CHECK-NEXT:    vpsel q6, q0, q5
 ; CHECK-NEXT:    bl __fixhfdi
 ; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    vmov.u16 r0, q4[1]
+; CHECK-NEXT:    vmov.u16 r0, q4[0]
 ; CHECK-NEXT:    mov r5, r1
 ; CHECK-NEXT:    bl __fixhfdi
-; CHECK-NEXT:    rsbs r2, r4, #0
-; CHECK-NEXT:    vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT:    sbcs.w r2, r6, r5
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT:    csetm r2, lt
-; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    rsbs r2, r0, #0
+; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
 ; CHECK-NEXT:    sbcs.w r0, r6, r1
-; CHECK-NEXT:    bfi r6, r2, #0, #8
+; CHECK-NEXT:    csetm r0, lt
+; CHECK-NEXT:    rsbs r1, r4, #0
+; CHECK-NEXT:    sbcs.w r1, r6, r5
+; CHECK-NEXT:    bfi r6, r0, #0, #8
 ; CHECK-NEXT:    csetm r0, lt
 ; CHECK-NEXT:    bfi r6, r0, #8, #8
 ; CHECK-NEXT:    vmsr p0, r6

diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
index 46ade7114bf6c..6e6fe1d93c0b0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
@@ -334,102 +334,100 @@ define arm_aapcs_vfpcc <4 x i32> @ext_ops_trunc_i32(<4 x i32> %a, <4 x i32> %b)
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9}
 ; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov.f32 s8, s4
+; CHECK-NEXT:    vmov.f32 s18, s1
 ; CHECK-NEXT:    vmov.i64 q3, #0xffffffff
+; CHECK-NEXT:    vmov.f32 s8, s4
 ; CHECK-NEXT:    vmov.f32 s10, s5
 ; CHECK-NEXT:    vand q2, q2, q3
-; CHECK-NEXT:    vmov r3, s0
-; CHECK-NEXT:    vmov r1, r0, d4
-; CHECK-NEXT:    vmov.f32 s18, s1
-; CHECK-NEXT:    vmov r2, r12, d5
-; CHECK-NEXT:    vmov.f32 s0, s2
 ; CHECK-NEXT:    vmov.f32 s4, s6
+; CHECK-NEXT:    vmov r2, r0, d5
+; CHECK-NEXT:    vmov r1, r12, d4
 ; CHECK-NEXT:    vmov.f32 s6, s7
+; CHECK-NEXT:    vmov r3, s18
 ; CHECK-NEXT:    vand q1, q1, q3
-; CHECK-NEXT:    vmov.f32 s2, s3
-; CHECK-NEXT:    adds r4, r3, r1
+; CHECK-NEXT:    adds r4, r3, r2
 ; CHECK-NEXT:    asr.w r6, r3, #31
 ; CHECK-NEXT:    adc.w r5, r6, r0
-; CHECK-NEXT:    asrl r4, r5, r1
-; CHECK-NEXT:    subs r6, r4, r1
+; CHECK-NEXT:    asrl r4, r5, r2
+; CHECK-NEXT:    subs r6, r4, r2
 ; CHECK-NEXT:    sbc.w r8, r5, r0
-; CHECK-NEXT:    umull r10, lr, r6, r1
+; CHECK-NEXT:    umull r10, lr, r6, r2
 ; CHECK-NEXT:    muls r6, r0, r6
-; CHECK-NEXT:    vmov r0, s18
+; CHECK-NEXT:    vmov r0, s0
+; CHECK-NEXT:    vmov.f32 s0, s2
+; CHECK-NEXT:    vmov.f32 s2, s3
 ; CHECK-NEXT:    orr.w lr, lr, r6
-; CHECK-NEXT:    adds r6, r0, r2
+; CHECK-NEXT:    adds r6, r0, r1
 ; CHECK-NEXT:    asr.w r5, r0, #31
 ; CHECK-NEXT:    adc.w r7, r5, r12
-; CHECK-NEXT:    asrl r6, r7, r2
-; CHECK-NEXT:    mla r5, r8, r1, lr
-; CHECK-NEXT:    subs r4, r6, r2
+; CHECK-NEXT:    asrl r6, r7, r1
+; CHECK-NEXT:    mla r5, r8, r2, lr
+; CHECK-NEXT:    subs r4, r6, r1
 ; CHECK-NEXT:    sbc.w lr, r7, r12
-; CHECK-NEXT:    umull r6, r7, r4, r2
+; CHECK-NEXT:    umull r6, r7, r4, r1
 ; CHECK-NEXT:    mul r4, r4, r12
 ; CHECK-NEXT:    mov.w r12, #0
 ; CHECK-NEXT:    orr.w r8, r7, r4
-; CHECK-NEXT:    eor.w r7, r3, r1
-; CHECK-NEXT:    orr.w r7, r7, r3, asr #31
+; CHECK-NEXT:    eor.w r7, r0, r1
+; CHECK-NEXT:    orr.w r7, r7, r0, asr #31
 ; CHECK-NEXT:    movs r4, #0
 ; CHECK-NEXT:    cmp r7, #0
+; CHECK-NEXT:    rsb.w r0, r0, #0
 ; CHECK-NEXT:    csetm r7, eq
 ; CHECK-NEXT:    bfi r4, r7, #0, #8
-; CHECK-NEXT:    eor.w r7, r0, r2
-; CHECK-NEXT:    orr.w r7, r7, r0, asr #31
-; CHECK-NEXT:    rsbs r0, r0, #0
+; CHECK-NEXT:    eor.w r7, r3, r2
+; CHECK-NEXT:    orr.w r7, r7, r3, asr #31
 ; CHECK-NEXT:    cmp r7, #0
 ; CHECK-NEXT:    csetm r7, eq
 ; CHECK-NEXT:    bfi r4, r7, #8, #8
 ; CHECK-NEXT:    vmsr p0, r4
 ; CHECK-NEXT:    rsbs r4, r3, #0
-; CHECK-NEXT:    mla r3, lr, r2, r8
+; CHECK-NEXT:    mla r3, lr, r1, r8
 ; CHECK-NEXT:    lsll r10, r5, r4
-; CHECK-NEXT:    lsll r10, r5, r1
+; CHECK-NEXT:    vmov r4, s0
+; CHECK-NEXT:    lsll r10, r5, r2
 ; CHECK-NEXT:    lsll r6, r3, r0
-; CHECK-NEXT:    vmov r0, r7, d3
-; CHECK-NEXT:    lsll r6, r3, r2
-; CHECK-NEXT:    vmov r2, s0
-; CHECK-NEXT:    vmov q4[2], q4[0], r10, r6
-; CHECK-NEXT:    vmov q4[3], q4[1], r5, r3
+; CHECK-NEXT:    vmov r2, r7, d3
+; CHECK-NEXT:    lsll r6, r3, r1
 ; CHECK-NEXT:    vmov r1, r3, d2
+; CHECK-NEXT:    vmov q4[2], q4[0], r6, r10
 ; CHECK-NEXT:    vpsel q2, q4, q2
 ; CHECK-NEXT:    vmov.f32 s9, s10
-; CHECK-NEXT:    asrs r6, r2, #31
-; CHECK-NEXT:    adds r4, r2, r1
-; CHECK-NEXT:    adc.w r5, r6, r3
-; CHECK-NEXT:    asrl r4, r5, r1
-; CHECK-NEXT:    subs r6, r4, r1
-; CHECK-NEXT:    sbc.w lr, r5, r3
-; CHECK-NEXT:    vmov r5, s2
-; CHECK-NEXT:    adds r4, r5, r0
-; CHECK-NEXT:    asr.w r3, r5, #31
-; CHECK-NEXT:    adcs r3, r7
-; CHECK-NEXT:    asrl r4, r3, r0
-; CHECK-NEXT:    subs r4, r4, r0
-; CHECK-NEXT:    sbcs r3, r7
-; CHECK-NEXT:    umull r4, r7, r4, r0
-; CHECK-NEXT:    mla r3, r3, r0, r7
-; CHECK-NEXT:    eor.w r7, r2, r1
-; CHECK-NEXT:    orr.w r7, r7, r2, asr #31
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    csetm r7, eq
-; CHECK-NEXT:    bfi r12, r7, #0, #8
-; CHECK-NEXT:    eor.w r7, r5, r0
-; CHECK-NEXT:    orr.w r7, r7, r5, asr #31
-; CHECK-NEXT:    cmp r7, #0
-; CHECK-NEXT:    csetm r7, eq
-; CHECK-NEXT:    bfi r12, r7, #8, #8
-; CHECK-NEXT:    umull r6, r7, r6, r1
+; CHECK-NEXT:    asrs r0, r4, #31
+; CHECK-NEXT:    adds r6, r4, r1
+; CHECK-NEXT:    adc.w r5, r0, r3
+; CHECK-NEXT:    asrl r6, r5, r1
+; CHECK-NEXT:    subs r0, r6, r1
+; CHECK-NEXT:    sbc.w r3, r5, r3
+; CHECK-NEXT:    umull r8, r6, r0, r1
+; CHECK-NEXT:    mla r3, r3, r1, r6
+; CHECK-NEXT:    vmov r6, s2
+; CHECK-NEXT:    adds r0, r6, r2
+; CHECK-NEXT:    asr.w r5, r6, #31
+; CHECK-NEXT:    adcs r5, r7
+; CHECK-NEXT:    asrl r0, r5, r2
+; CHECK-NEXT:    subs r0, r0, r2
+; CHECK-NEXT:    sbc.w r7, r5, r7
+; CHECK-NEXT:    eor.w r5, r4, r1
+; CHECK-NEXT:    orr.w r5, r5, r4, asr #31
+; CHECK-NEXT:    rsbs r4, r4, #0
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    lsll r8, r3, r4
+; CHECK-NEXT:    csetm r5, eq
+; CHECK-NEXT:    lsll r8, r3, r1
+; CHECK-NEXT:    bfi r12, r5, #0, #8
+; CHECK-NEXT:    eor.w r5, r6, r2
+; CHECK-NEXT:    orr.w r5, r5, r6, asr #31
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    csetm r5, eq
+; CHECK-NEXT:    bfi r12, r5, #8, #8
+; CHECK-NEXT:    umull r0, r5, r0, r2
 ; CHECK-NEXT:    vmsr p0, r12
-; CHECK-NEXT:    rsb.w r12, r5, #0
-; CHECK-NEXT:    lsll r4, r3, r12
-; CHECK-NEXT:    mla r5, lr, r1, r7
-; CHECK-NEXT:    lsll r4, r3, r0
-; CHECK-NEXT:    rsbs r0, r2, #0
-; CHECK-NEXT:    lsll r6, r5, r0
-; CHECK-NEXT:    lsll r6, r5, r1
-; CHECK-NEXT:    vmov q0[2], q0[0], r6, r4
-; CHECK-NEXT:    vmov q0[3], q0[1], r5, r3
+; CHECK-NEXT:    mla r5, r7, r2, r5
+; CHECK-NEXT:    rsbs r7, r6, #0
+; CHECK-NEXT:    lsll r0, r5, r7
+; CHECK-NEXT:    lsll r0, r5, r2
+; CHECK-NEXT:    vmov q0[2], q0[0], r8, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    vmov.f32 s10, s0
 ; CHECK-NEXT:    vmov.f32 s11, s2

diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index 251b187e7bcf2..ea351a3518ed8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -21,19 +21,18 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    b .LBB0_6
 ; CHECK-NEXT:  .LBB0_3: @ %vector.ph
 ; CHECK-NEXT:    bic r5, r3, #1
-; CHECK-NEXT:    adr r4, .LCPI0_0
-; CHECK-NEXT:    subs r7, r5, #2
 ; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    subs r7, r5, #2
 ; CHECK-NEXT:    movs r6, #1
 ; CHECK-NEXT:    add.w r3, r1, r5, lsl #2
-; CHECK-NEXT:    vldrw.u32 q0, [r4]
-; CHECK-NEXT:    adr r4, .LCPI0_1
+; CHECK-NEXT:    adr r4, .LCPI0_0
 ; CHECK-NEXT:    add.w lr, r6, r7, lsr #1
 ; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
 ; CHECK-NEXT:    add.w r10, r2, r5, lsl #2
 ; CHECK-NEXT:    add.w r12, r0, r5, lsl #2
-; CHECK-NEXT:    vldrw.u32 q1, [r4]
+; CHECK-NEXT:    vldrw.u32 q0, [r4]
+; CHECK-NEXT:    vmvn.i32 q1, #0x80000000
 ; CHECK-NEXT:  .LBB0_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldrd r4, r6, [r0], #8
@@ -111,11 +110,6 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
 ; CHECK-NEXT:    .long 2147483648 @ 0x80000000
 ; CHECK-NEXT:    .long 4294967295 @ 0xffffffff
-; CHECK-NEXT:  .LCPI0_1:
-; CHECK-NEXT:    .long 2147483647 @ 0x7fffffff
-; CHECK-NEXT:    .long 0 @ 0x0
-; CHECK-NEXT:    .long 2147483647 @ 0x7fffffff
-; CHECK-NEXT:    .long 0 @ 0x0
 entry:
   switch i32 %N, label %vector.ph [
     i32 0, label %for.cond.cleanup
@@ -623,8 +617,8 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT:    .pad #8
-; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    beq .LBB3_8
 ; CHECK-NEXT:  @ %bb.1: @ %entry
@@ -633,47 +627,45 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:  @ %bb.2:
 ; CHECK-NEXT:    movs r7, #0
 ; CHECK-NEXT:    mov r12, r0
-; CHECK-NEXT:    mov r10, r1
-; CHECK-NEXT:    mov r11, r2
+; CHECK-NEXT:    mov r11, r1
+; CHECK-NEXT:    mov r8, r2
 ; CHECK-NEXT:    b .LBB3_6
 ; CHECK-NEXT:  .LBB3_3: @ %vector.ph
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    bic r3, r3, #1
-; CHECK-NEXT:    subs r7, r3, #2
+; CHECK-NEXT:    bic r5, r3, #1
 ; CHECK-NEXT:    movs r6, #1
-; CHECK-NEXT:    str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT:    add.w r11, r2, r3, lsl #2
+; CHECK-NEXT:    subs r7, r5, #2
+; CHECK-NEXT:    str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT:    add.w r8, r2, r5, lsl #2
+; CHECK-NEXT:    add.w r11, r1, r5, lsl #2
 ; CHECK-NEXT:    add.w lr, r6, r7, lsr #1
-; CHECK-NEXT:    add.w r10, r1, r3, lsl #2
-; CHECK-NEXT:    add.w r12, r0, r3, lsl #2
-; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
+; CHECK-NEXT:    add.w r12, r0, r5, lsl #2
+; CHECK-NEXT:    vmov.i8 q0, #0xff
 ; CHECK-NEXT:  .LBB3_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    ldrd r4, r6, [r0], #8
-; CHECK-NEXT:    mov.w r8, #0
-; CHECK-NEXT:    ldrd r7, r3, [r1], #8
-; CHECK-NEXT:    umull r4, r9, r7, r4
-; CHECK-NEXT:    lsrl r4, r9, #31
-; CHECK-NEXT:    subs.w r5, r4, #-1
-; CHECK-NEXT:    sbcs r5, r9, #0
+; CHECK-NEXT:    ldrd r4, r9, [r0], #8
+; CHECK-NEXT:    ldrd r5, r10, [r1], #8
+; CHECK-NEXT:    umull r4, r5, r5, r4
+; CHECK-NEXT:    lsrl r4, r5, #31
+; CHECK-NEXT:    subs.w r6, r4, #-1
+; CHECK-NEXT:    sbcs r5, r5, #0
+; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    csetm r5, lo
-; CHECK-NEXT:    bfi r8, r5, #0, #8
-; CHECK-NEXT:    umull r6, r5, r3, r6
-; CHECK-NEXT:    lsrl r6, r5, #31
-; CHECK-NEXT:    subs.w r7, r6, #-1
-; CHECK-NEXT:    vmov q1[2], q1[0], r4, r6
-; CHECK-NEXT:    sbcs r3, r5, #0
-; CHECK-NEXT:    vmov q1[3], q1[1], r9, r5
-; CHECK-NEXT:    csetm r3, lo
-; CHECK-NEXT:    bfi r8, r3, #8, #8
-; CHECK-NEXT:    vmsr p0, r8
+; CHECK-NEXT:    bfi r6, r5, #0, #8
+; CHECK-NEXT:    umull r10, r5, r10, r9
+; CHECK-NEXT:    lsrl r10, r5, #31
+; CHECK-NEXT:    subs.w r7, r10, #-1
+; CHECK-NEXT:    vmov q1[2], q1[0], r4, r10
+; CHECK-NEXT:    sbcs r5, r5, #0
+; CHECK-NEXT:    csetm r5, lo
+; CHECK-NEXT:    bfi r6, r5, #8, #8
+; CHECK-NEXT:    vmsr p0, r6
 ; CHECK-NEXT:    vpsel q1, q1, q0
-; CHECK-NEXT:    vmov r3, s6
-; CHECK-NEXT:    vmov r4, s4
-; CHECK-NEXT:    strd r4, r3, [r2], #8
+; CHECK-NEXT:    vmov r4, s6
+; CHECK-NEXT:    vmov r5, s4
+; CHECK-NEXT:    strd r5, r4, [r2], #8
 ; CHECK-NEXT:    le lr, .LBB3_4
 ; CHECK-NEXT:  @ %bb.5: @ %middle.block
-; CHECK-NEXT:    ldrd r7, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT:    ldr r7, [sp] @ 4-byte Reload
 ; CHECK-NEXT:    cmp r7, r3
 ; CHECK-NEXT:    beq .LBB3_8
 ; CHECK-NEXT:  .LBB3_6: @ %for.body.preheader
@@ -681,17 +673,17 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:  .LBB3_7: @ %for.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr r0, [r12], #4
-; CHECK-NEXT:    ldr r1, [r10], #4
+; CHECK-NEXT:    ldr r1, [r11], #4
 ; CHECK-NEXT:    umull r0, r1, r1, r0
 ; CHECK-NEXT:    lsrl r0, r1, #31
 ; CHECK-NEXT:    subs.w r2, r0, #-1
 ; CHECK-NEXT:    sbcs r1, r1, #0
 ; CHECK-NEXT:    it hs
 ; CHECK-NEXT:    movhs.w r0, #-1
-; CHECK-NEXT:    str r0, [r11], #4
+; CHECK-NEXT:    str r0, [r8], #4
 ; CHECK-NEXT:    le lr, .LBB3_7
 ; CHECK-NEXT:  .LBB3_8: @ %for.cond.cleanup
-; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 entry:
   switch i32 %N, label %vector.ph [
@@ -774,78 +766,69 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11}
-; CHECK-NEXT:    .pad #16
-; CHECK-NEXT:    sub sp, #16
 ; CHECK-NEXT:    cmp r3, #0
 ; CHECK-NEXT:    beq.w .LBB4_8
 ; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
-; CHECK-NEXT:    movs r7, #0
+; CHECK-NEXT:    mov.w r8, #0
 ; CHECK-NEXT:    cmp r3, #3
 ; CHECK-NEXT:    bhi .LBB4_3
 ; CHECK-NEXT:  @ %bb.2:
-; CHECK-NEXT:    mov r10, r1
 ; CHECK-NEXT:    mov r12, r0
-; CHECK-NEXT:    mov r1, r2
+; CHECK-NEXT:    mov r9, r1
+; CHECK-NEXT:    mov r11, r2
 ; CHECK-NEXT:    b .LBB4_6
 ; CHECK-NEXT:  .LBB4_3: @ %vector.ph
-; CHECK-NEXT:    str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT:    bic r3, r3, #3
-; CHECK-NEXT:    subs r7, r3, #4
+; CHECK-NEXT:    bic r8, r3, #3
 ; CHECK-NEXT:    movs r6, #1
-; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT:    add.w r10, r1, r3, lsl #2
-; CHECK-NEXT:    add.w lr, r6, r7, lsr #2
-; CHECK-NEXT:    add.w r7, r2, r3, lsl #2
-; CHECK-NEXT:    str r7, [sp] @ 4-byte Spill
-; CHECK-NEXT:    add.w r12, r0, r3, lsl #2
+; CHECK-NEXT:    sub.w r7, r8, #4
 ; CHECK-NEXT:    vmov.i64 q0, #0xffffffff
+; CHECK-NEXT:    add.w r11, r2, r8, lsl #2
+; CHECK-NEXT:    add.w r9, r1, r8, lsl #2
+; CHECK-NEXT:    add.w lr, r6, r7, lsr #2
+; CHECK-NEXT:    add.w r12, r0, r8, lsl #2
 ; CHECK-NEXT:  .LBB4_4: @ %vector.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
 ; CHECK-NEXT:    vldrw.u32 q3, [r1], #16
-; CHECK-NEXT:    movs r6, #0
-; CHECK-NEXT:    str r2, [sp, #12] @ 4-byte Spill
 ; CHECK-NEXT:    vmov.f32 s8, s6
 ; CHECK-NEXT:    vmov.f32 s16, s14
 ; CHECK-NEXT:    vmov.f32 s10, s7
 ; CHECK-NEXT:    vmov.f32 s18, s15
 ; CHECK-NEXT:    vmullb.u32 q5, q4, q2
 ; CHECK-NEXT:    vmov.f32 s6, s5
-; CHECK-NEXT:    vmov r4, r9, d10
-; CHECK-NEXT:    lsrl r4, r9, #31
+; CHECK-NEXT:    vmov r10, r5, d10
+; CHECK-NEXT:    lsrl r10, r5, #31
 ; CHECK-NEXT:    vmov.f32 s14, s13
-; CHECK-NEXT:    subs.w r5, r4, #-1
+; CHECK-NEXT:    subs.w r6, r10, #-1
 ; CHECK-NEXT:    vmullb.u32 q4, q3, q1
-; CHECK-NEXT:    sbcs r5, r9, #0
+; CHECK-NEXT:    sbcs r5, r5, #0
+; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    csetm r5, lo
 ; CHECK-NEXT:    bfi r6, r5, #0, #8
-; CHECK-NEXT:    vmov r8, r5, d11
-; CHECK-NEXT:    lsrl r8, r5, #31
-; CHECK-NEXT:    subs.w r11, r8, #-1
-; CHECK-NEXT:    vmov q2[2], q2[0], r4, r8
-; CHECK-NEXT:    sbcs r7, r5, #0
-; CHECK-NEXT:    vmov q2[3], q2[1], r9, r5
-; CHECK-NEXT:    csetm r7, lo
-; CHECK-NEXT:    bfi r6, r7, #8, #8
-; CHECK-NEXT:    vmov r4, r7, d8
-; CHECK-NEXT:    lsrl r4, r7, #31
+; CHECK-NEXT:    vmov r4, r5, d11
+; CHECK-NEXT:    lsrl r4, r5, #31
+; CHECK-NEXT:    subs.w r7, r4, #-1
+; CHECK-NEXT:    vmov q2[2], q2[0], r10, r4
+; CHECK-NEXT:    sbcs r5, r5, #0
+; CHECK-NEXT:    csetm r5, lo
+; CHECK-NEXT:    bfi r6, r5, #8, #8
+; CHECK-NEXT:    vmov r10, r5, d8
+; CHECK-NEXT:    lsrl r10, r5, #31
 ; CHECK-NEXT:    vmsr p0, r6
-; CHECK-NEXT:    subs.w r5, r4, #-1
-; CHECK-NEXT:    mov.w r6, #0
-; CHECK-NEXT:    sbcs r5, r7, #0
+; CHECK-NEXT:    subs.w r6, r10, #-1
 ; CHECK-NEXT:    vpsel q2, q2, q0
+; CHECK-NEXT:    sbcs r5, r5, #0
+; CHECK-NEXT:    mov.w r6, #0
 ; CHECK-NEXT:    csetm r5, lo
 ; CHECK-NEXT:    bfi r6, r5, #0, #8
-; CHECK-NEXT:    vmov r2, r5, d9
-; CHECK-NEXT:    lsrl r2, r5, #31
-; CHECK-NEXT:    subs.w r3, r2, #-1
-; CHECK-NEXT:    vmov q1[2], q1[0], r4, r2
-; CHECK-NEXT:    sbcs r3, r5, #0
-; CHECK-NEXT:    vmov q1[3], q1[1], r7, r5
-; CHECK-NEXT:    csetm r3, lo
-; CHECK-NEXT:    bfi r6, r3, #8, #8
+; CHECK-NEXT:    vmov r4, r5, d9
+; CHECK-NEXT:    lsrl r4, r5, #31
+; CHECK-NEXT:    subs.w r7, r4, #-1
+; CHECK-NEXT:    vmov q1[2], q1[0], r10, r4
+; CHECK-NEXT:    sbcs r5, r5, #0
+; CHECK-NEXT:    csetm r5, lo
+; CHECK-NEXT:    bfi r6, r5, #8, #8
 ; CHECK-NEXT:    vmsr p0, r6
-; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
 ; CHECK-NEXT:    vpsel q1, q1, q0
 ; CHECK-NEXT:    vmov.f32 s5, s6
 ; CHECK-NEXT:    vmov.f32 s6, s8
@@ -853,26 +836,23 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
 ; CHECK-NEXT:    vstrb.8 q1, [r2], #16
 ; CHECK-NEXT:    le lr, .LBB4_4
 ; CHECK-NEXT:  @ %bb.5: @ %middle.block
-; CHECK-NEXT:    ldrd r7, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT:    ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT:    cmp r7, r3
+; CHECK-NEXT:    cmp r8, r3
 ; CHECK-NEXT:    beq .LBB4_8
 ; CHECK-NEXT:  .LBB4_6: @ %for.body.preheader21
-; CHECK-NEXT:    sub.w lr, r3, r7
+; CHECK-NEXT:    sub.w lr, r3, r8
 ; CHECK-NEXT:  .LBB4_7: @ %for.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldr r0, [r12], #4
-; CHECK-NEXT:    ldr r2, [r10], #4
-; CHECK-NEXT:    umull r0, r3, r2, r0
-; CHECK-NEXT:    lsrl r0, r3, #31
+; CHECK-NEXT:    ldr r1, [r9], #4
+; CHECK-NEXT:    umull r0, r1, r1, r0
+; CHECK-NEXT:    lsrl r0, r1, #31
 ; CHECK-NEXT:    subs.w r2, r0, #-1
-; CHECK-NEXT:    sbcs r2, r3, #0
+; CHECK-NEXT:    sbcs r1, r1, #0
 ; CHECK-NEXT:    it hs
 ; CHECK-NEXT:    movhs.w r0, #-1
-; CHECK-NEXT:    str r0, [r1], #4
+; CHECK-NEXT:    str r0, [r11], #4
 ; CHECK-NEXT:    le lr, .LBB4_7
 ; CHECK-NEXT:  .LBB4_8: @ %for.cond.cleanup
-; CHECK-NEXT:    add sp, #16
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}

diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 738489b3060f5..917b54ae8306d 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -160,14 +160,22 @@ define float @t6(<8 x float> *%a0) {
 ; X64-SSSE3-NEXT:    orps %xmm2, %xmm0
 ; X64-SSSE3-NEXT:    retq
 ;
-; X64-AVX-LABEL: t6:
-; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: t6:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: t6:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X64-AVX2-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
   %vecload = load <8 x float>, <8 x float>* %a0, align 32
   %vecext = extractelement <8 x float> %vecload, i32 1
   %cmp = fcmp oeq float %vecext, 0.000000e+00
@@ -251,14 +259,22 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
 ; X64-SSSE3-NEXT:    orps %xmm2, %xmm0
 ; X64-SSSE3-NEXT:    retq
 ;
-; X64-AVX-LABEL: PR43971_1:
-; X64-AVX:       # %bb.0: # %entry
-; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-AVX-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: PR43971_1:
+; X64-AVX1:       # %bb.0: # %entry
+; X64-AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: PR43971_1:
+; X64-AVX2:       # %bb.0: # %entry
+; X64-AVX2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X64-AVX2-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
 entry:
   %0 = load <8 x float>, <8 x float>* %a0, align 32
   %vecext = extractelement <8 x float> %0, i32 1

diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index acd0c4eba7723..30830be7250ca 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -429,24 +429,24 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-16, %esp
 ; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    vpmovsxdq 8(%ebp), %xmm4
-; X86-NEXT:    vpmovsxdq 16(%ebp), %xmm3
-; X86-NEXT:    vpsrad $31, %xmm2, %xmm5
-; X86-NEXT:    vpsrad $1, %xmm2, %xmm6
-; X86-NEXT:    vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; X86-NEXT:    vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7]
+; X86-NEXT:    vmovapd 8(%ebp), %xmm3
+; X86-NEXT:    vpsrad $31, %xmm2, %xmm4
+; X86-NEXT:    vpsrad $1, %xmm2, %xmm5
+; X86-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; X86-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
 ; X86-NEXT:    vextractf128 $1, %ymm2, %xmm2
-; X86-NEXT:    vpsrad $31, %xmm2, %xmm6
+; X86-NEXT:    vpsrad $31, %xmm2, %xmm5
 ; X86-NEXT:    vpsrad $1, %xmm2, %xmm2
 ; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
+; X86-NEXT:    vpermilps {{.*#+}} xmm5 = xmm3[2,2,3,3]
 ; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
-; X86-NEXT:    vblendvpd %xmm6, %xmm5, %xmm4, %xmm4
 ; X86-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; X86-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
-; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
+; X86-NEXT:    vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
+; X86-NEXT:    vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
+; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X86-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; X86-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; X86-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
@@ -467,16 +467,14 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
 ; X64-AVX1-NEXT:    vpsrad $1, %xmm2, %xmm2
 ; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
-; X64-AVX1-NEXT:    vpmovsxdq %xmm3, %xmm5
-; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
-; X64-AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
+; X64-AVX1-NEXT:    vpermilps {{.*#+}} xmm5 = xmm3[2,2,3,3]
 ; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm6
-; X64-AVX1-NEXT:    vblendvpd %xmm6, %xmm4, %xmm5, %xmm4
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
 ; X64-AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
-; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
+; X64-AVX1-NEXT:    vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
+; X64-AVX1-NEXT:    vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
+; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; X64-AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; X64-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
@@ -486,11 +484,9 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
 ;
 ; X64-AVX2-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
 ; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vpsrad $31, %ymm2, %ymm4
 ; X64-AVX2-NEXT:    vpsrad $1, %ymm2, %ymm2
 ; X64-AVX2-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
-; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm4[1],ymm2[2],ymm4[3],ymm2[4],ymm4[5],ymm2[6],ymm4[7]
-; X64-AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
+; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
 ; X64-AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
 ; X64-AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
 ; X64-AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]

diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
index 9f4510ff1964d..3ddeeee1bce04 100644
--- a/llvm/test/CodeGen/X86/select-of-fp-constants.ll
+++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
@@ -77,8 +77,8 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone {
 ; X64-AVX2-LABEL: fcmp_select_fp_constants:
 ; X64-AVX2:       # %bb.0:
 ; X64-AVX2-NEXT:    vcmpneqss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [4.2E+1,4.2E+1,4.2E+1,4.2E+1]
+; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2.3E+1,2.3E+1,2.3E+1,2.3E+1]
 ; X64-AVX2-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
 ; X64-AVX2-NEXT:    retq
 ;

diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll
index e00f06cc5912f..fefc5a2e9b5bb 100644
--- a/llvm/test/CodeGen/X86/vselect-zero.ll
+++ b/llvm/test/CodeGen/X86/vselect-zero.ll
@@ -125,7 +125,7 @@ define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
 ; AVX-LABEL: fsel_nonzero_false_val:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
 ; AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
@@ -153,8 +153,7 @@ define double @fsel_nonzero_true_val(double %x, double %y, double %z) {
 ; AVX-LABEL: fsel_nonzero_true_val:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: fsel_nonzero_true_val:
@@ -180,9 +179,8 @@ define double @fsel_nonzero_constants(double %x, double %y) {
 ; AVX-LABEL: fsel_nonzero_constants:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT:    vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
+; AVX-NEXT:    vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
 ; AVX-NEXT:    retq
 ;
 ; AVX512-LABEL: fsel_nonzero_constants:


        


More information about the llvm-commits mailing list