[llvm] db1be69 - [DAG] SimplifyDemandedBits - add ISD::VSELECT handling
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 19 07:18:33 PDT 2022
Author: Simon Pilgrim
Date: 2022-06-19T15:18:25+01:00
New Revision: db1be696c406fd4e894883845eea2f030e992626
URL: https://github.com/llvm/llvm-project/commit/db1be696c406fd4e894883845eea2f030e992626
DIFF: https://github.com/llvm/llvm-project/commit/db1be696c406fd4e894883845eea2f030e992626.diff
LOG: [DAG] SimplifyDemandedBits - add ISD::VSELECT handling
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
llvm/test/CodeGen/X86/extractelement-load.ll
llvm/test/CodeGen/X86/known-signbits-vector.ll
llvm/test/CodeGen/X86/select-of-fp-constants.ll
llvm/test/CodeGen/X86/vselect-zero.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e09f7d664ef04..2957976bcc240 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1536,6 +1536,19 @@ bool TargetLowering::SimplifyDemandedBits(
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
+ // Only known if known in both the LHS and RHS.
+ Known = KnownBits::commonBits(Known, Known2);
+ break;
+ case ISD::VSELECT:
+ if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
diff --git a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
index 832beee337666..1173624a21f20 100644
--- a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
@@ -36,7 +36,7 @@ define i32 @f1(<4 x i1>* %a0, <4 x i8> %a1) #0 {
; CHECK-NEXT: r3:2 = combine(#0,#0)
; CHECK-NEXT: }
; CHECK-NEXT: {
-; CHECK-NEXT: r5:4 = vsxtbh(r1)
+; CHECK-NEXT: r5:4 = vzxtbh(r1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = r0
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 1e9fbe09539c8..7929bba0638a5 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -279,42 +279,40 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32(<4 x half> %x) {
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vmov.u16 r0, q0[2]
+; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: bl __fixhfdi
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov.u16 r0, q4[3]
+; CHECK-NEXT: vmov.u16 r0, q4[2]
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __fixhfdi
-; CHECK-NEXT: rsbs r2, r4, #0
+; CHECK-NEXT: rsbs r2, r0, #0
; CHECK-NEXT: mov.w r6, #0
-; CHECK-NEXT: sbcs.w r2, r6, r5
-; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: mov.w r3, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: sbcs.w r0, r6, r1
-; CHECK-NEXT: bfi r3, r2, #0, #8
; CHECK-NEXT: csetm r0, lt
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmov.u16 r0, q4[0]
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: bfi r1, r0, #0, #8
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: sbcs.w r0, r6, r5
; CHECK-NEXT: vmov.i32 q5, #0x0
-; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT: vmsr p0, r3
+; CHECK-NEXT: csetm r0, lt
+; CHECK-NEXT: bfi r1, r0, #8, #8
+; CHECK-NEXT: vmov.u16 r0, q4[1]
+; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpsel q6, q0, q5
; CHECK-NEXT: bl __fixhfdi
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov.u16 r0, q4[1]
+; CHECK-NEXT: vmov.u16 r0, q4[0]
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __fixhfdi
-; CHECK-NEXT: rsbs r2, r4, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT: sbcs.w r2, r6, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: rsbs r2, r0, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: sbcs.w r0, r6, r1
-; CHECK-NEXT: bfi r6, r2, #0, #8
+; CHECK-NEXT: csetm r0, lt
+; CHECK-NEXT: rsbs r1, r4, #0
+; CHECK-NEXT: sbcs.w r1, r6, r5
+; CHECK-NEXT: bfi r6, r0, #0, #8
; CHECK-NEXT: csetm r0, lt
; CHECK-NEXT: bfi r6, r0, #8, #8
; CHECK-NEXT: vmsr p0, r6
@@ -1436,42 +1434,40 @@ define arm_aapcs_vfpcc <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vmov.u16 r0, q0[2]
+; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: bl __fixhfdi
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov.u16 r0, q4[3]
+; CHECK-NEXT: vmov.u16 r0, q4[2]
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __fixhfdi
-; CHECK-NEXT: rsbs r2, r4, #0
+; CHECK-NEXT: rsbs r2, r0, #0
; CHECK-NEXT: mov.w r6, #0
-; CHECK-NEXT: sbcs.w r2, r6, r5
-; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: rsbs r0, r0, #0
-; CHECK-NEXT: mov.w r3, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: sbcs.w r0, r6, r1
-; CHECK-NEXT: bfi r3, r2, #0, #8
; CHECK-NEXT: csetm r0, lt
-; CHECK-NEXT: bfi r3, r0, #8, #8
-; CHECK-NEXT: vmov.u16 r0, q4[0]
+; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: bfi r1, r0, #0, #8
+; CHECK-NEXT: rsbs r0, r4, #0
+; CHECK-NEXT: sbcs.w r0, r6, r5
; CHECK-NEXT: vmov.i32 q5, #0x0
-; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT: vmsr p0, r3
+; CHECK-NEXT: csetm r0, lt
+; CHECK-NEXT: bfi r1, r0, #8, #8
+; CHECK-NEXT: vmov.u16 r0, q4[1]
+; CHECK-NEXT: vmsr p0, r1
; CHECK-NEXT: vpsel q6, q0, q5
; CHECK-NEXT: bl __fixhfdi
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: vmov.u16 r0, q4[1]
+; CHECK-NEXT: vmov.u16 r0, q4[0]
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: bl __fixhfdi
-; CHECK-NEXT: rsbs r2, r4, #0
-; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
-; CHECK-NEXT: sbcs.w r2, r6, r5
-; CHECK-NEXT: vmov q0[3], q0[1], r5, r1
-; CHECK-NEXT: csetm r2, lt
-; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: rsbs r2, r0, #0
+; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: sbcs.w r0, r6, r1
-; CHECK-NEXT: bfi r6, r2, #0, #8
+; CHECK-NEXT: csetm r0, lt
+; CHECK-NEXT: rsbs r1, r4, #0
+; CHECK-NEXT: sbcs.w r1, r6, r5
+; CHECK-NEXT: bfi r6, r0, #0, #8
; CHECK-NEXT: csetm r0, lt
; CHECK-NEXT: bfi r6, r0, #8, #8
; CHECK-NEXT: vmsr p0, r6
diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
index 46ade7114bf6c..6e6fe1d93c0b0 100644
--- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll
@@ -334,102 +334,100 @@ define arm_aapcs_vfpcc <4 x i32> @ext_ops_trunc_i32(<4 x i32> %a, <4 x i32> %b)
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
-; CHECK-NEXT: vmov.f32 s8, s4
+; CHECK-NEXT: vmov.f32 s18, s1
; CHECK-NEXT: vmov.i64 q3, #0xffffffff
+; CHECK-NEXT: vmov.f32 s8, s4
; CHECK-NEXT: vmov.f32 s10, s5
; CHECK-NEXT: vand q2, q2, q3
-; CHECK-NEXT: vmov r3, s0
-; CHECK-NEXT: vmov r1, r0, d4
-; CHECK-NEXT: vmov.f32 s18, s1
-; CHECK-NEXT: vmov r2, r12, d5
-; CHECK-NEXT: vmov.f32 s0, s2
; CHECK-NEXT: vmov.f32 s4, s6
+; CHECK-NEXT: vmov r2, r0, d5
+; CHECK-NEXT: vmov r1, r12, d4
; CHECK-NEXT: vmov.f32 s6, s7
+; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: vand q1, q1, q3
-; CHECK-NEXT: vmov.f32 s2, s3
-; CHECK-NEXT: adds r4, r3, r1
+; CHECK-NEXT: adds r4, r3, r2
; CHECK-NEXT: asr.w r6, r3, #31
; CHECK-NEXT: adc.w r5, r6, r0
-; CHECK-NEXT: asrl r4, r5, r1
-; CHECK-NEXT: subs r6, r4, r1
+; CHECK-NEXT: asrl r4, r5, r2
+; CHECK-NEXT: subs r6, r4, r2
; CHECK-NEXT: sbc.w r8, r5, r0
-; CHECK-NEXT: umull r10, lr, r6, r1
+; CHECK-NEXT: umull r10, lr, r6, r2
; CHECK-NEXT: muls r6, r0, r6
-; CHECK-NEXT: vmov r0, s18
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.f32 s0, s2
+; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: orr.w lr, lr, r6
-; CHECK-NEXT: adds r6, r0, r2
+; CHECK-NEXT: adds r6, r0, r1
; CHECK-NEXT: asr.w r5, r0, #31
; CHECK-NEXT: adc.w r7, r5, r12
-; CHECK-NEXT: asrl r6, r7, r2
-; CHECK-NEXT: mla r5, r8, r1, lr
-; CHECK-NEXT: subs r4, r6, r2
+; CHECK-NEXT: asrl r6, r7, r1
+; CHECK-NEXT: mla r5, r8, r2, lr
+; CHECK-NEXT: subs r4, r6, r1
; CHECK-NEXT: sbc.w lr, r7, r12
-; CHECK-NEXT: umull r6, r7, r4, r2
+; CHECK-NEXT: umull r6, r7, r4, r1
; CHECK-NEXT: mul r4, r4, r12
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: orr.w r8, r7, r4
-; CHECK-NEXT: eor.w r7, r3, r1
-; CHECK-NEXT: orr.w r7, r7, r3, asr #31
+; CHECK-NEXT: eor.w r7, r0, r1
+; CHECK-NEXT: orr.w r7, r7, r0, asr #31
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: rsb.w r0, r0, #0
; CHECK-NEXT: csetm r7, eq
; CHECK-NEXT: bfi r4, r7, #0, #8
-; CHECK-NEXT: eor.w r7, r0, r2
-; CHECK-NEXT: orr.w r7, r7, r0, asr #31
-; CHECK-NEXT: rsbs r0, r0, #0
+; CHECK-NEXT: eor.w r7, r3, r2
+; CHECK-NEXT: orr.w r7, r7, r3, asr #31
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: csetm r7, eq
; CHECK-NEXT: bfi r4, r7, #8, #8
; CHECK-NEXT: vmsr p0, r4
; CHECK-NEXT: rsbs r4, r3, #0
-; CHECK-NEXT: mla r3, lr, r2, r8
+; CHECK-NEXT: mla r3, lr, r1, r8
; CHECK-NEXT: lsll r10, r5, r4
-; CHECK-NEXT: lsll r10, r5, r1
+; CHECK-NEXT: vmov r4, s0
+; CHECK-NEXT: lsll r10, r5, r2
; CHECK-NEXT: lsll r6, r3, r0
-; CHECK-NEXT: vmov r0, r7, d3
-; CHECK-NEXT: lsll r6, r3, r2
-; CHECK-NEXT: vmov r2, s0
-; CHECK-NEXT: vmov q4[2], q4[0], r10, r6
-; CHECK-NEXT: vmov q4[3], q4[1], r5, r3
+; CHECK-NEXT: vmov r2, r7, d3
+; CHECK-NEXT: lsll r6, r3, r1
; CHECK-NEXT: vmov r1, r3, d2
+; CHECK-NEXT: vmov q4[2], q4[0], r6, r10
; CHECK-NEXT: vpsel q2, q4, q2
; CHECK-NEXT: vmov.f32 s9, s10
-; CHECK-NEXT: asrs r6, r2, #31
-; CHECK-NEXT: adds r4, r2, r1
-; CHECK-NEXT: adc.w r5, r6, r3
-; CHECK-NEXT: asrl r4, r5, r1
-; CHECK-NEXT: subs r6, r4, r1
-; CHECK-NEXT: sbc.w lr, r5, r3
-; CHECK-NEXT: vmov r5, s2
-; CHECK-NEXT: adds r4, r5, r0
-; CHECK-NEXT: asr.w r3, r5, #31
-; CHECK-NEXT: adcs r3, r7
-; CHECK-NEXT: asrl r4, r3, r0
-; CHECK-NEXT: subs r4, r4, r0
-; CHECK-NEXT: sbcs r3, r7
-; CHECK-NEXT: umull r4, r7, r4, r0
-; CHECK-NEXT: mla r3, r3, r0, r7
-; CHECK-NEXT: eor.w r7, r2, r1
-; CHECK-NEXT: orr.w r7, r7, r2, asr #31
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: csetm r7, eq
-; CHECK-NEXT: bfi r12, r7, #0, #8
-; CHECK-NEXT: eor.w r7, r5, r0
-; CHECK-NEXT: orr.w r7, r7, r5, asr #31
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: csetm r7, eq
-; CHECK-NEXT: bfi r12, r7, #8, #8
-; CHECK-NEXT: umull r6, r7, r6, r1
+; CHECK-NEXT: asrs r0, r4, #31
+; CHECK-NEXT: adds r6, r4, r1
+; CHECK-NEXT: adc.w r5, r0, r3
+; CHECK-NEXT: asrl r6, r5, r1
+; CHECK-NEXT: subs r0, r6, r1
+; CHECK-NEXT: sbc.w r3, r5, r3
+; CHECK-NEXT: umull r8, r6, r0, r1
+; CHECK-NEXT: mla r3, r3, r1, r6
+; CHECK-NEXT: vmov r6, s2
+; CHECK-NEXT: adds r0, r6, r2
+; CHECK-NEXT: asr.w r5, r6, #31
+; CHECK-NEXT: adcs r5, r7
+; CHECK-NEXT: asrl r0, r5, r2
+; CHECK-NEXT: subs r0, r0, r2
+; CHECK-NEXT: sbc.w r7, r5, r7
+; CHECK-NEXT: eor.w r5, r4, r1
+; CHECK-NEXT: orr.w r5, r5, r4, asr #31
+; CHECK-NEXT: rsbs r4, r4, #0
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: lsll r8, r3, r4
+; CHECK-NEXT: csetm r5, eq
+; CHECK-NEXT: lsll r8, r3, r1
+; CHECK-NEXT: bfi r12, r5, #0, #8
+; CHECK-NEXT: eor.w r5, r6, r2
+; CHECK-NEXT: orr.w r5, r5, r6, asr #31
+; CHECK-NEXT: cmp r5, #0
+; CHECK-NEXT: csetm r5, eq
+; CHECK-NEXT: bfi r12, r5, #8, #8
+; CHECK-NEXT: umull r0, r5, r0, r2
; CHECK-NEXT: vmsr p0, r12
-; CHECK-NEXT: rsb.w r12, r5, #0
-; CHECK-NEXT: lsll r4, r3, r12
-; CHECK-NEXT: mla r5, lr, r1, r7
-; CHECK-NEXT: lsll r4, r3, r0
-; CHECK-NEXT: rsbs r0, r2, #0
-; CHECK-NEXT: lsll r6, r5, r0
-; CHECK-NEXT: lsll r6, r5, r1
-; CHECK-NEXT: vmov q0[2], q0[0], r6, r4
-; CHECK-NEXT: vmov q0[3], q0[1], r5, r3
+; CHECK-NEXT: mla r5, r7, r2, r5
+; CHECK-NEXT: rsbs r7, r6, #0
+; CHECK-NEXT: lsll r0, r5, r7
+; CHECK-NEXT: lsll r0, r5, r2
+; CHECK-NEXT: vmov q0[2], q0[0], r8, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: vmov.f32 s10, s0
; CHECK-NEXT: vmov.f32 s11, s2
diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
index 251b187e7bcf2..ea351a3518ed8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
@@ -21,19 +21,18 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
; CHECK-NEXT: b .LBB0_6
; CHECK-NEXT: .LBB0_3: @ %vector.ph
; CHECK-NEXT: bic r5, r3, #1
-; CHECK-NEXT: adr r4, .LCPI0_0
-; CHECK-NEXT: subs r7, r5, #2
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: subs r7, r5, #2
; CHECK-NEXT: movs r6, #1
; CHECK-NEXT: add.w r3, r1, r5, lsl #2
-; CHECK-NEXT: vldrw.u32 q0, [r4]
-; CHECK-NEXT: adr r4, .LCPI0_1
+; CHECK-NEXT: adr r4, .LCPI0_0
; CHECK-NEXT: add.w lr, r6, r7, lsr #1
; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: str r5, [sp] @ 4-byte Spill
; CHECK-NEXT: add.w r10, r2, r5, lsl #2
; CHECK-NEXT: add.w r12, r0, r5, lsl #2
-; CHECK-NEXT: vldrw.u32 q1, [r4]
+; CHECK-NEXT: vldrw.u32 q0, [r4]
+; CHECK-NEXT: vmvn.i32 q1, #0x80000000
; CHECK-NEXT: .LBB0_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrd r4, r6, [r0], #8
@@ -111,11 +110,6 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(i32* nocapture readonly %pSrcA, i32*
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 2147483648 @ 0x80000000
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
-; CHECK-NEXT: .LCPI0_1:
-; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
-; CHECK-NEXT: .long 0 @ 0x0
-; CHECK-NEXT: .long 2147483647 @ 0x7fffffff
-; CHECK-NEXT: .long 0 @ 0x0
entry:
switch i32 %N, label %vector.ph [
i32 0, label %for.cond.cleanup
@@ -623,8 +617,8 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; CHECK-NEXT: .pad #8
-; CHECK-NEXT: sub sp, #8
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq .LBB3_8
; CHECK-NEXT: @ %bb.1: @ %entry
@@ -633,47 +627,45 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
; CHECK-NEXT: @ %bb.2:
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: mov r12, r0
-; CHECK-NEXT: mov r10, r1
-; CHECK-NEXT: mov r11, r2
+; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: mov r8, r2
; CHECK-NEXT: b .LBB3_6
; CHECK-NEXT: .LBB3_3: @ %vector.ph
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: bic r3, r3, #1
-; CHECK-NEXT: subs r7, r3, #2
+; CHECK-NEXT: bic r5, r3, #1
; CHECK-NEXT: movs r6, #1
-; CHECK-NEXT: str r3, [sp] @ 4-byte Spill
-; CHECK-NEXT: add.w r11, r2, r3, lsl #2
+; CHECK-NEXT: subs r7, r5, #2
+; CHECK-NEXT: str r5, [sp] @ 4-byte Spill
+; CHECK-NEXT: add.w r8, r2, r5, lsl #2
+; CHECK-NEXT: add.w r11, r1, r5, lsl #2
; CHECK-NEXT: add.w lr, r6, r7, lsr #1
-; CHECK-NEXT: add.w r10, r1, r3, lsl #2
-; CHECK-NEXT: add.w r12, r0, r3, lsl #2
-; CHECK-NEXT: vmov.i64 q0, #0xffffffff
+; CHECK-NEXT: add.w r12, r0, r5, lsl #2
+; CHECK-NEXT: vmov.i8 q0, #0xff
; CHECK-NEXT: .LBB3_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrd r4, r6, [r0], #8
-; CHECK-NEXT: mov.w r8, #0
-; CHECK-NEXT: ldrd r7, r3, [r1], #8
-; CHECK-NEXT: umull r4, r9, r7, r4
-; CHECK-NEXT: lsrl r4, r9, #31
-; CHECK-NEXT: subs.w r5, r4, #-1
-; CHECK-NEXT: sbcs r5, r9, #0
+; CHECK-NEXT: ldrd r4, r9, [r0], #8
+; CHECK-NEXT: ldrd r5, r10, [r1], #8
+; CHECK-NEXT: umull r4, r5, r5, r4
+; CHECK-NEXT: lsrl r4, r5, #31
+; CHECK-NEXT: subs.w r6, r4, #-1
+; CHECK-NEXT: sbcs r5, r5, #0
+; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r5, lo
-; CHECK-NEXT: bfi r8, r5, #0, #8
-; CHECK-NEXT: umull r6, r5, r3, r6
-; CHECK-NEXT: lsrl r6, r5, #31
-; CHECK-NEXT: subs.w r7, r6, #-1
-; CHECK-NEXT: vmov q1[2], q1[0], r4, r6
-; CHECK-NEXT: sbcs r3, r5, #0
-; CHECK-NEXT: vmov q1[3], q1[1], r9, r5
-; CHECK-NEXT: csetm r3, lo
-; CHECK-NEXT: bfi r8, r3, #8, #8
-; CHECK-NEXT: vmsr p0, r8
+; CHECK-NEXT: bfi r6, r5, #0, #8
+; CHECK-NEXT: umull r10, r5, r10, r9
+; CHECK-NEXT: lsrl r10, r5, #31
+; CHECK-NEXT: subs.w r7, r10, #-1
+; CHECK-NEXT: vmov q1[2], q1[0], r4, r10
+; CHECK-NEXT: sbcs r5, r5, #0
+; CHECK-NEXT: csetm r5, lo
+; CHECK-NEXT: bfi r6, r5, #8, #8
+; CHECK-NEXT: vmsr p0, r6
; CHECK-NEXT: vpsel q1, q1, q0
-; CHECK-NEXT: vmov r3, s6
-; CHECK-NEXT: vmov r4, s4
-; CHECK-NEXT: strd r4, r3, [r2], #8
+; CHECK-NEXT: vmov r4, s6
+; CHECK-NEXT: vmov r5, s4
+; CHECK-NEXT: strd r5, r4, [r2], #8
; CHECK-NEXT: le lr, .LBB3_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
-; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload
+; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload
; CHECK-NEXT: cmp r7, r3
; CHECK-NEXT: beq .LBB3_8
; CHECK-NEXT: .LBB3_6: @ %for.body.preheader
@@ -681,17 +673,17 @@ define arm_aapcs_vfpcc void @usatmul_2_q31(i32* nocapture readonly %pSrcA, i32*
; CHECK-NEXT: .LBB3_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r0, [r12], #4
-; CHECK-NEXT: ldr r1, [r10], #4
+; CHECK-NEXT: ldr r1, [r11], #4
; CHECK-NEXT: umull r0, r1, r1, r0
; CHECK-NEXT: lsrl r0, r1, #31
; CHECK-NEXT: subs.w r2, r0, #-1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: it hs
; CHECK-NEXT: movhs.w r0, #-1
-; CHECK-NEXT: str r0, [r11], #4
+; CHECK-NEXT: str r0, [r8], #4
; CHECK-NEXT: le lr, .LBB3_7
; CHECK-NEXT: .LBB3_8: @ %for.cond.cleanup
-; CHECK-NEXT: add sp, #8
+; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
switch i32 %N, label %vector.ph [
@@ -774,78 +766,69 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
-; CHECK-NEXT: .pad #16
-; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: beq.w .LBB4_8
; CHECK-NEXT: @ %bb.1: @ %for.body.preheader
-; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: cmp r3, #3
; CHECK-NEXT: bhi .LBB4_3
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: mov r12, r0
-; CHECK-NEXT: mov r1, r2
+; CHECK-NEXT: mov r9, r1
+; CHECK-NEXT: mov r11, r2
; CHECK-NEXT: b .LBB4_6
; CHECK-NEXT: .LBB4_3: @ %vector.ph
-; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: bic r3, r3, #3
-; CHECK-NEXT: subs r7, r3, #4
+; CHECK-NEXT: bic r8, r3, #3
; CHECK-NEXT: movs r6, #1
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: add.w r10, r1, r3, lsl #2
-; CHECK-NEXT: add.w lr, r6, r7, lsr #2
-; CHECK-NEXT: add.w r7, r2, r3, lsl #2
-; CHECK-NEXT: str r7, [sp] @ 4-byte Spill
-; CHECK-NEXT: add.w r12, r0, r3, lsl #2
+; CHECK-NEXT: sub.w r7, r8, #4
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
+; CHECK-NEXT: add.w r11, r2, r8, lsl #2
+; CHECK-NEXT: add.w r9, r1, r8, lsl #2
+; CHECK-NEXT: add.w lr, r6, r7, lsr #2
+; CHECK-NEXT: add.w r12, r0, r8, lsl #2
; CHECK-NEXT: .LBB4_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vldrw.u32 q3, [r1], #16
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: vmov.f32 s8, s6
; CHECK-NEXT: vmov.f32 s16, s14
; CHECK-NEXT: vmov.f32 s10, s7
; CHECK-NEXT: vmov.f32 s18, s15
; CHECK-NEXT: vmullb.u32 q5, q4, q2
; CHECK-NEXT: vmov.f32 s6, s5
-; CHECK-NEXT: vmov r4, r9, d10
-; CHECK-NEXT: lsrl r4, r9, #31
+; CHECK-NEXT: vmov r10, r5, d10
+; CHECK-NEXT: lsrl r10, r5, #31
; CHECK-NEXT: vmov.f32 s14, s13
-; CHECK-NEXT: subs.w r5, r4, #-1
+; CHECK-NEXT: subs.w r6, r10, #-1
; CHECK-NEXT: vmullb.u32 q4, q3, q1
-; CHECK-NEXT: sbcs r5, r9, #0
+; CHECK-NEXT: sbcs r5, r5, #0
+; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r5, lo
; CHECK-NEXT: bfi r6, r5, #0, #8
-; CHECK-NEXT: vmov r8, r5, d11
-; CHECK-NEXT: lsrl r8, r5, #31
-; CHECK-NEXT: subs.w r11, r8, #-1
-; CHECK-NEXT: vmov q2[2], q2[0], r4, r8
-; CHECK-NEXT: sbcs r7, r5, #0
-; CHECK-NEXT: vmov q2[3], q2[1], r9, r5
-; CHECK-NEXT: csetm r7, lo
-; CHECK-NEXT: bfi r6, r7, #8, #8
-; CHECK-NEXT: vmov r4, r7, d8
-; CHECK-NEXT: lsrl r4, r7, #31
+; CHECK-NEXT: vmov r4, r5, d11
+; CHECK-NEXT: lsrl r4, r5, #31
+; CHECK-NEXT: subs.w r7, r4, #-1
+; CHECK-NEXT: vmov q2[2], q2[0], r10, r4
+; CHECK-NEXT: sbcs r5, r5, #0
+; CHECK-NEXT: csetm r5, lo
+; CHECK-NEXT: bfi r6, r5, #8, #8
+; CHECK-NEXT: vmov r10, r5, d8
+; CHECK-NEXT: lsrl r10, r5, #31
; CHECK-NEXT: vmsr p0, r6
-; CHECK-NEXT: subs.w r5, r4, #-1
-; CHECK-NEXT: mov.w r6, #0
-; CHECK-NEXT: sbcs r5, r7, #0
+; CHECK-NEXT: subs.w r6, r10, #-1
; CHECK-NEXT: vpsel q2, q2, q0
+; CHECK-NEXT: sbcs r5, r5, #0
+; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r5, lo
; CHECK-NEXT: bfi r6, r5, #0, #8
-; CHECK-NEXT: vmov r2, r5, d9
-; CHECK-NEXT: lsrl r2, r5, #31
-; CHECK-NEXT: subs.w r3, r2, #-1
-; CHECK-NEXT: vmov q1[2], q1[0], r4, r2
-; CHECK-NEXT: sbcs r3, r5, #0
-; CHECK-NEXT: vmov q1[3], q1[1], r7, r5
-; CHECK-NEXT: csetm r3, lo
-; CHECK-NEXT: bfi r6, r3, #8, #8
+; CHECK-NEXT: vmov r4, r5, d9
+; CHECK-NEXT: lsrl r4, r5, #31
+; CHECK-NEXT: subs.w r7, r4, #-1
+; CHECK-NEXT: vmov q1[2], q1[0], r10, r4
+; CHECK-NEXT: sbcs r5, r5, #0
+; CHECK-NEXT: csetm r5, lo
+; CHECK-NEXT: bfi r6, r5, #8, #8
; CHECK-NEXT: vmsr p0, r6
-; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vpsel q1, q1, q0
; CHECK-NEXT: vmov.f32 s5, s6
; CHECK-NEXT: vmov.f32 s6, s8
@@ -853,26 +836,23 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(i32* nocapture readonly %pSrcA, i32*
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: le lr, .LBB4_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
-; CHECK-NEXT: ldrd r7, r3, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r7, r3
+; CHECK-NEXT: cmp r8, r3
; CHECK-NEXT: beq .LBB4_8
; CHECK-NEXT: .LBB4_6: @ %for.body.preheader21
-; CHECK-NEXT: sub.w lr, r3, r7
+; CHECK-NEXT: sub.w lr, r3, r8
; CHECK-NEXT: .LBB4_7: @ %for.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr r0, [r12], #4
-; CHECK-NEXT: ldr r2, [r10], #4
-; CHECK-NEXT: umull r0, r3, r2, r0
-; CHECK-NEXT: lsrl r0, r3, #31
+; CHECK-NEXT: ldr r1, [r9], #4
+; CHECK-NEXT: umull r0, r1, r1, r0
+; CHECK-NEXT: lsrl r0, r1, #31
; CHECK-NEXT: subs.w r2, r0, #-1
-; CHECK-NEXT: sbcs r2, r3, #0
+; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: it hs
; CHECK-NEXT: movhs.w r0, #-1
-; CHECK-NEXT: str r0, [r1], #4
+; CHECK-NEXT: str r0, [r11], #4
; CHECK-NEXT: le lr, .LBB4_7
; CHECK-NEXT: .LBB4_8: @ %for.cond.cleanup
-; CHECK-NEXT: add sp, #16
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 738489b3060f5..917b54ae8306d 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -160,14 +160,22 @@ define float @t6(<8 x float> *%a0) {
; X64-SSSE3-NEXT: orps %xmm2, %xmm0
; X64-SSSE3-NEXT: retq
;
-; X64-AVX-LABEL: t6:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: t6:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX1-NEXT: vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: t6:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X64-AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
%vecload = load <8 x float>, <8 x float>* %a0, align 32
%vecext = extractelement <8 x float> %vecload, i32 1
%cmp = fcmp oeq float %vecext, 0.000000e+00
@@ -251,14 +259,22 @@ define float @PR43971_1(<8 x float> *%a0) nounwind {
; X64-SSSE3-NEXT: orps %xmm2, %xmm0
; X64-SSSE3-NEXT: retq
;
-; X64-AVX-LABEL: PR43971_1:
-; X64-AVX: # %bb.0: # %entry
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
-; X64-AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; X64-AVX-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: PR43971_1:
+; X64-AVX1: # %bb.0: # %entry
+; X64-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX1-NEXT: vblendvps %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: PR43971_1:
+; X64-AVX2: # %bb.0: # %entry
+; X64-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm1
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; X64-AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT: retq
entry:
%0 = load <8 x float>, <8 x float>* %a0, align 32
%vecext = extractelement <8 x float> %0, i32 1
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index acd0c4eba7723..30830be7250ca 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -429,24 +429,24 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
-; X86-NEXT: vpmovsxdq 8(%ebp), %xmm4
-; X86-NEXT: vpmovsxdq 16(%ebp), %xmm3
-; X86-NEXT: vpsrad $31, %xmm2, %xmm5
-; X86-NEXT: vpsrad $1, %xmm2, %xmm6
-; X86-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; X86-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3],xmm6[4,5],xmm5[6,7]
+; X86-NEXT: vmovapd 8(%ebp), %xmm3
+; X86-NEXT: vpsrad $31, %xmm2, %xmm4
+; X86-NEXT: vpsrad $1, %xmm2, %xmm5
+; X86-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; X86-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; X86-NEXT: vextractf128 $1, %ymm2, %xmm2
-; X86-NEXT: vpsrad $31, %xmm2, %xmm6
+; X86-NEXT: vpsrad $31, %xmm2, %xmm5
; X86-NEXT: vpsrad $1, %xmm2, %xmm2
; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; X86-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
+; X86-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
+; X86-NEXT: vpermilps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm6
-; X86-NEXT: vblendvpd %xmm6, %xmm5, %xmm4, %xmm4
; X86-NEXT: vextractf128 $1, %ymm1, %xmm1
; X86-NEXT: vextractf128 $1, %ymm0, %xmm0
; X86-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; X86-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
+; X86-NEXT: vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
+; X86-NEXT: vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
+; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X86-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
@@ -467,16 +467,14 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
; X64-AVX1-NEXT: vpsrad $1, %xmm2, %xmm2
; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
-; X64-AVX1-NEXT: vpmovsxdq %xmm3, %xmm5
-; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
-; X64-AVX1-NEXT: vpmovsxdq %xmm3, %xmm3
+; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm5 = xmm3[2,2,3,3]
; X64-AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm6
-; X64-AVX1-NEXT: vblendvpd %xmm6, %xmm4, %xmm5, %xmm4
; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; X64-AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm3, %xmm0
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
+; X64-AVX1-NEXT: vblendvpd %xmm0, %xmm2, %xmm5, %xmm0
+; X64-AVX1-NEXT: vblendvpd %xmm6, %xmm4, %xmm3, %xmm1
+; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
@@ -486,11 +484,9 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
;
; X64-AVX2-LABEL: signbits_ashr_sext_select_shuffle_sitofp:
; X64-AVX2: # %bb.0:
-; X64-AVX2-NEXT: vpsrad $31, %ymm2, %ymm4
; X64-AVX2-NEXT: vpsrad $1, %ymm2, %ymm2
; X64-AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[1,1,3,3,5,5,7,7]
-; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm4[1],ymm2[2],ymm4[3],ymm2[4],ymm4[5],ymm2[6],ymm4[7]
-; X64-AVX2-NEXT: vpmovsxdq %xmm3, %ymm3
+; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; X64-AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm3, %ymm0
; X64-AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
diff --git a/llvm/test/CodeGen/X86/select-of-fp-constants.ll b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
index 9f4510ff1964d..3ddeeee1bce04 100644
--- a/llvm/test/CodeGen/X86/select-of-fp-constants.ll
+++ b/llvm/test/CodeGen/X86/select-of-fp-constants.ll
@@ -77,8 +77,8 @@ define float @fcmp_select_fp_constants(float %x) nounwind readnone {
; X64-AVX2-LABEL: fcmp_select_fp_constants:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vcmpneqss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [4.2E+1,4.2E+1,4.2E+1,4.2E+1]
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} xmm2 = [2.3E+1,2.3E+1,2.3E+1,2.3E+1]
; X64-AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; X64-AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll
index e00f06cc5912f..fefc5a2e9b5bb 100644
--- a/llvm/test/CodeGen/X86/vselect-zero.ll
+++ b/llvm/test/CodeGen/X86/vselect-zero.ll
@@ -125,7 +125,7 @@ define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
; AVX-LABEL: fsel_nonzero_false_val:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT: retq
;
@@ -153,8 +153,7 @@ define double @fsel_nonzero_true_val(double %x, double %y, double %z) {
; AVX-LABEL: fsel_nonzero_true_val:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: fsel_nonzero_true_val:
@@ -180,9 +179,8 @@ define double @fsel_nonzero_constants(double %x, double %y) {
; AVX-LABEL: fsel_nonzero_constants:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
+; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
+; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: fsel_nonzero_constants:
More information about the llvm-commits mailing list