[llvm] r364079 - [X86] Use vmovq for v4i64/v4f64/v8i64/v8f64 vzmovl.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 10:24:22 PDT 2019


Author: ctopper
Date: Fri Jun 21 10:24:21 2019
New Revision: 364079

URL: http://llvm.org/viewvc/llvm-project?rev=364079&view=rev
Log:
[X86] Use vmovq for v4i64/v4f64/v8i64/v8f64 vzmovl.

We already use vmovq for v2i64/v2f64 vzmovl, but we were using
blendpd+xorpd for v4i64/v4f64/v8i64/v8f64 when optimizing for speed,
or movsd+xorpd when optimizing for size.

I think the blend with zero or movss/movsd is only needed for vXi32,
where we don't have an instruction that can move 32 bits from one xmm
register to another while zeroing the upper bits.

movq is no worse than blendpd on any known CPU.
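
To make the change concrete, here is a minimal sketch of the shuffle
pattern this affects and the before/after codegen, modeled directly on
the shuffle_v4f64_0zzz tests updated below (the function name here is
illustrative only). The vzmovl keeps the low 64-bit element and zeroes
everything above it:

  define <4 x double> @vzmovl_v4f64(<4 x double> %a) {
    ; Lane 0 comes from %a; all other lanes come from zeroinitializer.
    %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    ret <4 x double> %b
  }

  ; Before (speed):                            After:
  ;   vxorps   %xmm1, %xmm1, %xmm1               vmovq xmm0 = xmm0[0],zero
  ;   vblendps xmm0 = xmm0[0,1],xmm1[2,3]
  ; Before (optsize): vxorpd + vmovsd

Since the VEX/EVEX-encoded xmm vmovq implicitly zeroes the upper bits
of the full ymm/zmm register, no extra instruction is needed for the
upper 128/384 bits.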

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
    llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Jun 21 10:24:21 2019
@@ -4286,15 +4286,6 @@ let Predicates = [HasAVX512, OptForSize]
              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
 
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-
   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
             (SUBREG_TO_REG (i32 0),
              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
@@ -4303,17 +4294,6 @@ let Predicates = [HasAVX512, OptForSize]
             (SUBREG_TO_REG (i32 0),
              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
 }
 
 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
@@ -4329,17 +4309,6 @@ let Predicates = [HasAVX512, OptForSpeed
              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                           (i8 3))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
 }
 
 let Predicates = [HasAVX512] in {
@@ -4452,6 +4421,28 @@ let Predicates = [HasAVX512] in {
             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
   def : Pat<(v8i64 (X86vzload addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
+
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+
+  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Jun 21 10:24:21 2019
@@ -312,17 +312,6 @@ let Predicates = [UseAVX, OptForSize] in
             (SUBREG_TO_REG (i32 0),
              (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
               (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDrr (v2f64 (V_SET0)),
-                       (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDrr (v2i64 (V_SET0)),
-                       (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
 }
 
 let Predicates = [UseSSE1] in {
@@ -4307,6 +4296,19 @@ let Predicates = [UseSSE2] in {
             (MOVZPQILo2PQIrr VR128:$src)>;
 }
 
+let Predicates = [UseAVX] in {
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+}
+
 //===---------------------------------------------------------------------===//
 // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
 //===---------------------------------------------------------------------===//
@@ -6319,17 +6321,6 @@ let Predicates = [HasAVX, OptForSpeed] i
              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                           (i8 3))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
 }
 
 // Prefer a movss or movsd over a blendps when optimizing for size. these were

Modified: llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll Fri Jun 21 10:24:21 2019
@@ -144,19 +144,17 @@ define void @legal_vzmovl_2i64_4i64(<2 x
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovups (%ecx), %xmm0
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vmovdqu (%ecx), %xmm0
+; X32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-NEXT:    vmovdqa %ymm0, (%eax)
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: legal_vzmovl_2i64_4i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X64-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-NEXT:    vmovdqa %ymm0, (%rsi)
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %ld = load <2 x i64>, <2 x i64>* %in, align 8
@@ -198,19 +196,17 @@ define void @legal_vzmovl_2f64_4f64(<2 x
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovups (%ecx), %xmm0
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vmovdqu (%ecx), %xmm0
+; X32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-NEXT:    vmovdqa %ymm0, (%eax)
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: legal_vzmovl_2f64_4f64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X64-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-NEXT:    vmovdqa %ymm0, (%rsi)
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %ld = load <2 x double>, <2 x double>* %in, align 8

Modified: llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll Fri Jun 21 10:24:21 2019
@@ -71,18 +71,17 @@ define i64 @extract_any_extend_vector_in
 ; X32-AVX-NEXT:    andl $-128, %esp
 ; X32-AVX-NEXT:    subl $384, %esp # imm = 0x180
 ; X32-AVX-NEXT:    movl 40(%ebp), %ecx
-; X32-AVX-NEXT:    vbroadcastsd 32(%ebp), %ymm0
-; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; X32-AVX-NEXT:    vpbroadcastq 32(%ebp), %ymm0
+; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, (%esp)
-; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    leal (%ecx,%ecx), %eax
 ; X32-AVX-NEXT:    andl $31, %eax
 ; X32-AVX-NEXT:    movl 128(%esp,%eax,4), %eax
@@ -101,14 +100,13 @@ define i64 @extract_any_extend_vector_in
 ; X64-AVX-NEXT:    andq $-128, %rsp
 ; X64-AVX-NEXT:    subq $256, %rsp # imm = 0x100
 ; X64-AVX-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm3[3,1,2,3]
-; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; X64-AVX-NEXT:    vpermq {{.*#+}} ymm0 = ymm3[3,1,2,3]
+; X64-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    vmovaps %ymm1, (%rsp)
-; X64-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    andl $15, %edi
 ; X64-AVX-NEXT:    movq (%rsp,%rdi,8), %rax
 ; X64-AVX-NEXT:    movq %rbp, %rsp

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Fri Jun 21 10:24:21 2019
@@ -1505,8 +1505,7 @@ define <4 x double> @insert_reg_and_zero
 ; ALL-LABEL: insert_reg_and_zero_v4f64:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    retq
   %v = insertelement <4 x double> undef, double %a, i32 0
   %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -1987,8 +1986,7 @@ entry:
 define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
 ; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    retq
   %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x double> %b
@@ -1997,8 +1995,7 @@ define <4 x double> @shuffle_v4f64_0zzz_
 define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
 ; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    retq
   %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x i64> %b

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Fri Jun 21 10:24:21 2019
@@ -1973,8 +1973,7 @@ define <8 x double> @shuffle_v8f64_uuu23
 define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
 ; ALL-LABEL: shuffle_v8i64_0zzzzzzz:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <8 x i64> %shuffle
@@ -1983,8 +1982,7 @@ define <8 x i64> @shuffle_v8i64_0zzzzzzz
 define <8 x double> @shuffle_v8f64_0zzzzzzz(<8 x double> %a) {
 ; ALL-LABEL: shuffle_v8f64_0zzzzzzz:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <8 x double> %shuffle

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Fri Jun 21 10:24:21 2019
@@ -380,8 +380,7 @@ define <4 x i64> @combine_pshufb_as_zext
 define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
 ; CHECK-LABEL: combine_pshufb_as_vzmovl_64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = bitcast <4 x double> %a0 to <32 x i8>
   %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)



