[llvm] r364079 - [X86] Use vmovq for v4i64/v4f64/v8i64/v8f64 vzmovl.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 21 10:24:22 PDT 2019


Author: ctopper
Date: Fri Jun 21 10:24:21 2019
New Revision: 364079

URL: http://llvm.org/viewvc/llvm-project?rev=364079&view=rev
Log:
[X86] Use vmovq for v4i64/v4f64/v8i64/v8f64 vzmovl.

We already use vmovq for v2i64/v2f64 vzmovl, but we were using
blendpd+xorpd for v4i64/v4f64/v8i64/v8f64 when optimizing for speed,
or movsd+xorpd when optimizing for size.

I think the blend with zero or movss/movsd is only needed for vXi32,
where we don't have an instruction that can move 32 bits from one xmm
register to another while zeroing the upper bits.

movq is no worse than blendpd on any known CPU.
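
To make the change concrete, here is a minimal sketch of the shuffle
pattern this affects and the before/after codegen, modeled directly on
the shuffle_v4f64_0zzz tests updated below (the function name here is
illustrative only). The vzmovl keeps the low 64-bit element and zeroes
everything above it:

  define <4 x double> @vzmovl_v4f64(<4 x double> %a) {
    ; Lane 0 comes from %a; all other lanes come from zeroinitializer.
    %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    ret <4 x double> %b
  }

  ; Before (speed):                            After:
  ;   vxorps   %xmm1, %xmm1, %xmm1               vmovq xmm0 = xmm0[0],zero
  ;   vblendps xmm0 = xmm0[0,1],xmm1[2,3]
  ; Before (optsize): vxorpd + vmovsd

Since the VEX/EVEX-encoded xmm vmovq implicitly zeroes the upper bits
of the full ymm/zmm register, no extra instruction is needed for the
upper 128/384 bits.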

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
    llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Jun 21 10:24:21 2019
@@ -4286,15 +4286,6 @@ let Predicates = [HasAVX512, OptForSize]
              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
 
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;
-
   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
             (SUBREG_TO_REG (i32 0),
              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
@@ -4303,17 +4294,6 @@ let Predicates = [HasAVX512, OptForSize]
             (SUBREG_TO_REG (i32 0),
              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
-              (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
-              (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
-
 }
 
 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
@@ -4329,17 +4309,6 @@ let Predicates = [HasAVX512, OptForSpeed
              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
                           (i8 3))), sub_xmm)>;
-
-  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
 }
 
 let Predicates = [HasAVX512] in {
@@ -4452,6 +4421,28 @@ let Predicates = [HasAVX512] in {
             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
   def : Pat<(v8i64 (X86vzload addr:$src)),
             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
+
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
+             sub_xmm)>;
+
+  def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIZrr
+                     (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIZrr
+                     (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
+             sub_xmm)>;
 }
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Jun 21 10:24:21 2019
@@ -312,17 +312,6 @@ let Predicates = [UseAVX, OptForSize] in
             (SUBREG_TO_REG (i32 0),
              (v4i32 (VMOVSSrr (v4i32 (V_SET0)),
               (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VMOVSDrr (v2f64 (V_SET0)),
-                       (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VMOVSDrr (v2i64 (V_SET0)),
-                       (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
-             sub_xmm)>;
 }
 
 let Predicates = [UseSSE1] in {
@@ -4307,6 +4296,19 @@ let Predicates = [UseSSE2] in {
             (MOVZPQILo2PQIrr VR128:$src)>;
 }
 
+let Predicates = [UseAVX] in {
+  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2f64 (VMOVZPQILo2PQIrr
+                     (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+            (SUBREG_TO_REG (i32 0),
+             (v2i64 (VMOVZPQILo2PQIrr
+                     (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)))),
+             sub_xmm)>;
+}
+
 //===---------------------------------------------------------------------===//
 // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP
 //===---------------------------------------------------------------------===//
@@ -6319,17 +6321,6 @@ let Predicates = [HasAVX, OptForSpeed] i
              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
                           (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)),
                           (i8 3))), sub_xmm)>;
-
-  def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
-                          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)),
-                          (i8 1))), sub_xmm)>;
-  def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
-            (SUBREG_TO_REG (i32 0),
-             (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
-                          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)),
-                          (i8 0xf))), sub_xmm)>;
 }
 
 // Prefer a movss or movsd over a blendps when optimizing for size. these were

Modified: llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_extract-avx.ll Fri Jun 21 10:24:21 2019
@@ -144,19 +144,17 @@ define void @legal_vzmovl_2i64_4i64(<2 x
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovups (%ecx), %xmm0
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vmovdqu (%ecx), %xmm0
+; X32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-NEXT:    vmovdqa %ymm0, (%eax)
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: legal_vzmovl_2i64_4i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X64-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-NEXT:    vmovdqa %ymm0, (%rsi)
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %ld = load <2 x i64>, <2 x i64>* %in, align 8
@@ -198,19 +196,17 @@ define void @legal_vzmovl_2f64_4f64(<2 x
 ; X32:       # %bb.0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovups (%ecx), %xmm0
-; X32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X32-NEXT:    vmovaps %ymm0, (%eax)
+; X32-NEXT:    vmovdqu (%ecx), %xmm0
+; X32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X32-NEXT:    vmovdqa %ymm0, (%eax)
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: legal_vzmovl_2f64_4f64:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovups (%rdi), %xmm0
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; X64-NEXT:    vmovaps %ymm0, (%rsi)
+; X64-NEXT:    vmovdqu (%rdi), %xmm0
+; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; X64-NEXT:    vmovdqa %ymm0, (%rsi)
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %ld = load <2 x double>, <2 x double>* %in, align 8

Modified: llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-extend-inreg.ll Fri Jun 21 10:24:21 2019
@@ -71,18 +71,17 @@ define i64 @extract_any_extend_vector_in
 ; X32-AVX-NEXT:    andl $-128, %esp
 ; X32-AVX-NEXT:    subl $384, %esp # imm = 0x180
 ; X32-AVX-NEXT:    movl 40(%ebp), %ecx
-; X32-AVX-NEXT:    vbroadcastsd 32(%ebp), %ymm0
-; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X32-AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; X32-AVX-NEXT:    vpbroadcastq 32(%ebp), %ymm0
+; X32-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X32-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
-; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    vmovaps %ymm1, (%esp)
-; X32-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%esp)
 ; X32-AVX-NEXT:    leal (%ecx,%ecx), %eax
 ; X32-AVX-NEXT:    andl $31, %eax
 ; X32-AVX-NEXT:    movl 128(%esp,%eax,4), %eax
@@ -101,14 +100,13 @@ define i64 @extract_any_extend_vector_in
 ; X64-AVX-NEXT:    andq $-128, %rsp
 ; X64-AVX-NEXT:    subq $256, %rsp # imm = 0x100
 ; X64-AVX-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm3[3,1,2,3]
-; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; X64-AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; X64-AVX-NEXT:    vpermq {{.*#+}} ymm0 = ymm3[3,1,2,3]
+; X64-AVX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    vmovaps %ymm1, (%rsp)
-; X64-AVX-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    andl $15, %edi
 ; X64-AVX-NEXT:    movq (%rsp,%rdi,8), %rax
 ; X64-AVX-NEXT:    movq %rbp, %rsp

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Fri Jun 21 10:24:21 2019
@@ -1505,8 +1505,7 @@ define <4 x double> @insert_reg_and_zero
 ; ALL-LABEL: insert_reg_and_zero_v4f64:
 ; ALL:       # %bb.0:
 ; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    retq
   %v = insertelement <4 x double> undef, double %a, i32 0
   %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
@@ -1987,8 +1986,7 @@ entry:
 define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
 ; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    retq
   %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x double> %b
@@ -1997,8 +1995,7 @@ define <4 x double> @shuffle_v4f64_0zzz_
 define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
 ; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    retq
   %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x i64> %b

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Fri Jun 21 10:24:21 2019
@@ -1973,8 +1973,7 @@ define <8 x double> @shuffle_v8f64_uuu23
 define <8 x i64> @shuffle_v8i64_0zzzzzzz(<8 x i64> %a) {
 ; ALL-LABEL: shuffle_v8i64_0zzzzzzz:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <8 x i64> %shuffle
@@ -1983,8 +1982,7 @@ define <8 x i64> @shuffle_v8i64_0zzzzzzz
 define <8 x double> @shuffle_v8f64_0zzzzzzz(<8 x double> %a) {
 ; ALL-LABEL: shuffle_v8f64_0zzzzzzz:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; ALL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; ALL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; ALL-NEXT:    ret{{[l|q]}}
   %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <8 x double> %shuffle

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll?rev=364079&r1=364078&r2=364079&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx2.ll Fri Jun 21 10:24:21 2019
@@ -380,8 +380,7 @@ define <4 x i64> @combine_pshufb_as_zext
 define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
 ; CHECK-LABEL: combine_pshufb_as_vzmovl_64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; CHECK-NEXT:    ret{{[l|q]}}
   %1 = bitcast <4 x double> %a0 to <32 x i8>
   %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)



